Initial commit.
This commit is contained in:
commit
d3bb49b3f5
1073 changed files with 484757 additions and 0 deletions
30
Framework/external/embree/kernels/subdiv/bezier_curve.cpp
vendored
Normal file
30
Framework/external/embree/kernels/subdiv/bezier_curve.cpp
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Fills the lookup tables with the cubic Bezier basis (c0..c3) and its
 *  first derivative (d0..d3). Entry [i][j] is evaluated at the parameter
 *  u = (j+dj)/i for i = 1..N and j = 0..N. */
PrecomputedBezierBasis::PrecomputedBezierBasis(int dj)
{
  /* Row 0 would require a division by zero and therefore carries no
     meaningful data. Clear it explicitly so the whole table is defined even
     for objects that do not live in zero-initialised static storage. */
  for (int j=0; j<=N; j++)
  {
    c0[0][j] = c1[0][j] = c2[0][j] = c3[0][j] = 0.0f;
    d0[0][j] = d1[0][j] = d2[0][j] = d3[0][j] = 0.0f;
  }

  /* Signed loop indices keep j+dj in signed arithmetic; with size_t indices
     a negative sum would wrap around to a huge unsigned value. */
  for (int i=1; i<=N; i++)
  {
    for (int j=0; j<=N; j++)
    {
      const float u = float(j+dj)/float(i);

      const Vec4f f = BezierBasis::eval(u);
      c0[i][j] = f.x;
      c1[i][j] = f.y;
      c2[i][j] = f.z;
      c3[i][j] = f.w;

      const Vec4f d = BezierBasis::derivative(u);
      d0[i][j] = d.x;
      d1[i][j] = d.y;
      d2[i][j] = d.z;
      d3[i][j] = d.w;
    }
  }
}
|
||||
PrecomputedBezierBasis bezier_basis0(0);
|
||||
PrecomputedBezierBasis bezier_basis1(1);
|
||||
}
|
||||
729
Framework/external/embree/kernels/subdiv/bezier_curve.h
vendored
Normal file
729
Framework/external/embree/kernels/subdiv/bezier_curve.h
vendored
Normal file
|
|
@ -0,0 +1,729 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
//#include "../common/scene_curves.h"
|
||||
#include "../common/context.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class BezierBasis
|
||||
{
|
||||
public:
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> eval(const T& u)
|
||||
{
|
||||
const T t1 = u;
|
||||
const T t0 = 1.0f-t1;
|
||||
const T B0 = t0 * t0 * t0;
|
||||
const T B1 = 3.0f * t1 * (t0 * t0);
|
||||
const T B2 = 3.0f * (t1 * t1) * t0;
|
||||
const T B3 = t1 * t1 * t1;
|
||||
return Vec4<T>(B0,B1,B2,B3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative(const T& u)
|
||||
{
|
||||
const T t1 = u;
|
||||
const T t0 = 1.0f-t1;
|
||||
const T B0 = -(t0*t0);
|
||||
const T B1 = madd(-2.0f,t0*t1,t0*t0);
|
||||
const T B2 = msub(+2.0f,t0*t1,t1*t1);
|
||||
const T B3 = +(t1*t1);
|
||||
return T(3.0f)*Vec4<T>(B0,B1,B2,B3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative2(const T& u)
|
||||
{
|
||||
const T t1 = u;
|
||||
const T t0 = 1.0f-t1;
|
||||
const T B0 = t0;
|
||||
const T B1 = madd(-2.0f,t0,t1);
|
||||
const T B2 = madd(-2.0f,t1,t0);
|
||||
const T B3 = t1;
|
||||
return T(6.0f)*Vec4<T>(B0,B1,B2,B3);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Lookup tables holding the cubic Bezier basis and its first derivative.
 *  The tables are filled by the (int shift) constructor; entry [i][j] is
 *  evaluated at the parameter (j+shift)/i. */
struct PrecomputedBezierBasis
{
  /*! largest supported subdivision count; every table is (N+1) x (N+1) */
  enum { N = 16 };

  /*! leaves all tables untouched */
  PrecomputedBezierBasis() {}

  /*! fills all tables for the given parameter shift */
  PrecomputedBezierBasis(int shift);

  /* basis for bezier evaluation */
  float c0[N+1][N+1];
  float c1[N+1][N+1];
  float c2[N+1][N+1];
  float c3[N+1][N+1];

  /* basis for bezier derivative evaluation */
  float d0[N+1][N+1];
  float d1[N+1][N+1];
  float d2[N+1][N+1];
  float d3[N+1][N+1];
};
|
||||
extern PrecomputedBezierBasis bezier_basis0;
|
||||
extern PrecomputedBezierBasis bezier_basis1;
|
||||
|
||||
|
||||
template<typename V>
|
||||
struct LinearBezierCurve
|
||||
{
|
||||
V v0,v1;
|
||||
|
||||
__forceinline LinearBezierCurve () {}
|
||||
|
||||
__forceinline LinearBezierCurve (const LinearBezierCurve& other)
|
||||
: v0(other.v0), v1(other.v1) {}
|
||||
|
||||
__forceinline LinearBezierCurve& operator= (const LinearBezierCurve& other) {
|
||||
v0 = other.v0; v1 = other.v1; return *this;
|
||||
}
|
||||
|
||||
__forceinline LinearBezierCurve (const V& v0, const V& v1)
|
||||
: v0(v0), v1(v1) {}
|
||||
|
||||
__forceinline V begin() const { return v0; }
|
||||
__forceinline V end () const { return v1; }
|
||||
|
||||
bool hasRoot() const;
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream cout, const LinearBezierCurve& a) {
|
||||
return cout << "LinearBezierCurve (" << a.v0 << ", " << a.v1 << ")";
|
||||
}
|
||||
};
|
||||
|
||||
/*! Interval specialization: true when numRoots reports a non-zero count for
 *  the two control intervals. */
template<> __forceinline bool LinearBezierCurve<Interval1f>::hasRoot() const
{
  /* spell out the conversion of the count to bool */
  return numRoots(v0,v1) != 0;
}
|
||||
|
||||
template<typename V>
|
||||
struct QuadraticBezierCurve
|
||||
{
|
||||
V v0,v1,v2;
|
||||
|
||||
__forceinline QuadraticBezierCurve () {}
|
||||
|
||||
__forceinline QuadraticBezierCurve (const QuadraticBezierCurve& other)
|
||||
: v0(other.v0), v1(other.v1), v2(other.v2) {}
|
||||
|
||||
__forceinline QuadraticBezierCurve& operator= (const QuadraticBezierCurve& other) {
|
||||
v0 = other.v0; v1 = other.v1; v2 = other.v2; return *this;
|
||||
}
|
||||
|
||||
__forceinline QuadraticBezierCurve (const V& v0, const V& v1, const V& v2)
|
||||
: v0(v0), v1(v1), v2(v2) {}
|
||||
|
||||
__forceinline V begin() const { return v0; }
|
||||
__forceinline V end () const { return v2; }
|
||||
|
||||
__forceinline V interval() const {
|
||||
return merge(v0,v1,v2);
|
||||
}
|
||||
|
||||
__forceinline BBox<V> bounds() const {
|
||||
return merge(BBox<V>(v0),BBox<V>(v1),BBox<V>(v2));
|
||||
}
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream cout, const QuadraticBezierCurve& a) {
|
||||
return cout << "QuadraticBezierCurve ( (" << a.u.lower << ", " << a.u.upper << "), " << a.v0 << ", " << a.v1 << ", " << a.v2 << ")";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef QuadraticBezierCurve<float> QuadraticBezierCurve1f;
|
||||
typedef QuadraticBezierCurve<Vec2fa> QuadraticBezierCurve2fa;
|
||||
typedef QuadraticBezierCurve<Vec3fa> QuadraticBezierCurve3fa;
|
||||
|
||||
template<typename Vertex>
|
||||
struct CubicBezierCurve
|
||||
{
|
||||
Vertex v0,v1,v2,v3;
|
||||
|
||||
__forceinline CubicBezierCurve() {}
|
||||
|
||||
template<typename T1>
|
||||
__forceinline CubicBezierCurve (const CubicBezierCurve<T1>& other)
|
||||
: v0(other.v0), v1(other.v1), v2(other.v2), v3(other.v3) {}
|
||||
|
||||
__forceinline CubicBezierCurve& operator= (const CubicBezierCurve& other) {
|
||||
v0 = other.v0; v1 = other.v1; v2 = other.v2; v3 = other.v3; return *this;
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
|
||||
: v0(v0), v1(v1), v2(v2), v3(v3) {}
|
||||
|
||||
__forceinline Vertex begin() const {
|
||||
return v0;
|
||||
}
|
||||
|
||||
__forceinline Vertex end() const {
|
||||
return v3;
|
||||
}
|
||||
|
||||
__forceinline Vertex center() const {
|
||||
return 0.25f*(v0+v1+v2+v3);
|
||||
}
|
||||
|
||||
__forceinline Vertex begin_direction() const {
|
||||
return v1-v0;
|
||||
}
|
||||
|
||||
__forceinline Vertex end_direction() const {
|
||||
return v3-v2;
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const {
|
||||
return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
|
||||
}
|
||||
|
||||
template<int W>
|
||||
__forceinline CubicBezierCurve<vfloat<W>> vxfm(const Vertex& dx) const {
|
||||
return CubicBezierCurve<vfloat<W>>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const {
|
||||
return CubicBezierCurve<float>(dot(v0-p,dx),dot(v1-p,dx),dot(v2-p,dx),dot(v3-p,dx));
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space) const
|
||||
{
|
||||
const Vec3fa q0 = xfmVector(space,v0);
|
||||
const Vec3fa q1 = xfmVector(space,v1);
|
||||
const Vec3fa q2 = xfmVector(space,v2);
|
||||
const Vec3fa q3 = xfmVector(space,v3);
|
||||
return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const
|
||||
{
|
||||
const Vec3fa q0 = xfmVector(space,v0-p);
|
||||
const Vec3fa q1 = xfmVector(space,v1-p);
|
||||
const Vec3fa q2 = xfmVector(space,v2-p);
|
||||
const Vec3fa q3 = xfmVector(space,v3-p);
|
||||
return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
|
||||
{
|
||||
const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
|
||||
const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
|
||||
const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
|
||||
const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
|
||||
return CubicBezierCurve<Vec3ff>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const
|
||||
{
|
||||
const Vec3fa q0 = xfmVector(space,s*(v0-p));
|
||||
const Vec3fa q1 = xfmVector(space,s*(v1-p));
|
||||
const Vec3fa q2 = xfmVector(space,s*(v2-p));
|
||||
const Vec3fa q3 = xfmVector(space,s*(v3-p));
|
||||
return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline int maxRoots() const;
|
||||
|
||||
__forceinline BBox<Vertex> bounds() const {
|
||||
return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve operator +( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
|
||||
return CubicBezierCurve(a.v0+b.v0,a.v1+b.v1,a.v2+b.v2,a.v3+b.v3);
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
|
||||
return CubicBezierCurve(a.v0-b.v0,a.v1-b.v1,a.v2-b.v2,a.v3-b.v3);
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const Vertex& b ) {
|
||||
return CubicBezierCurve(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve operator *( const Vertex& a, const CubicBezierCurve& b ) {
|
||||
return CubicBezierCurve(a*b.v0,a*b.v1,a*b.v2,a*b.v3);
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve cmadd( const Vertex& a, const CubicBezierCurve& b, const CubicBezierCurve& c) {
|
||||
return CubicBezierCurve(madd(a,b.v0,c.v0),madd(a,b.v1,c.v1),madd(a,b.v2,c.v2),madd(a,b.v3,c.v3));
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve clerp ( const CubicBezierCurve& a, const CubicBezierCurve& b, const Vertex& t ) {
|
||||
return cmadd((Vertex(1.0f)-t),a,t*b);
|
||||
}
|
||||
|
||||
__forceinline friend CubicBezierCurve merge ( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
|
||||
return CubicBezierCurve(merge(a.v0,b.v0),merge(a.v1,b.v1),merge(a.v2,b.v2),merge(a.v3,b.v3));
|
||||
}
|
||||
|
||||
/*! Splits the curve at parameter t (default 0.5) using repeated linear
 *  interpolation and constructs the two halves in place in 'left' and
 *  'right'. */
__forceinline void split(CubicBezierCurve& left, CubicBezierCurve& right, const float t = 0.5f) const
{
  /* copy the control points first, so the outputs may alias *this */
  const Vertex a0 = v0;
  const Vertex a1 = v1;
  const Vertex a2 = v2;
  const Vertex a3 = v3;

  /* first interpolation level */
  const Vertex b0 = lerp(a0,a1,t);
  const Vertex b1 = lerp(a1,a2,t);
  const Vertex b2 = lerp(a2,a3,t);

  /* second interpolation level */
  const Vertex c0 = lerp(b0,b1,t);
  const Vertex c1 = lerp(b1,b2,t);

  /* point shared by both halves */
  const Vertex mid = lerp(c0,c1,t);

  new (&left ) CubicBezierCurve(a0,b0,c0,mid);
  new (&right) CubicBezierCurve(mid,c1,b2,a3);
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2vfx> split() const
|
||||
{
|
||||
const float u0 = 0.0f, u1 = 1.0f;
|
||||
const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
|
||||
const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
|
||||
Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
|
||||
const Vec2vfx P3 = shift_right_1(P0);
|
||||
const Vec2vfx dP3du = shift_right_1(dP0du);
|
||||
const Vec2vfx P1 = P0 + dP0du;
|
||||
const Vec2vfx P2 = P3 - dP3du;
|
||||
return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2vfx> split(const BBox1f& u) const
|
||||
{
|
||||
const float u0 = u.lower, u1 = u.upper;
|
||||
const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
|
||||
const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
|
||||
Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
|
||||
const Vec2vfx P3 = shift_right_1(P0);
|
||||
const Vec2vfx dP3du = shift_right_1(dP0du);
|
||||
const Vec2vfx P1 = P0 + dP0du;
|
||||
const Vec2vfx P2 = P3 - dP3du;
|
||||
return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
|
||||
}
|
||||
|
||||
template<int W>
|
||||
__forceinline CubicBezierCurve<Vec2vf<W>> split(const BBox1f& u, int i, int N) const
|
||||
{
|
||||
const float u0 = u.lower, u1 = u.upper;
|
||||
const float dscale = (u1-u0)*(1.0f/(3.0f*N));
|
||||
const vfloat<W> vu0 = lerp(u0,u1,(vfloat<W>(i)+vfloat<W>(StepTy()))*(1.0f/N));
|
||||
Vec2vf<W> P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vf<W>(dscale);
|
||||
const Vec2vf<W> P3 = shift_right_1(P0);
|
||||
const Vec2vf<W> dP3du = shift_right_1(dP0du);
|
||||
const Vec2vf<W> P1 = P0 + dP0du;
|
||||
const Vec2vf<W> P2 = P3 - dP3du;
|
||||
return CubicBezierCurve<Vec2vf<W>>(P0,P1,P2,P3);
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2f> split1(const BBox1f& u, int i, int N) const
|
||||
{
|
||||
const float u0 = u.lower, u1 = u.upper;
|
||||
const float dscale = (u1-u0)*(1.0f/(3.0f*N));
|
||||
const float vu0 = lerp(u0,u1,(float(i)+0)*(1.0f/N));
|
||||
const float vu1 = lerp(u0,u1,(float(i)+1)*(1.0f/N));
|
||||
Vec2fa P0, dP0du; eval(vu0,P0,dP0du); dP0du = dP0du * Vec2fa(dscale);
|
||||
Vec2fa P3, dP3du; eval(vu1,P3,dP3du); dP3du = dP3du * Vec2fa(dscale);
|
||||
const Vec2fa P1 = P0 + dP0du;
|
||||
const Vec2fa P2 = P3 - dP3du;
|
||||
return CubicBezierCurve<Vec2f>(P0,P1,P2,P3);
|
||||
}
|
||||
|
||||
/*! Evaluates position p and first derivative dp at parameter t using
 *  repeated linear interpolation of the control points. */
__forceinline void eval(float t, Vertex& p, Vertex& dp) const
{
  /* level 0: the control points themselves */
  const Vertex a0 = v0;
  const Vertex a1 = v1;
  const Vertex a2 = v2;
  const Vertex a3 = v3;

  /* level 1 */
  const Vertex b0 = lerp(a0,a1,t);
  const Vertex b1 = lerp(a1,a2,t);
  const Vertex b2 = lerp(a2,a3,t);

  /* level 2 */
  const Vertex c0 = lerp(b0,b1,t);
  const Vertex c1 = lerp(b1,b2,t);

  /* level 3: the point on the curve */
  const Vertex onCurve = lerp(c0,c1,t);

  p = onCurve;
  /* the derivative is 3x the difference of the two level-2 points */
  dp = Vertex(3.0f)*(c1-c0);
}
|
||||
|
||||
#if 0
|
||||
__forceinline Vertex eval(float t) const
|
||||
{
|
||||
const Vertex p00 = v0;
|
||||
const Vertex p01 = v1;
|
||||
const Vertex p02 = v2;
|
||||
const Vertex p03 = v3;
|
||||
|
||||
const Vertex p10 = lerp(p00,p01,t);
|
||||
const Vertex p11 = lerp(p01,p02,t);
|
||||
const Vertex p12 = lerp(p02,p03,t);
|
||||
const Vertex p20 = lerp(p10,p11,t);
|
||||
const Vertex p21 = lerp(p11,p12,t);
|
||||
const Vertex p30 = lerp(p20,p21,t);
|
||||
|
||||
return p30;
|
||||
}
|
||||
#else
|
||||
__forceinline Vertex eval(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BezierBasis::eval(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
#endif
|
||||
|
||||
/*! Evaluates the first derivative at parameter t by interpolating the
 *  differences of consecutive control points. */
__forceinline Vertex eval_dt(float t) const
{
  /* differences between consecutive control points */
  const Vertex e0 = v1-v0;
  const Vertex e1 = v2-v1;
  const Vertex e2 = v3-v2;

  /* two interpolation levels reduce them to a single vector */
  const Vertex f0 = lerp(e0,e1,t);
  const Vertex f1 = lerp(e1,e2,t);
  const Vertex g0 = lerp(f0,f1,t);

  return Vertex(3.0f)*g0;
}
|
||||
|
||||
__forceinline Vertex eval_du(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BezierBasis::derivative(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dudu(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BezierBasis::derivative2(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline void evalN(const vfloatx& t, Vec2vfx& p, Vec2vfx& dp) const
|
||||
{
|
||||
const Vec2vfx p00 = v0;
|
||||
const Vec2vfx p01 = v1;
|
||||
const Vec2vfx p02 = v2;
|
||||
const Vec2vfx p03 = v3;
|
||||
|
||||
const Vec2vfx p10 = lerp(p00,p01,t);
|
||||
const Vec2vfx p11 = lerp(p01,p02,t);
|
||||
const Vec2vfx p12 = lerp(p02,p03,t);
|
||||
|
||||
const Vec2vfx p20 = lerp(p10,p11,t);
|
||||
const Vec2vfx p21 = lerp(p11,p12,t);
|
||||
|
||||
const Vec2vfx p30 = lerp(p20,p21,t);
|
||||
|
||||
p = p30;
|
||||
dp = vfloatx(3.0f)*(p21-p20);
|
||||
}
|
||||
|
||||
/*! Evaluates position p, first derivative dp and second derivative ddp at
 *  parameter t. Position and first derivative come from repeated linear
 *  interpolation, the second derivative from eval_dudu. */
__forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
{
  /* level 0: the control points themselves */
  const Vertex a0 = v0;
  const Vertex a1 = v1;
  const Vertex a2 = v2;
  const Vertex a3 = v3;

  /* level 1 */
  const Vertex b0 = lerp(a0,a1,t);
  const Vertex b1 = lerp(a1,a2,t);
  const Vertex b2 = lerp(a2,a3,t);

  /* level 2 */
  const Vertex c0 = lerp(b0,b1,t);
  const Vertex c1 = lerp(b1,b2,t);

  /* level 3: the point on the curve */
  const Vertex onCurve = lerp(c0,c1,t);

  p = onCurve;
  dp = 3.0f*(c1-c0);
  ddp = eval_dudu(t);
}
|
||||
|
||||
/*! Returns the cubic Bezier curve covering the parameter interval u1 of
 *  this curve. The new end points are the curve evaluated at the interval
 *  bounds; the inner control points are offset from them along the
 *  derivative, scaled by one third of the interval length. */
__forceinline CubicBezierCurve clip(const Interval1f& u1) const
{
  Vertex p_lo,dp_lo; eval(u1.lower,p_lo,dp_lo);
  Vertex p_hi,dp_hi; eval(u1.upper,p_hi,dp_hi);

  /* one third of the interval length */
  const float third = (u1.upper-u1.lower)*(1.0f/3.0f);

  return CubicBezierCurve(p_lo,
                          p_lo+third*dp_lo,
                          p_hi-third*dp_hi,
                          p_hi);
}
|
||||
|
||||
/*! Returns the quadratic Bezier curve whose control points are three times
 *  the differences of consecutive control points of this curve. */
__forceinline QuadraticBezierCurve<Vertex> derivative() const
{
  const Vertex d01 = 3.0f*(v1-v0);
  const Vertex d12 = 3.0f*(v2-v1);
  const Vertex d23 = 3.0f*(v3-v2);
  return QuadraticBezierCurve<Vertex>(d01,d12,d23);
}
|
||||
|
||||
/*! Bounds the derivative over the parameter interval u1. The curve is
 *  clipped to u1 (same construction as clip()), and the three control points
 *  of the clipped curve's derivative are merged into a box. */
__forceinline BBox<Vertex> derivative_bounds(const Interval1f& u1) const
{
  Vertex p_lo,dp_lo; eval(u1.lower,p_lo,dp_lo);
  Vertex p_hi,dp_hi; eval(u1.upper,p_hi,dp_hi);

  const float len = u1.upper-u1.lower;

  /* inner control points of the clipped curve */
  const Vertex in_lo = p_lo+len*(1.0f/3.0f)*dp_lo;
  const Vertex in_hi = p_hi-len*(1.0f/3.0f)*dp_hi;

  /* control points of the clipped curve's derivative */
  const Vertex d_first = len*dp_lo;
  const Vertex d_mid   = 3.0f*(in_hi-in_lo);
  const Vertex d_last  = len*dp_hi;

  return merge(BBox<Vertex>(d_first),BBox<Vertex>(d_mid),BBox<Vertex>(d_last));
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BezierBasis::eval(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BezierBasis::derivative(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BezierBasis::derivative2(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M, typename Vec>
|
||||
__forceinline void veval(const vfloat<M>& t, Vec& p, Vec& dp) const
|
||||
{
|
||||
const Vec p00 = v0;
|
||||
const Vec p01 = v1;
|
||||
const Vec p02 = v2;
|
||||
const Vec p03 = v3;
|
||||
|
||||
const Vec p10 = lerp(p00,p01,t);
|
||||
const Vec p11 = lerp(p01,p02,t);
|
||||
const Vec p12 = lerp(p02,p03,t);
|
||||
const Vec p20 = lerp(p10,p11,t);
|
||||
const Vec p21 = lerp(p11,p12,t);
|
||||
const Vec p30 = lerp(p20,p21,t);
|
||||
|
||||
p = p30;
|
||||
dp = vfloat<M>(3.0f)*(p21-p20);
|
||||
}
|
||||
|
||||
template<int M, typename Vec = Vec4vf<M>>
|
||||
__forceinline Vec eval0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBezierBasis::N);
|
||||
assert(ofs <= size);
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
assert(size > 0);
|
||||
const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
|
||||
Vec p,dp; veval<M>(t,p,dp);
|
||||
return p;
|
||||
#else
|
||||
return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0),
|
||||
madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1),
|
||||
madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2),
|
||||
vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3))));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int M, typename Vec = Vec4vf<M>>
|
||||
__forceinline Vec eval1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBezierBasis::N);
|
||||
assert(ofs <= size);
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
assert(size > 0);
|
||||
const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
|
||||
Vec p,dp; veval<M>(t,p,dp);
|
||||
return p;
|
||||
#else
|
||||
return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0),
|
||||
madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1),
|
||||
madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2),
|
||||
vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3))));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int M, typename Vec = Vec4vf<M>>
|
||||
__forceinline Vec derivative0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBezierBasis::N);
|
||||
assert(ofs <= size);
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
assert(size > 0);
|
||||
const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
|
||||
Vec p,dp; veval<M>(t,p,dp);
|
||||
return dp;
|
||||
#else
|
||||
return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0),
|
||||
madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1),
|
||||
madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2),
|
||||
vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3))));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int M, typename Vec = Vec4vf<M>>
|
||||
__forceinline Vec derivative1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBezierBasis::N);
|
||||
assert(ofs <= size);
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
assert(size > 0);
|
||||
const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
|
||||
Vec p,dp; veval<M>(t,p,dp);
|
||||
return dp;
|
||||
#else
|
||||
return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0),
|
||||
madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1),
|
||||
madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2),
|
||||
vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3))));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* calculates bounds of bezier curve geometry */
|
||||
__forceinline BBox3fa accurateBounds() const
|
||||
{
|
||||
const int N = 7;
|
||||
const float scale = 1.0f/(3.0f*(N-1));
|
||||
Vec3vfx pl(pos_inf), pu(neg_inf);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vintx vi = vintx(i)+vintx(StepTy());
|
||||
vboolx valid = vi <= vintx(N);
|
||||
const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
|
||||
const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
|
||||
const Vec3vfx pm = p-Vec3vfx(scale)*select(vi!=vintx(0),dp,Vec3vfx(zero));
|
||||
const Vec3vfx pp = p+Vec3vfx(scale)*select(vi!=vintx(N),dp,Vec3vfx(zero));
|
||||
pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
|
||||
pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/* calculates bounds of bezier curve geometry */
|
||||
__forceinline BBox3fa accurateRoundBounds() const
|
||||
{
|
||||
const int N = 7;
|
||||
const float scale = 1.0f/(3.0f*(N-1));
|
||||
Vec4vfx pl(pos_inf), pu(neg_inf);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vintx vi = vintx(i)+vintx(StepTy());
|
||||
vboolx valid = vi <= vintx(N);
|
||||
const Vec4vfx p = eval0<VSIZEX>(i,N);
|
||||
const Vec4vfx dp = derivative0<VSIZEX>(i,N);
|
||||
const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
|
||||
const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
|
||||
pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
|
||||
pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const float r_min = reduce_min(pl.w);
|
||||
const float r_max = reduce_max(pu.w);
|
||||
const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
|
||||
return enlarge(BBox3fa(lower,upper),upper_r);
|
||||
}
|
||||
|
||||
/* calculates bounds when tessellated into N line segments */
|
||||
__forceinline BBox3fa accurateFlatBounds(int N) const
|
||||
{
|
||||
if (likely(N == 4))
|
||||
{
|
||||
const Vec4vf4 pi = eval0<4>(0,4);
|
||||
const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
|
||||
const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
|
||||
const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
|
||||
return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
|
||||
for (int i=0; i<N; i+=VSIZEX)
|
||||
{
|
||||
vboolx valid = vintx(i)+vintx(StepTy()) < vintx(N);
|
||||
const Vec4vfx pi = eval0<VSIZEX>(i,N);
|
||||
|
||||
pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
|
||||
pl.y = select(valid,min(pl.y,pi.y),pl.y);
|
||||
pl.z = select(valid,min(pl.z,pi.z),pl.z);
|
||||
|
||||
pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
|
||||
pu.y = select(valid,max(pu.y,pi.y),pu.y);
|
||||
pu.z = select(valid,max(pu.z,pi.z),pu.z);
|
||||
|
||||
ru = select(valid,max(ru,abs(pi.w)),ru);
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const Vec3fa upper_r(reduce_max(ru));
|
||||
return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
|
||||
}
|
||||
}
|
||||
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const CubicBezierCurve& curve) {
|
||||
return cout << "CubicBezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__AVX__)
|
||||
template<>
|
||||
__forceinline CubicBezierCurve<vfloat4> CubicBezierCurve<vfloat4>::clip(const Interval1f& u1) const
|
||||
{
|
||||
const vfloat8 p00 = vfloat8(v0);
|
||||
const vfloat8 p01 = vfloat8(v1);
|
||||
const vfloat8 p02 = vfloat8(v2);
|
||||
const vfloat8 p03 = vfloat8(v3);
|
||||
|
||||
const vfloat8 t(vfloat4(u1.lower),vfloat4(u1.upper));
|
||||
const vfloat8 p10 = lerp(p00,p01,t);
|
||||
const vfloat8 p11 = lerp(p01,p02,t);
|
||||
const vfloat8 p12 = lerp(p02,p03,t);
|
||||
const vfloat8 p20 = lerp(p10,p11,t);
|
||||
const vfloat8 p21 = lerp(p11,p12,t);
|
||||
const vfloat8 p30 = lerp(p20,p21,t);
|
||||
|
||||
const vfloat8 f01 = p30;
|
||||
const vfloat8 df01 = vfloat8(3.0f)*(p21-p20);
|
||||
|
||||
const vfloat4 f0 = extract4<0>(f01), f1 = extract4<1>(f01);
|
||||
const vfloat4 df0 = extract4<0>(df01), df1 = extract4<1>(df01);
|
||||
const float s = u1.upper-u1.lower;
|
||||
return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Vertex> using BezierCurveT = CubicBezierCurve<Vertex>;
|
||||
|
||||
typedef CubicBezierCurve<float> CubicBezierCurve1f;
|
||||
typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa;
|
||||
typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa;
|
||||
typedef CubicBezierCurve<Vec3fa> BezierCurve3fa;
|
||||
typedef CubicBezierCurve<Vec3ff> BezierCurve3ff;
|
||||
|
||||
/*! Scalar specialization: counts, for each of the three pairs of consecutive
 *  control values, whether the pair differs in its "<= 0" classification or
 *  involves a value with magnitude below 1E-4. The result is in [0,3]. */
template<> __forceinline int CubicBezierCurve<float>::maxRoots() const
{
  const float eps = 1E-4f;

  /* classification of every control value */
  const bool nonPos0 = v0 <= 0.0f, nearZero0 = fabs(v0) < eps;
  const bool nonPos1 = v1 <= 0.0f, nearZero1 = fabs(v1) < eps;
  const bool nonPos2 = v2 <= 0.0f, nearZero2 = fabs(v2) < eps;
  const bool nonPos3 = v3 <= 0.0f, nearZero3 = fabs(v3) < eps;

  int count = 0;
  if (nonPos0 != nonPos1 || nearZero0) count++;
  if (nonPos1 != nonPos2 || nearZero1) count++;
  /* the last pair also accounts for a near-zero final control value */
  if (nonPos2 != nonPos3 || nearZero2 || nearZero3) count++;
  return count;
}
|
||||
|
||||
template<> __forceinline int CubicBezierCurve<Interval1f>::maxRoots() const {
|
||||
return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3);
|
||||
}
|
||||
|
||||
struct CurveGeometry; // FIXME: this code should move !
|
||||
template<typename CurveGeometry>
|
||||
__forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
|
||||
{
|
||||
return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
|
||||
}
|
||||
}
|
||||
372
Framework/external/embree/kernels/subdiv/bezier_patch.h
vendored
Normal file
372
Framework/external/embree/kernels/subdiv/bezier_patch.h
vendored
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<class T, class S>
|
||||
static __forceinline T deCasteljau(const S& uu, const T& v0, const T& v1, const T& v2, const T& v3)
|
||||
{
|
||||
const T v0_1 = lerp(v0,v1,uu);
|
||||
const T v1_1 = lerp(v1,v2,uu);
|
||||
const T v2_1 = lerp(v2,v3,uu);
|
||||
const T v0_2 = lerp(v0_1,v1_1,uu);
|
||||
const T v1_2 = lerp(v1_1,v2_1,uu);
|
||||
const T v0_3 = lerp(v0_2,v1_2,uu);
|
||||
return v0_3;
|
||||
}
|
||||
|
||||
template<class T, class S>
|
||||
static __forceinline T deCasteljau_tangent(const S& uu, const T& v0, const T& v1, const T& v2, const T& v3)
|
||||
{
|
||||
const T v0_1 = lerp(v0,v1,uu);
|
||||
const T v1_1 = lerp(v1,v2,uu);
|
||||
const T v2_1 = lerp(v2,v3,uu);
|
||||
const T v0_2 = lerp(v0_1,v1_1,uu);
|
||||
const T v1_2 = lerp(v1_1,v2_1,uu);
|
||||
return S(3.0f)*(v1_2-v0_2);
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeInnerBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
|
||||
return 1.0f / 36.0f * (16.0f * v[y][x] + 4.0f * (v[y-1][x] + v[y+1][x] + v[y][x-1] + v[y][x+1]) + (v[y-1][x-1] + v[y+1][x+1] + v[y-1][x+1] + v[y+1][x-1]));
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeTopEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
|
||||
return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y-1][x] + 2.0f * (v[y][x-1] + v[y][x+1]) + (v[y-1][x-1] + v[y-1][x+1]));
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeBottomEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
|
||||
return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y+1][x] + 2.0f * (v[y][x-1] + v[y][x+1]) + v[y+1][x-1] + v[y+1][x+1]);
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeLeftEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
|
||||
return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y][x-1] + 2.0f * (v[y-1][x] + v[y+1][x]) + v[y-1][x-1] + v[y+1][x-1]);
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeRightEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
|
||||
return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y][x+1] + 2.0f * (v[y-1][x] + v[y+1][x]) + v[y-1][x+1] + v[y+1][x+1]);
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline Vertex computeCornerBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x, const ssize_t delta_y, const ssize_t delta_x)
|
||||
{
|
||||
return 1.0f / 9.0f * (4.0f * v[y][x] + 2.0f * (v[y+delta_y][x] + v[y][x+delta_x]) + v[y+delta_y][x+delta_x]);
|
||||
}
|
||||
|
||||
template<typename Vertex, typename Vertex_t>
|
||||
class __aligned(64) BezierPatchT
|
||||
{
|
||||
public:
|
||||
Vertex matrix[4][4];
|
||||
|
||||
public:
|
||||
|
||||
__forceinline BezierPatchT() {}
|
||||
|
||||
__forceinline BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride);
|
||||
|
||||
__forceinline BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch);
|
||||
|
||||
__forceinline BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
|
||||
const BezierCurveT<Vertex>* border0,
|
||||
const BezierCurveT<Vertex>* border1,
|
||||
const BezierCurveT<Vertex>* border2,
|
||||
const BezierCurveT<Vertex>* border3);
|
||||
|
||||
__forceinline BezierPatchT(const BSplinePatchT<Vertex,Vertex_t>& source)
|
||||
{
|
||||
/* compute inner bezier control points */
|
||||
matrix[0][0] = computeInnerBezierControlPoint(source.v,1,1);
|
||||
matrix[0][3] = computeInnerBezierControlPoint(source.v,1,2);
|
||||
matrix[3][3] = computeInnerBezierControlPoint(source.v,2,2);
|
||||
matrix[3][0] = computeInnerBezierControlPoint(source.v,2,1);
|
||||
|
||||
/* compute top edge control points */
|
||||
matrix[0][1] = computeRightEdgeBezierControlPoint(source.v,1,1);
|
||||
matrix[0][2] = computeLeftEdgeBezierControlPoint(source.v,1,2);
|
||||
|
||||
/* compute bottom edge control points */
|
||||
matrix[3][1] = computeRightEdgeBezierControlPoint(source.v,2,1);
|
||||
matrix[3][2] = computeLeftEdgeBezierControlPoint(source.v,2,2);
|
||||
|
||||
/* compute left edge control points */
|
||||
matrix[1][0] = computeBottomEdgeBezierControlPoint(source.v,1,1);
|
||||
matrix[2][0] = computeTopEdgeBezierControlPoint(source.v,2,1);
|
||||
|
||||
/* compute right edge control points */
|
||||
matrix[1][3] = computeBottomEdgeBezierControlPoint(source.v,1,2);
|
||||
matrix[2][3] = computeTopEdgeBezierControlPoint(source.v,2,2);
|
||||
|
||||
/* compute corner control points */
|
||||
matrix[1][1] = computeCornerBezierControlPoint(source.v,1,1, 1, 1);
|
||||
matrix[1][2] = computeCornerBezierControlPoint(source.v,1,2, 1,-1);
|
||||
matrix[2][2] = computeCornerBezierControlPoint(source.v,2,2,-1,-1);
|
||||
matrix[2][1] = computeCornerBezierControlPoint(source.v,2,1,-1, 1);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t bilinear(const Vec4f Bu, const Vertex matrix[4][4], const Vec4f Bv)
|
||||
{
|
||||
const Vertex_t M0 = madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3])));
|
||||
const Vertex_t M1 = madd(Bu.x,matrix[1][0],madd(Bu.y,matrix[1][1],madd(Bu.z,matrix[1][2],Bu.w * matrix[1][3])));
|
||||
const Vertex_t M2 = madd(Bu.x,matrix[2][0],madd(Bu.y,matrix[2][1],madd(Bu.z,matrix[2][2],Bu.w * matrix[2][3])));
|
||||
const Vertex_t M3 = madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])));
|
||||
return madd(Bv.x,M0,madd(Bv.y,M1,madd(Bv.z,M2,Bv.w*M3)));
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::eval(uu);
|
||||
const Vec4f Bv = BezierBasis::eval(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval_du(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::derivative(uu);
|
||||
const Vec4f Bv = BezierBasis::eval(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval_dv(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::eval(uu);
|
||||
const Vec4f Bv = BezierBasis::derivative(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval_dudu(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::derivative2(uu);
|
||||
const Vec4f Bv = BezierBasis::eval(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval_dvdv(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::eval(uu);
|
||||
const Vec4f Bv = BezierBasis::derivative2(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t eval_dudv(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vec4f Bu = BezierBasis::derivative(uu);
|
||||
const Vec4f Bv = BezierBasis::derivative(vv);
|
||||
return bilinear(Bu,matrix,Bv);
|
||||
}
|
||||
|
||||
static __forceinline Vertex_t normal(const Vertex matrix[4][4], const float uu, const float vv)
|
||||
{
|
||||
const Vertex_t dPdu = eval_du(matrix,uu,vv);
|
||||
const Vertex_t dPdv = eval_dv(matrix,uu,vv);
|
||||
return cross(dPdu,dPdv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t normal(const float uu, const float vv)
|
||||
{
|
||||
const Vertex_t dPdu = eval_du(matrix,uu,vv);
|
||||
const Vertex_t dPdv = eval_dv(matrix,uu,vv);
|
||||
return cross(dPdu,dPdv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval(const float uu, const float vv) const {
|
||||
return eval(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval_du(const float uu, const float vv) const {
|
||||
return eval_du(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval_dv(const float uu, const float vv) const {
|
||||
return eval_dv(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval_dudu(const float uu, const float vv) const {
|
||||
return eval_dudu(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval_dvdv(const float uu, const float vv) const {
|
||||
return eval_dvdv(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex_t eval_dudv(const float uu, const float vv) const {
|
||||
return eval_dudv(matrix,uu,vv);
|
||||
}
|
||||
|
||||
__forceinline void eval(const float u, const float v, Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv, const float dscale = 1.0f) const
|
||||
{
|
||||
if (P) {
|
||||
*P = eval(u,v);
|
||||
}
|
||||
if (dPdu) {
|
||||
assert(dPdu); *dPdu = eval_du(u,v)*dscale;
|
||||
assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
|
||||
}
|
||||
if (ddPdudu) {
|
||||
assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
|
||||
assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
|
||||
assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n) const
|
||||
{
|
||||
const vfloat curve0_x = v_n[0] * vfloat(matrix[0][0][i]) + v_n[1] * vfloat(matrix[1][0][i]) + v_n[2] * vfloat(matrix[2][0][i]) + v_n[3] * vfloat(matrix[3][0][i]);
|
||||
const vfloat curve1_x = v_n[0] * vfloat(matrix[0][1][i]) + v_n[1] * vfloat(matrix[1][1][i]) + v_n[2] * vfloat(matrix[2][1][i]) + v_n[3] * vfloat(matrix[3][1][i]);
|
||||
const vfloat curve2_x = v_n[0] * vfloat(matrix[0][2][i]) + v_n[1] * vfloat(matrix[1][2][i]) + v_n[2] * vfloat(matrix[2][2][i]) + v_n[3] * vfloat(matrix[3][2][i]);
|
||||
const vfloat curve3_x = v_n[0] * vfloat(matrix[0][3][i]) + v_n[1] * vfloat(matrix[1][3][i]) + v_n[2] * vfloat(matrix[2][3][i]) + v_n[3] * vfloat(matrix[3][3][i]);
|
||||
return u_n[0] * curve0_x + u_n[1] * curve1_x + u_n[2] * curve2_x + u_n[3] * curve3_x;
|
||||
}
|
||||
|
||||
template<typename vbool, typename vfloat>
|
||||
__forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
|
||||
const float dscale, const size_t dstride, const size_t N) const
|
||||
{
|
||||
if (P) {
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv,u_n,v_n));
|
||||
}
|
||||
if (dPdu)
|
||||
{
|
||||
{
|
||||
assert(dPdu);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,dPdu+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
|
||||
}
|
||||
{
|
||||
assert(dPdv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,dPdv+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
|
||||
}
|
||||
}
|
||||
if (ddPdudu)
|
||||
{
|
||||
{
|
||||
assert(ddPdudu);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudu+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
{
|
||||
assert(ddPdvdv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdvdv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
{
|
||||
assert(ddPdudv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec3<T> eval(const Vertex matrix[4][4], const T& uu, const T& vv)
|
||||
{
|
||||
const T one_minus_uu = 1.0f - uu;
|
||||
const T one_minus_vv = 1.0f - vv;
|
||||
|
||||
const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
|
||||
const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
|
||||
const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
|
||||
const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
|
||||
const T B2_u = 3.0f * (uu * one_minus_uu * uu);
|
||||
const T B2_v = 3.0f * (vv * one_minus_vv * vv);
|
||||
const T B3_u = uu * uu * uu;
|
||||
const T B3_v = vv * vv * vv;
|
||||
|
||||
const T x =
|
||||
madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u*matrix[0][3].x))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,matrix[1][1].x,madd(B2_u,matrix[1][2].x,B3_u*matrix[1][3].x))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,matrix[2][1].x,madd(B2_u,matrix[2][2].x,B3_u*matrix[2][3].x))),
|
||||
B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u*matrix[3][3].x))))));
|
||||
|
||||
const T y =
|
||||
madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u*matrix[0][3].y))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,matrix[1][1].y,madd(B2_u,matrix[1][2].y,B3_u*matrix[1][3].y))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,matrix[2][1].y,madd(B2_u,matrix[2][2].y,B3_u*matrix[2][3].y))),
|
||||
B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u*matrix[3][3].y))))));
|
||||
|
||||
const T z =
|
||||
madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u*matrix[0][3].z))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,matrix[1][1].z,madd(B2_u,matrix[1][2].z,B3_u*matrix[1][3].z))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,matrix[2][1].z,madd(B2_u,matrix[2][2].z,B3_u*matrix[2][3].z))),
|
||||
B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u*matrix[3][3].z))))));
|
||||
|
||||
return Vec3<T>(x,y,z);
|
||||
}
|
||||
|
||||
template<typename vfloat>
|
||||
__forceinline Vec3<vfloat> eval(const vfloat& uu, const vfloat& vv) const {
|
||||
return eval(matrix,uu,vv);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static __forceinline Vec3<T> normal(const Vertex matrix[4][4], const T& uu, const T& vv)
|
||||
{
|
||||
|
||||
const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
|
||||
const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
|
||||
const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
|
||||
const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
|
||||
|
||||
const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
|
||||
const Vec3<T> matrix_11 = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
|
||||
const Vec3<T> matrix_12 = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
|
||||
const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
|
||||
|
||||
const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
|
||||
const Vec3<T> matrix_21 = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
|
||||
const Vec3<T> matrix_22 = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
|
||||
const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
|
||||
|
||||
const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
|
||||
const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
|
||||
const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
|
||||
const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
|
||||
|
||||
/* tangentU */
|
||||
const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
|
||||
const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
|
||||
const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
|
||||
const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
|
||||
|
||||
const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
|
||||
|
||||
/* tangentV */
|
||||
const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
|
||||
const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
|
||||
const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
|
||||
const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
|
||||
|
||||
const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
|
||||
|
||||
/* normal = tangentU x tangentV */
|
||||
const Vec3<T> n = cross(tangentU,tangentV);
|
||||
return n;
|
||||
}
|
||||
|
||||
template<typename vfloat>
|
||||
__forceinline Vec3<vfloat> normal(const vfloat& uu, const vfloat& vv) const {
|
||||
return normal(matrix,uu,vv);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Bezier patch instantiated with Vec3fa control points and Vec3fa_t results */
using BezierPatch3fa = BezierPatchT<Vec3fa,Vec3fa_t>;
|
||||
}
|
||||
191
Framework/external/embree/kernels/subdiv/bilinear_patch.h
vendored
Normal file
191
Framework/external/embree/kernels/subdiv/bilinear_patch.h
vendored
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
class __aligned(64) BilinearPatchT
|
||||
{
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
|
||||
public:
|
||||
Vertex v[4];
|
||||
|
||||
public:
|
||||
|
||||
__forceinline BilinearPatchT () {}
|
||||
|
||||
__forceinline BilinearPatchT (const HalfEdge* edge, const BufferView<Vertex>& vertices) {
|
||||
init(edge,vertices.getPtr(),vertices.getStride());
|
||||
}
|
||||
|
||||
__forceinline BilinearPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
init(edge,vertices,stride);
|
||||
}
|
||||
|
||||
__forceinline void init (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
{
|
||||
v[0] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
|
||||
v[1] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
|
||||
v[2] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
|
||||
v[3] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
|
||||
}
|
||||
|
||||
__forceinline BilinearPatchT (const CatmullClarkPatch& patch)
|
||||
{
|
||||
v[0] = patch.ring[0].getLimitVertex();
|
||||
v[1] = patch.ring[1].getLimitVertex();
|
||||
v[2] = patch.ring[2].getLimitVertex();
|
||||
v[3] = patch.ring[3].getLimitVertex();
|
||||
}
|
||||
|
||||
__forceinline BBox<Vertex> bounds() const
|
||||
{
|
||||
|
||||
BBox<Vertex> bounds (v[0]);
|
||||
bounds.extend(v[1]);
|
||||
bounds.extend(v[2]);
|
||||
bounds.extend(v[3]);
|
||||
return bounds;
|
||||
}
|
||||
|
||||
__forceinline Vertex eval(const float uu, const float vv) const {
|
||||
return lerp(lerp(v[0],v[1],uu),lerp(v[3],v[2],uu),vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_du(const float uu, const float vv) const {
|
||||
return lerp(v[1]-v[0],v[2]-v[3],vv);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dv(const float uu, const float vv) const {
|
||||
return lerp(v[3]-v[0],v[2]-v[1],uu);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dudu(const float uu, const float vv) const {
|
||||
return Vertex(zero);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dvdv(const float uu, const float vv) const {
|
||||
return Vertex(zero);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dudv(const float uu, const float vv) const {
|
||||
return (v[2]-v[3]) - (v[1]-v[0]);
|
||||
}
|
||||
|
||||
__forceinline Vertex normal(const float uu, const float vv) const {
|
||||
return cross(eval_du(uu,vv),eval_dv(uu,vv));
|
||||
}
|
||||
|
||||
__forceinline void eval(const float u, const float v,
|
||||
Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
|
||||
const float dscale = 1.0f) const
|
||||
{
|
||||
if (P) {
|
||||
*P = eval(u,v);
|
||||
}
|
||||
if (dPdu) {
|
||||
assert(dPdu); *dPdu = eval_du(u,v)*dscale;
|
||||
assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
|
||||
}
|
||||
if (ddPdudu) {
|
||||
assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
|
||||
assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
|
||||
assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline Vec3<vfloat> eval(const vfloat& uu, const vfloat& vv) const
|
||||
{
|
||||
const vfloat x = lerp(lerp(v[0].x,v[1].x,uu),lerp(v[3].x,v[2].x,uu),vv);
|
||||
const vfloat y = lerp(lerp(v[0].y,v[1].y,uu),lerp(v[3].y,v[2].y,uu),vv);
|
||||
const vfloat z = lerp(lerp(v[0].z,v[1].z,uu),lerp(v[3].z,v[2].z,uu),vv);
|
||||
return Vec3<vfloat>(x,y,z);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline Vec3<vfloat> eval_du(const vfloat& uu, const vfloat& vv) const
|
||||
{
|
||||
const vfloat x = lerp(v[1].x-v[0].x,v[2].x-v[3].x,vv);
|
||||
const vfloat y = lerp(v[1].y-v[0].y,v[2].y-v[3].y,vv);
|
||||
const vfloat z = lerp(v[1].z-v[0].z,v[2].z-v[3].z,vv);
|
||||
return Vec3<vfloat>(x,y,z);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline Vec3<vfloat> eval_dv(const vfloat& uu, const vfloat& vv) const
|
||||
{
|
||||
const vfloat x = lerp(v[3].x-v[0].x,v[2].x-v[1].x,uu);
|
||||
const vfloat y = lerp(v[3].y-v[0].y,v[2].y-v[1].y,uu);
|
||||
const vfloat z = lerp(v[3].z-v[0].z,v[2].z-v[1].z,uu);
|
||||
return Vec3<vfloat>(x,y,z);
|
||||
}
|
||||
|
||||
template<typename vfloat>
|
||||
__forceinline Vec3<vfloat> normal(const vfloat& uu, const vfloat& vv) const {
|
||||
return cross(eval_du(uu,vv),eval_dv(uu,vv));
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return lerp(lerp(v[0][i],v[1][i],uu),lerp(v[3][i],v[2][i],uu),vv);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval_du(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return lerp(v[1][i]-v[0][i],v[2][i]-v[3][i],vv);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval_dv(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return lerp(v[3][i]-v[0][i],v[2][i]-v[1][i],uu);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval_dudu(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return vfloat(zero);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval_dvdv(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return vfloat(zero);
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval_dudv(const size_t i, const vfloat& uu, const vfloat& vv) const {
|
||||
return (v[2][i]-v[3][i]) - (v[1][i]-v[0][i]);
|
||||
}
|
||||
|
||||
template<typename vbool, typename vfloat>
|
||||
__forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
|
||||
const float dscale, const size_t dstride, const size_t N) const
|
||||
{
|
||||
if (P) {
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv));
|
||||
}
|
||||
if (dPdu) {
|
||||
for (size_t i=0; i<N; i++) {
|
||||
assert(dPdu); vfloat::store(valid,dPdu+i*dstride,eval_du(i,uu,vv)*dscale);
|
||||
assert(dPdv); vfloat::store(valid,dPdv+i*dstride,eval_dv(i,uu,vv)*dscale);
|
||||
}
|
||||
}
|
||||
if (ddPdudu) {
|
||||
for (size_t i=0; i<N; i++) {
|
||||
assert(ddPdudu); vfloat::store(valid,ddPdudu+i*dstride,eval_dudu(i,uu,vv)*sqr(dscale));
|
||||
assert(ddPdvdv); vfloat::store(valid,ddPdvdv+i*dstride,eval_dvdv(i,uu,vv)*sqr(dscale));
|
||||
assert(ddPdudv); vfloat::store(valid,ddPdudv+i*dstride,eval_dudv(i,uu,vv)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*! bilinear patch instantiated with Vec3fa corners and Vec3fa_t as second type argument */
using BilinearPatch3fa = BilinearPatchT<Vec3fa,Vec3fa_t>;
|
||||
}
|
||||
30
Framework/external/embree/kernels/subdiv/bspline_curve.cpp
vendored
Normal file
30
Framework/external/embree/kernels/subdiv/bspline_curve.cpp
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bspline_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
PrecomputedBSplineBasis::PrecomputedBSplineBasis(int dj)
|
||||
{
|
||||
for (size_t i=1; i<=N; i++)
|
||||
{
|
||||
for (size_t j=0; j<=N; j++)
|
||||
{
|
||||
const float u = float(j+dj)/float(i);
|
||||
const Vec4f f = BSplineBasis::eval(u);
|
||||
c0[i][j] = f.x;
|
||||
c1[i][j] = f.y;
|
||||
c2[i][j] = f.z;
|
||||
c3[i][j] = f.w;
|
||||
const Vec4f d = BSplineBasis::derivative(u);
|
||||
d0[i][j] = d.x;
|
||||
d1[i][j] = d.y;
|
||||
d2[i][j] = d.z;
|
||||
d3[i][j] = d.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
PrecomputedBSplineBasis bspline_basis0(0);
|
||||
PrecomputedBSplineBasis bspline_basis1(1);
|
||||
}
|
||||
326
Framework/external/embree/kernels/subdiv/bspline_curve.h
vendored
Normal file
326
Framework/external/embree/kernels/subdiv/bspline_curve.h
vendored
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class BSplineBasis
|
||||
{
|
||||
public:
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> eval(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T s = T(1.0f) - u;
|
||||
const T n0 = s*s*s;
|
||||
const T n1 = (4.0f*(s*s*s)+(t*t*t)) + (12.0f*((s*t)*s) + 6.0f*((t*s)*t));
|
||||
const T n2 = (4.0f*(t*t*t)+(s*s*s)) + (12.0f*((t*s)*t) + 6.0f*((s*t)*s));
|
||||
const T n3 = t*t*t;
|
||||
return T(1.0f/6.0f)*Vec4<T>(n0,n1,n2,n3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T s = 1.0f - u;
|
||||
const T n0 = -s*s;
|
||||
const T n1 = -t*t - 4.0f*(t*s);
|
||||
const T n2 = s*s + 4.0f*(s*t);
|
||||
const T n3 = t*t;
|
||||
return T(0.5f)*Vec4<T>(n0,n1,n2,n3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative2(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T s = 1.0f - u;
|
||||
const T n0 = s;
|
||||
const T n1 = t - 2.0f*s;
|
||||
const T n2 = s - 2.0f*t;
|
||||
const T n3 = t;
|
||||
return Vec4<T>(n0,n1,n2,n3);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Lookup tables holding B-spline basis weights (c0..c3) and first
 *  derivative weights (d0..d3). Users in this file index every table
 *  as table[size][ofs]. */
struct PrecomputedBSplineBasis
{
  /*! largest supported first and second table index */
  enum { N = 16 };

  /*! leaves every table uninitialized */
  PrecomputedBSplineBasis() {}

  /*! constructor taking an integer shift; only declared here */
  PrecomputedBSplineBasis(int shift);

  /* basis for bspline evaluation, one table per control point */
  float c0[N+1][N+1];
  float c1[N+1][N+1];
  float c2[N+1][N+1];
  float c3[N+1][N+1];

  /* basis for bspline derivative evaluation, one table per control point */
  float d0[N+1][N+1];
  float d1[N+1][N+1];
  float d2[N+1][N+1];
  float d3[N+1][N+1];
};

/* shared table instances, defined in another translation unit */
extern PrecomputedBSplineBasis bspline_basis0;
extern PrecomputedBSplineBasis bspline_basis1;
|
||||
|
||||
template<typename Vertex>
|
||||
struct BSplineCurveT
|
||||
{
|
||||
Vertex v0,v1,v2,v3;
|
||||
|
||||
__forceinline BSplineCurveT() {}
|
||||
|
||||
__forceinline BSplineCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
|
||||
: v0(v0), v1(v1), v2(v2), v3(v3) {}
|
||||
|
||||
__forceinline Vertex begin() const {
|
||||
return madd(1.0f/6.0f,v0,madd(2.0f/3.0f,v1,1.0f/6.0f*v2));
|
||||
}
|
||||
|
||||
__forceinline Vertex end() const {
|
||||
return madd(1.0f/6.0f,v1,madd(2.0f/3.0f,v2,1.0f/6.0f*v3));
|
||||
}
|
||||
|
||||
__forceinline Vertex center() const {
|
||||
return 0.25f*(v0+v1+v2+v3);
|
||||
}
|
||||
|
||||
__forceinline BBox<Vertex> bounds() const {
|
||||
return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
|
||||
}
|
||||
|
||||
__forceinline friend BSplineCurveT operator -( const BSplineCurveT& a, const Vertex& b ) {
|
||||
return BSplineCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
|
||||
}
|
||||
|
||||
__forceinline BSplineCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
|
||||
{
|
||||
const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
|
||||
const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
|
||||
const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
|
||||
const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
|
||||
return BSplineCurveT<Vec3ff>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BSplineBasis::eval(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_du(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BSplineBasis::derivative(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dudu(const float t) const
|
||||
{
|
||||
const Vec4<float> b = BSplineBasis::derivative2(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline void eval(const float t, Vertex& p, Vertex& dp) const
|
||||
{
|
||||
p = eval(t);
|
||||
dp = eval_du(t);
|
||||
}
|
||||
|
||||
__forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
|
||||
{
|
||||
p = eval(t);
|
||||
dp = eval_du(t);
|
||||
ddp = eval_dudu(t);
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BSplineBasis::eval(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BSplineBasis::derivative(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = BSplineBasis::derivative2(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
|
||||
{
|
||||
p = veval<M>(t);
|
||||
dp = veval_du<M>(t);
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> eval0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBSplineBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&bspline_basis0.c0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&bspline_basis0.c1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&bspline_basis0.c2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&bspline_basis0.c3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> eval1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBSplineBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&bspline_basis1.c0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&bspline_basis1.c1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&bspline_basis1.c2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&bspline_basis1.c3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> derivative0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBSplineBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&bspline_basis0.d0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&bspline_basis0.d1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&bspline_basis0.d2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&bspline_basis0.d3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> derivative1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedBSplineBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&bspline_basis1.d0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&bspline_basis1.d1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&bspline_basis1.d2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&bspline_basis1.d3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
/* calculates bounds of bspline curve geometry */
|
||||
__forceinline BBox3fa accurateRoundBounds() const
|
||||
{
|
||||
const int N = 7;
|
||||
const float scale = 1.0f/(3.0f*(N-1));
|
||||
Vec4vfx pl(pos_inf), pu(neg_inf);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vintx vi = vintx(i)+vintx(step);
|
||||
vboolx valid = vi <= vintx(N);
|
||||
const Vec4vfx p = eval0<VSIZEX>(i,N);
|
||||
const Vec4vfx dp = derivative0<VSIZEX>(i,N);
|
||||
const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
|
||||
const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
|
||||
pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
|
||||
pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const float r_min = reduce_min(pl.w);
|
||||
const float r_max = reduce_max(pu.w);
|
||||
const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
|
||||
return enlarge(BBox3fa(lower,upper),upper_r);
|
||||
}
|
||||
|
||||
/* calculates bounds when tessellated into N line segments */
|
||||
__forceinline BBox3fa accurateFlatBounds(int N) const
|
||||
{
|
||||
if (likely(N == 4))
|
||||
{
|
||||
const Vec4vf4 pi = eval0<4>(0,4);
|
||||
const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
|
||||
const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
|
||||
const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
|
||||
const Vec3ff pe = end();
|
||||
return enlarge(BBox3fa(min(lower,pe),max(upper,pe)),max(upper_r,Vec3fa(abs(pe.w))));
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vboolx valid = vintx(i)+vintx(step) <= vintx(N);
|
||||
const Vec4vfx pi = eval0<VSIZEX>(i,N);
|
||||
|
||||
pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
|
||||
pl.y = select(valid,min(pl.y,pi.y),pl.y);
|
||||
pl.z = select(valid,min(pl.z,pi.z),pl.z);
|
||||
|
||||
pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
|
||||
pu.y = select(valid,max(pu.y,pi.y),pu.y);
|
||||
pu.z = select(valid,max(pu.z,pi.z),pu.z);
|
||||
|
||||
ru = select(valid,max(ru,abs(pi.w)),ru);
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const Vec3fa upper_r(reduce_max(ru));
|
||||
return enlarge(BBox3fa(lower,upper),upper_r);
|
||||
}
|
||||
}
|
||||
|
||||
/* Prints the four control vertices of the curve. */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const BSplineCurveT& curve) {
  cout << "BSplineCurve { v0 = " << curve.v0;
  cout << ", v1 = " << curve.v1;
  cout << ", v2 = " << curve.v2;
  cout << ", v3 = " << curve.v3;
  return cout << " }";
}
|
||||
};
|
||||
|
||||
/* Conversion between identical curve types (Bezier to Bezier):
   ocurve simply receives a copy of icurve. */
template<typename Vertex>
|
||||
__forceinline void convert(const BezierCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve) {
|
||||
ocurve = icurve;
|
||||
}
|
||||
|
||||
/* Conversion between identical curve types (B-spline to B-spline):
   ocurve simply receives a copy of icurve. */
template<typename Vertex>
|
||||
__forceinline void convert(const BSplineCurveT<Vertex>& icurve, BSplineCurveT<Vertex>& ocurve) {
|
||||
ocurve = icurve;
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline void convert(const BezierCurveT<Vertex>& icurve, BSplineCurveT<Vertex>& ocurve)
|
||||
{
|
||||
const Vertex v0 = madd(6.0f,icurve.v0,madd(-7.0f,icurve.v1,2.0f*icurve.v2));
|
||||
const Vertex v1 = msub(2.0f,icurve.v1,icurve.v2);
|
||||
const Vertex v2 = msub(2.0f,icurve.v2,icurve.v1);
|
||||
const Vertex v3 = madd(2.0f,icurve.v1,madd(-7.0f,icurve.v2,6.0f*icurve.v3));
|
||||
ocurve = BSplineCurveT<Vertex>(v0,v1,v2,v3);
|
||||
}
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline void convert(const BSplineCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve)
|
||||
{
|
||||
const Vertex v0 = madd(1.0f/6.0f,icurve.v0,madd(2.0f/3.0f,icurve.v1,1.0f/6.0f*icurve.v2));
|
||||
const Vertex v1 = madd(2.0f/3.0f,icurve.v1,1.0f/3.0f*icurve.v2);
|
||||
const Vertex v2 = madd(1.0f/3.0f,icurve.v1,2.0f/3.0f*icurve.v2);
|
||||
const Vertex v3 = madd(1.0f/6.0f,icurve.v1,madd(2.0f/3.0f,icurve.v2,1.0f/6.0f*icurve.v3));
|
||||
ocurve = BezierCurveT<Vertex>(v0,v1,v2,v3);
|
||||
}
|
||||
|
||||
template<typename CurveGeometry>
|
||||
__forceinline BSplineCurveT<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const BSplineCurveT<Vec3ff>& curve)
|
||||
{
|
||||
return BSplineCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
|
||||
}
|
||||
|
||||
typedef BSplineCurveT<Vec3fa> BSplineCurve3fa;
|
||||
}
|
||||
|
||||
449
Framework/external/embree/kernels/subdiv/bspline_patch.h
vendored
Normal file
449
Framework/external/embree/kernels/subdiv/bspline_patch.h
vendored
Normal file
|
|
@ -0,0 +1,449 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bspline_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
class __aligned(64) BSplinePatchT
|
||||
{
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
|
||||
public:
|
||||
|
||||
/* Default constructor: leaves the control points uninitialized. */
__forceinline BSplinePatchT () {}
|
||||
|
||||
/* Builds the 4x4 control grid from a Catmull-Clark patch (see init). */
__forceinline BSplinePatchT (const CatmullClarkPatch& patch) {
|
||||
init(patch);
|
||||
}
|
||||
|
||||
/* Same as the single-argument constructor; the four border curve
   pointers are accepted but not used by this constructor. */
__forceinline BSplinePatchT(const CatmullClarkPatch& patch,
|
||||
const BezierCurveT<Vertex>* border0,
|
||||
const BezierCurveT<Vertex>* border1,
|
||||
const BezierCurveT<Vertex>* border2,
|
||||
const BezierCurveT<Vertex>* border3)
|
||||
{
|
||||
init(patch);
|
||||
}
|
||||
|
||||
/* Builds the 4x4 control grid directly from half-edge topology; vertices
   is a raw byte buffer addressed with the given stride (see init/load). */
__forceinline BSplinePatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
init(edge,vertices,stride);
|
||||
}
|
||||
|
||||
/* Corner control point for a hard corner:
   4*v11 - 2*(v12+v21) + v22. Only v11, v12, v21 and v22 are used;
   the remaining parameters are ignored. */
__forceinline Vertex hard_corner(const Vertex& v01, const Vertex& v02,
                                 const Vertex& v10, const Vertex& v11, const Vertex& v12,
                                 const Vertex& v20, const Vertex& v21, const Vertex& v22)
{
  const auto center = 4.0f*v11;
  const auto edges  = 2.0f*(v12+v21);
  return center - edges + v22;
}
|
||||
|
||||
/* Corner control point for a non-hard convex corner:
   -8*v11 + 4*(v12+v21) + v22. Only v11, v12, v21 and v22 are used;
   the remaining parameters are ignored. */
__forceinline Vertex soft_convex_corner( const Vertex& v01, const Vertex& v02,
                                         const Vertex& v10, const Vertex& v11, const Vertex& v12,
                                         const Vertex& v20, const Vertex& v21, const Vertex& v22)
{
  const auto center = -8.0f*v11;
  const auto edges  = 4.0f*(v12+v21);
  return center + edges + v22;
}
|
||||
|
||||
/* Selects the corner rule from the vertex crease weight: an infinite
   weight yields hard_corner, any other weight yields soft_convex_corner. */
__forceinline Vertex convex_corner(const float vertex_crease_weight,
                                   const Vertex& v01, const Vertex& v02,
                                   const Vertex& v10, const Vertex& v11, const Vertex& v12,
                                   const Vertex& v20, const Vertex& v21, const Vertex& v22)
{
  return std::isinf(vertex_crease_weight)
    ? hard_corner(v01,v02,v10,v11,v12,v20,v21,v22)
    : soft_convex_corner(v01,v02,v10,v11,v12,v20,v21,v22);
}
|
||||
|
||||
/* Loads the start vertex of the given half edge from the raw vertex
   buffer: the byte offset is start-vertex-index * stride. */
__forceinline Vertex load(const HalfEdge* edge, const char* vertices, size_t stride) {
  const char* const src = vertices+edge->getStartVertexIndex()*stride;
  return Vertex_t::loadu(src);
}
|
||||
|
||||
/* Fills the two outer control points v01/v02 next to the inner edge
   (v11,v12). Without an opposite face behind ring entry 0 they are
   extrapolated as 2*v11-v21 and 2*v12-v22; otherwise they are taken
   from the ring (back(2) and back(1)). */
__forceinline void init_border(const CatmullClarkRing& edge0,
                               Vertex& v01, Vertex& v02,
                               const Vertex& v11, const Vertex& v12,
                               const Vertex& v21, const Vertex& v22)
{
  /* border edge: mirror the inner row across the edge */
  if (!likely(edge0.has_opposite_back(0)))
  {
    v01 = 2.0f*v11-v21;
    v02 = 2.0f*v12-v22;
    return;
  }

  /* neighboring face exists: read its vertices from the ring */
  v01 = edge0.back(2);
  v02 = edge0.back(1);
}
|
||||
|
||||
/* Fills the corner control point v00 from the ring topology:
     back0 and front1 present -> ring vertex back(3)
     only back0 present       -> 2*v01-v02
     only front1 present      -> 2*v10-v20
     neither present          -> convex_corner rule */
__forceinline void init_corner(const CatmullClarkRing& edge0,
                               Vertex& v00, const Vertex& v01, const Vertex& v02,
                               const Vertex& v10, const Vertex& v11, const Vertex& v12,
                               const Vertex& v20, const Vertex& v21, const Vertex& v22)
{
  /* has_back1 and has_front2 are only consulted by the assertions */
  const bool MAYBE_UNUSED has_back1  = edge0.has_opposite_back(1);
  const bool has_back0  = edge0.has_opposite_back(0);
  const bool has_front1 = edge0.has_opposite_front(1);
  const bool MAYBE_UNUSED has_front2 = edge0.has_opposite_front(2);

  if (likely(has_back0) && likely(has_front1))
  {
    assert(has_back1 && has_front2);
    v00 = edge0.back(3);
  }
  else if (has_back0)
  {
    assert(!has_back1);
    v00 = 2.0f*v01-v02;
  }
  else if (likely(has_front1))
  {
    assert(!has_front2);
    v00 = 2.0f*v10-v20;
  }
  else
  {
    v00 = convex_corner(edge0.vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
  }
}
|
||||
|
||||
void init(const CatmullClarkPatch& patch)
|
||||
{
|
||||
/* fill inner vertices */
|
||||
const Vertex v11 = v[1][1] = patch.ring[0].vtx;
|
||||
const Vertex v12 = v[1][2] = patch.ring[1].vtx;
|
||||
const Vertex v22 = v[2][2] = patch.ring[2].vtx;
|
||||
const Vertex v21 = v[2][1] = patch.ring[3].vtx;
|
||||
|
||||
/* fill border vertices */
|
||||
init_border(patch.ring[0],v[0][1],v[0][2],v11,v12,v21,v22);
|
||||
init_border(patch.ring[1],v[1][3],v[2][3],v12,v22,v11,v21);
|
||||
init_border(patch.ring[2],v[3][2],v[3][1],v22,v21,v12,v11);
|
||||
init_border(patch.ring[3],v[2][0],v[1][0],v21,v11,v22,v12);
|
||||
|
||||
/* fill corner vertices */
|
||||
init_corner(patch.ring[0],v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
|
||||
init_corner(patch.ring[1],v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
|
||||
init_corner(patch.ring[2],v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
|
||||
init_corner(patch.ring[3],v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
|
||||
}
|
||||
|
||||
void init_border(const HalfEdge* edge0, const char* vertices, size_t stride,
|
||||
Vertex& v01, Vertex& v02,
|
||||
const Vertex& v11, const Vertex& v12,
|
||||
const Vertex& v21, const Vertex& v22)
|
||||
{
|
||||
if (likely(edge0->hasOpposite()))
|
||||
{
|
||||
const HalfEdge* e = edge0->opposite()->next()->next();
|
||||
v01 = load(e,vertices,stride);
|
||||
v02 = load(e->next(),vertices,stride);
|
||||
} else {
|
||||
v01 = 2.0f*v11-v21;
|
||||
v02 = 2.0f*v12-v22;
|
||||
}
|
||||
}
|
||||
|
||||
void init_corner(const HalfEdge* edge0, const char* vertices, size_t stride,
|
||||
Vertex& v00, const Vertex& v01, const Vertex& v02,
|
||||
const Vertex& v10, const Vertex& v11, const Vertex& v12,
|
||||
const Vertex& v20, const Vertex& v21, const Vertex& v22)
|
||||
{
|
||||
const bool has_back0 = edge0->hasOpposite();
|
||||
const bool has_front1 = edge0->prev()->hasOpposite();
|
||||
|
||||
if (likely(has_back0))
|
||||
{
|
||||
const HalfEdge* e = edge0->opposite()->next();
|
||||
if (likely(has_front1))
|
||||
{
|
||||
assert(e->hasOpposite());
|
||||
assert(edge0->prev()->opposite()->prev()->hasOpposite());
|
||||
v00 = load(e->opposite()->prev(),vertices,stride);
|
||||
}
|
||||
else {
|
||||
assert(!e->hasOpposite());
|
||||
v00 = 2.0f*v01-v02;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (likely(has_front1)) {
|
||||
assert(!edge0->prev()->opposite()->prev()->hasOpposite());
|
||||
v00 = 2.0f*v10-v20;
|
||||
}
|
||||
else {
|
||||
assert(edge0->vertex_crease_weight == 0.0f || std::isinf(edge0->vertex_crease_weight));
|
||||
v00 = convex_corner(edge0->vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void init(const HalfEdge* edge0, const char* vertices, size_t stride)
|
||||
{
|
||||
assert( edge0->isRegularFace() );
|
||||
|
||||
/* fill inner vertices */
|
||||
const Vertex v11 = v[1][1] = load(edge0,vertices,stride); const HalfEdge* edge1 = edge0->next();
|
||||
const Vertex v12 = v[1][2] = load(edge1,vertices,stride); const HalfEdge* edge2 = edge1->next();
|
||||
const Vertex v22 = v[2][2] = load(edge2,vertices,stride); const HalfEdge* edge3 = edge2->next();
|
||||
const Vertex v21 = v[2][1] = load(edge3,vertices,stride); assert(edge0 == edge3->next());
|
||||
|
||||
/* fill border vertices */
|
||||
init_border(edge0,vertices,stride,v[0][1],v[0][2],v11,v12,v21,v22);
|
||||
init_border(edge1,vertices,stride,v[1][3],v[2][3],v12,v22,v11,v21);
|
||||
init_border(edge2,vertices,stride,v[3][2],v[3][1],v22,v21,v12,v11);
|
||||
init_border(edge3,vertices,stride,v[2][0],v[1][0],v21,v11,v22,v12);
|
||||
|
||||
/* fill corner vertices */
|
||||
init_corner(edge0,vertices,stride,v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
|
||||
init_corner(edge1,vertices,stride,v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
|
||||
init_corner(edge2,vertices,stride,v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
|
||||
init_corner(edge3,vertices,stride,v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
|
||||
}
|
||||
|
||||
/* Bounding box of all 16 control points, visited in row-major order. */
__forceinline BBox<Vertex> bounds() const
{
  BBox<Vertex> box (v[0][0]);
  for (size_t k=1; k<16; k++)
    box.extend( v[k>>2][k&3] );
  return box;
}
|
||||
|
||||
/* Evaluates the patch at (uu,vv): every grid column is blended over its
   rows with the weights BSplineBasis::eval(vv), then the four column
   results are blended with the weights BSplineBasis::eval(uu). */
__forceinline Vertex eval(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::eval(vv);
  const Vec4f wu = BSplineBasis::eval(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Same blending scheme as eval, but the column results are combined with
   the weights BSplineBasis::derivative(uu); rows still use
   BSplineBasis::eval(vv). */
__forceinline Vertex eval_du(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::eval(vv);
  const Vec4f wu = BSplineBasis::derivative(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Same blending scheme as eval, but the rows are combined with the
   weights BSplineBasis::derivative(vv); columns still use
   BSplineBasis::eval(uu). */
__forceinline Vertex eval_dv(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::derivative(vv);
  const Vec4f wu = BSplineBasis::eval(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Same blending scheme as eval, but the column results are combined with
   the weights BSplineBasis::derivative2(uu); rows use
   BSplineBasis::eval(vv). */
__forceinline Vertex eval_dudu(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::eval(vv);
  const Vec4f wu = BSplineBasis::derivative2(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Same blending scheme as eval, but the rows are combined with the
   weights BSplineBasis::derivative2(vv); columns use
   BSplineBasis::eval(uu). */
__forceinline Vertex eval_dvdv(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::derivative2(vv);
  const Vec4f wu = BSplineBasis::eval(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Same blending scheme as eval, with BSplineBasis::derivative weights in
   both directions (rows use derivative(vv), columns use derivative(uu)). */
__forceinline Vertex eval_dudv(const float uu, const float vv) const
{
  const Vec4f wv = BSplineBasis::derivative(vv);
  const Vec4f wu = BSplineBasis::derivative(uu);

  /* blends column c of the control grid across its four rows */
  const auto column = [&] (const size_t c) -> Vertex_t {
    return madd(wv[0],v[0][c],madd(wv[1],v[1][c],madd(wv[2],v[2][c],wv[3] * v[3][c])));
  };
  const Vertex_t c0 = column(0);
  const Vertex_t c1 = column(1);
  const Vertex_t c2 = column(2);
  const Vertex_t c3 = column(3);

  return madd(wu[0],c0,madd(wu[1],c1,madd(wu[2],c2,wu[3] * c3)));
}
|
||||
|
||||
/* Returns cross(eval_du, eval_dv) at (uu,vv); the result is not normalized here. */
__forceinline Vertex normal(const float uu, const float vv) const
{
  const Vertex du = eval_du(uu,vv);
  const Vertex dv = eval_dv(uu,vv);
  const Vertex n = cross(du,dv);
  return n;
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval(const T& uu, const T& vv, const Vec4<T>& u_n, const Vec4<T>& v_n) const
|
||||
{
|
||||
const T curve0_x = madd(v_n[0],T(v[0][0].x),madd(v_n[1],T(v[1][0].x),madd(v_n[2],T(v[2][0].x),v_n[3] * T(v[3][0].x))));
|
||||
const T curve1_x = madd(v_n[0],T(v[0][1].x),madd(v_n[1],T(v[1][1].x),madd(v_n[2],T(v[2][1].x),v_n[3] * T(v[3][1].x))));
|
||||
const T curve2_x = madd(v_n[0],T(v[0][2].x),madd(v_n[1],T(v[1][2].x),madd(v_n[2],T(v[2][2].x),v_n[3] * T(v[3][2].x))));
|
||||
const T curve3_x = madd(v_n[0],T(v[0][3].x),madd(v_n[1],T(v[1][3].x),madd(v_n[2],T(v[2][3].x),v_n[3] * T(v[3][3].x))));
|
||||
const T x = madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
|
||||
|
||||
const T curve0_y = madd(v_n[0],T(v[0][0].y),madd(v_n[1],T(v[1][0].y),madd(v_n[2],T(v[2][0].y),v_n[3] * T(v[3][0].y))));
|
||||
const T curve1_y = madd(v_n[0],T(v[0][1].y),madd(v_n[1],T(v[1][1].y),madd(v_n[2],T(v[2][1].y),v_n[3] * T(v[3][1].y))));
|
||||
const T curve2_y = madd(v_n[0],T(v[0][2].y),madd(v_n[1],T(v[1][2].y),madd(v_n[2],T(v[2][2].y),v_n[3] * T(v[3][2].y))));
|
||||
const T curve3_y = madd(v_n[0],T(v[0][3].y),madd(v_n[1],T(v[1][3].y),madd(v_n[2],T(v[2][3].y),v_n[3] * T(v[3][3].y))));
|
||||
const T y = madd(u_n[0],curve0_y,madd(u_n[1],curve1_y,madd(u_n[2],curve2_y,u_n[3] * curve3_y)));
|
||||
|
||||
const T curve0_z = madd(v_n[0],T(v[0][0].z),madd(v_n[1],T(v[1][0].z),madd(v_n[2],T(v[2][0].z),v_n[3] * T(v[3][0].z))));
|
||||
const T curve1_z = madd(v_n[0],T(v[0][1].z),madd(v_n[1],T(v[1][1].z),madd(v_n[2],T(v[2][1].z),v_n[3] * T(v[3][1].z))));
|
||||
const T curve2_z = madd(v_n[0],T(v[0][2].z),madd(v_n[1],T(v[1][2].z),madd(v_n[2],T(v[2][2].z),v_n[3] * T(v[3][2].z))));
|
||||
const T curve3_z = madd(v_n[0],T(v[0][3].z),madd(v_n[1],T(v[1][3].z),madd(v_n[2],T(v[2][3].z),v_n[3] * T(v[3][3].z))));
|
||||
const T z = madd(u_n[0],curve0_z,madd(u_n[1],curve1_z,madd(u_n[2],curve2_z,u_n[3] * curve3_z)));
|
||||
|
||||
return Vec3<T>(x,y,z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::eval(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval_du(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::derivative(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::eval(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval_dv(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::derivative(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval_dudu(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::derivative2(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::eval(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval_dvdv(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::derivative2(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> eval_dudv(const T& uu, const T& vv) const
|
||||
{
|
||||
const Vec4<T> u_n = BSplineBasis::derivative(uu);
|
||||
const Vec4<T> v_n = BSplineBasis::derivative(vv);
|
||||
return eval(uu,vv,u_n,v_n);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline Vec3<T> normal(const T& uu, const T& vv) const {
|
||||
return cross(eval_du(uu,vv),eval_dv(uu,vv));
|
||||
}
|
||||
|
||||
void eval(const float u, const float v,
|
||||
Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
|
||||
const float dscale = 1.0f) const
|
||||
{
|
||||
if (P) {
|
||||
*P = eval(u,v);
|
||||
}
|
||||
if (dPdu) {
|
||||
assert(dPdu); *dPdu = eval_du(u,v)*dscale;
|
||||
assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
|
||||
}
|
||||
if (ddPdudu) {
|
||||
assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
|
||||
assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
|
||||
assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vfloat>
|
||||
__forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n) const
|
||||
{
|
||||
const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
|
||||
const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(v[1][1][i]),madd(v_n[2],vfloat(v[2][1][i]),v_n[3] * vfloat(v[3][1][i]))));
|
||||
const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(v[1][2][i]),madd(v_n[2],vfloat(v[2][2][i]),v_n[3] * vfloat(v[3][2][i]))));
|
||||
const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
|
||||
return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
|
||||
}
|
||||
|
||||
template<typename vbool, typename vfloat>
|
||||
void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
|
||||
const float dscale, const size_t dstride, const size_t N) const
|
||||
{
|
||||
if (P) {
|
||||
const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv,u_n,v_n));
|
||||
}
|
||||
if (dPdu)
|
||||
{
|
||||
{
|
||||
assert(dPdu);
|
||||
const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,dPdu+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
|
||||
}
|
||||
{
|
||||
assert(dPdv);
|
||||
const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,dPdv+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
|
||||
}
|
||||
}
|
||||
if (ddPdudu)
|
||||
{
|
||||
{
|
||||
assert(ddPdudu);
|
||||
const Vec4<vfloat> u_n = BSplineBasis::derivative2(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudu+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
{
|
||||
assert(ddPdvdv);
|
||||
const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::derivative2(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdvdv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
{
|
||||
assert(ddPdudv);
|
||||
const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Prints all 16 control points, one per line, as "[row][column] value". */
friend __forceinline embree_ostream operator<<(embree_ostream o, const BSplinePatchT& p)
{
  for (size_t k=0; k<16; k++)
  {
    const size_t y = k/4;
    const size_t x = k%4;
    o << "[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
  }
  return o;
}
|
||||
|
||||
public:
|
||||
Vertex v[4][4];
|
||||
};
|
||||
|
||||
typedef BSplinePatchT<Vec3fa,Vec3fa_t> BSplinePatch3fa;
|
||||
}
|
||||
41
Framework/external/embree/kernels/subdiv/catmullclark_coefficients.cpp
vendored
Normal file
41
Framework/external/embree/kernels/subdiv/catmullclark_coefficients.cpp
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "catmullclark_coefficients.h"
|
||||
|
||||
|
||||
namespace embree
|
||||
{
|
||||
CatmullClarkPrecomputedCoefficients CatmullClarkPrecomputedCoefficients::table;
|
||||
|
||||
/* Fills every lookup table for n = 0..MAX_RING_FACE_VALENCE. The
   limit-tangent a/b rows for valence n hold n entries each and are
   allocated here; the destructor releases them. */
CatmullClarkPrecomputedCoefficients::CatmullClarkPrecomputedCoefficients()
{
  for (size_t n=0; n<=MAX_RING_FACE_VALENCE; n++)
  {
    /* cosf(2.0f*M_PI/n) */
    table_cos_2PI_div_n[n] = set_cos_2PI_div_n(n);

    /* limit tangent coefficients a and b, one entry per i < n */
    float* const row_a = new float[n];
    float* const row_b = new float[n];
    table_limittangent_a[n] = row_a;
    table_limittangent_b[n] = row_b;
    for (size_t i=0; i<n; i++) {
      row_a[i] = set_limittangent_a(i,n);
      row_b[i] = set_limittangent_b(i,n);
    }

    /* limit tangent coefficient c */
    table_limittangent_c[n] = set_limittangent_c(n);
  }
}
|
||||
|
||||
/* Releases the per-valence limit-tangent rows held in the a and b tables. */
CatmullClarkPrecomputedCoefficients::~CatmullClarkPrecomputedCoefficients()
{
  for (float* row : table_limittangent_a)
    delete [] row;
  for (float* row : table_limittangent_b)
    delete [] row;
}
|
||||
}
|
||||
85
Framework/external/embree/kernels/subdiv/catmullclark_coefficients.h
vendored
Normal file
85
Framework/external/embree/kernels/subdiv/catmullclark_coefficients.h
vendored
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/geometry.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
static const size_t MAX_PATCH_VALENCE = 16; //!< maximum number of vertices of a patch
|
||||
static const size_t MAX_RING_FACE_VALENCE = 64; //!< maximum number of faces per ring
|
||||
static const size_t MAX_RING_EDGE_VALENCE = 2*64; //!< maximum number of edges per ring
|
||||
|
||||
class CatmullClarkPrecomputedCoefficients
|
||||
{
|
||||
private:
|
||||
|
||||
float table_cos_2PI_div_n[MAX_RING_FACE_VALENCE+1];
|
||||
|
||||
float* table_limittangent_a[MAX_RING_FACE_VALENCE+1];
|
||||
float* table_limittangent_b[MAX_RING_FACE_VALENCE+1];
|
||||
float table_limittangent_c[MAX_RING_FACE_VALENCE+1];
|
||||
|
||||
__forceinline float set_cos_2PI_div_n(const size_t n) {
|
||||
if (unlikely(n == 0)) return 1.0f;
|
||||
return cosf(2.0f*float(pi)/(float)n);
|
||||
}
|
||||
|
||||
__forceinline float set_limittangent_a(const size_t i, const size_t n)
|
||||
{
|
||||
if (unlikely(n == 0)) return 1.0f;
|
||||
const float c0 = 1.0f/(float)n * 1.0f / sqrtf(4.0f + cosf(float(pi)/(float)n)*cosf(float(pi)/(float)n));
|
||||
const float c1 = (1.0f/(float)n + cosf(float(pi)/(float)n) * c0);
|
||||
return cosf(2.0f*float(pi)*(float)i/(float)n) * c1;
|
||||
}
|
||||
|
||||
__forceinline float set_limittangent_b(const size_t i, const size_t n)
|
||||
{
|
||||
if (unlikely(n == 0)) return 1.0f;
|
||||
const float c0 = 1.0f/(float)n * 1.0f / sqrtf(4.0f + cosf(float(pi)/(float)n)*cosf(float(pi)/(float)n));
|
||||
return cosf((2.0f*float(pi)*i+float(pi))/(float)n) * c0;
|
||||
}
|
||||
|
||||
__forceinline float set_limittangent_c(const size_t n)
|
||||
{
|
||||
if (unlikely(n == 0)) return 1.0f;
|
||||
return 2.0f/16.0f * (5.0f + cosf(2.0f*float(pi)/(float)n) + cosf(float(pi)/(float)n) * sqrtf(18.0f+2.0f*cosf(2.0f*float(pi)/(float)n)));
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
__forceinline float cos_2PI_div_n(const size_t n)
|
||||
{
|
||||
if (likely(n <= MAX_RING_FACE_VALENCE))
|
||||
return table_cos_2PI_div_n[n];
|
||||
else
|
||||
return set_cos_2PI_div_n(n);
|
||||
}
|
||||
|
||||
__forceinline float limittangent_a(const size_t i, const size_t n)
|
||||
{
|
||||
assert(n <= MAX_RING_FACE_VALENCE);
|
||||
assert(i < n);
|
||||
return table_limittangent_a[n][i];
|
||||
}
|
||||
|
||||
__forceinline float limittangent_b(const size_t i, const size_t n)
|
||||
{
|
||||
assert(n <= MAX_RING_FACE_VALENCE);
|
||||
assert(i < n);
|
||||
return table_limittangent_b[n][i];
|
||||
}
|
||||
|
||||
__forceinline float limittangent_c(const size_t n)
|
||||
{
|
||||
assert(n <= MAX_RING_FACE_VALENCE);
|
||||
return table_limittangent_c[n];
|
||||
}
|
||||
|
||||
static CatmullClarkPrecomputedCoefficients table;
|
||||
|
||||
CatmullClarkPrecomputedCoefficients();
|
||||
~CatmullClarkPrecomputedCoefficients();
|
||||
};
|
||||
}
|
||||
562
Framework/external/embree/kernels/subdiv/catmullclark_patch.h
vendored
Normal file
562
Framework/external/embree/kernels/subdiv/catmullclark_patch.h
vendored
Normal file
|
|
@ -0,0 +1,562 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_ring.h"
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
class __aligned(64) CatmullClarkPatchT
|
||||
{
|
||||
public:
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
|
||||
typedef typename CatmullClark1Ring::Type Type;
|
||||
|
||||
array_t<CatmullClark1RingT<Vertex,Vertex_t>,4> ring;
|
||||
|
||||
public:
|
||||
/* Default constructor: leaves the four rings as default constructed. */
__forceinline CatmullClarkPatchT () {}
|
||||
|
||||
/* Initializes the four rings from four consecutive half edges starting at
   first_half_edge; vertices is a raw byte buffer addressed with stride. */
__forceinline CatmullClarkPatchT (const HalfEdge* first_half_edge, const char* vertices, size_t stride) {
|
||||
init(first_half_edge,vertices,stride);
|
||||
}
|
||||
|
||||
/* Convenience overload taking pointer and stride from a vertex buffer view. */
__forceinline CatmullClarkPatchT (const HalfEdge* first_half_edge, const BufferView<Vec3fa>& vertices) {
|
||||
init(first_half_edge,vertices.getPtr(),vertices.getStride());
|
||||
}
|
||||
|
||||
/* Initializes ring i from the half edge first_half_edge+i for i = 0..3,
   i.e. the four half edges are read from consecutive array positions. */
__forceinline void init (const HalfEdge* first_half_edge, const char* vertices, size_t stride)
{
  const HalfEdge* half_edge = first_half_edge;
  for (unsigned corner=0; corner<4; ++corner, ++half_edge)
    ring[corner].init(half_edge,vertices,stride);

  assert(verify());
}
|
||||
|
||||
/* Sum of the bytes() values reported by the four rings. */
__forceinline size_t bytes() const {
  size_t total = ring[0].bytes();
  for (size_t i=1; i<4; i++)
    total = total+ring[i].bytes();
  return total;
}
|
||||
|
||||
/* Serializes rings 0..3 in order into the buffer at ptr; ofs is passed to
   each ring by reference (presumably advanced as a running write offset). */
__forceinline void serialize(void* ptr, size_t& ofs) const
{
  char* const buffer = (char*)ptr;
  ring[0].serialize(buffer,ofs);
  ring[1].serialize(buffer,ofs);
  ring[2].serialize(buffer,ofs);
  ring[3].serialize(buffer,ofs);
}
|
||||
|
||||
/* Restores rings 0..3 in order from the buffer at ptr, starting at offset 0;
   the offset is passed to each ring by reference (presumably advanced as a
   running read offset). */
__forceinline void deserialize(void* ptr)
{
  char* const buffer = (char*)ptr;
  size_t cursor = 0;
  ring[0].deserialize(buffer,cursor);
  ring[1].deserialize(buffer,cursor);
  ring[2].deserialize(buffer,cursor);
  ring[3].deserialize(buffer,cursor);
}
|
||||
|
||||
/* Union of the bounding boxes of the four rings. */
__forceinline BBox3fa bounds() const
{
  BBox3fa box (ring[0].bounds());
  box.extend(ring[1].bounds());
  box.extend(ring[2].bounds());
  box.extend(ring[3].bounds());
  return box;
}
|
||||
|
||||
/* Combines the ring types: each ring type is XORed with TYPE_CREASES, the
   four results are ANDed, and the combination is XORed with TYPE_CREASES
   again. The TYPE_CREASES bits therefore combine like an OR across the
   rings, all other bits like an AND. */
__forceinline Type type() const
{
  int common = ring[0].type() ^ CatmullClark1Ring::TYPE_CREASES;
  for (size_t i=1; i<4; i++)
    common &= ring[i].type() ^ CatmullClark1Ring::TYPE_CREASES;
  return (Type) (common ^ CatmullClark1Ring::TYPE_CREASES);
}
|
||||
|
||||
/* True only if every ring reports isFinalResolution(res); rings are
   queried in order and the first failing ring ends the check. */
__forceinline bool isFinalResolution(float res) const {
  for (size_t i=0; i<4; i++)
    if (!ring[i].isFinalResolution(res))
      return false;
  return true;
}
|
||||
|
||||
/* Initializes two destination rings from the rings p0 and p1. Both
   destinations receive face valence 4, edge valence 8, no border
   (border_index -1), zero vertex crease weight and the same center vertex
   p0.ring[0]; their eight ring vertices are the same set of source
   vertices stored at different ring positions. Finally the evaluation
   start indices and unique identifier are taken from whichever source
   ring has the smaller-or-equal eval_unique_identifier. */
static __forceinline void init_regular(const CatmullClark1RingT<Vertex,Vertex_t>& p0,
                                       const CatmullClark1RingT<Vertex,Vertex_t>& p1,
                                       CatmullClark1RingT<Vertex,Vertex_t>& dest0,
                                       CatmullClark1RingT<Vertex,Vertex_t>& dest1)
{
  assert(p1.face_valence > 2);

  /* topology and center vertex shared by both destination rings */
  dest1.vertex_level = dest0.vertex_level = p0.edge_level;
  dest1.face_valence = dest0.face_valence = 4;
  dest1.edge_valence = dest0.edge_valence = 8;
  dest1.border_index = dest0.border_index = -1;
  dest1.vtx = dest0.vtx = (Vertex_t)p0.ring[0];
  dest1.vertex_crease_weight = dest0.vertex_crease_weight = 0.0f;

  /* ring vertices: same sources, different positions in the two rings */
  dest1.ring[2] = dest0.ring[0] = (Vertex_t)p0.ring[1];
  dest1.ring[1] = dest0.ring[7] = (Vertex_t)p1.ring[0];
  dest1.ring[0] = dest0.ring[6] = (Vertex_t)p1.vtx;
  dest1.ring[7] = dest0.ring[5] = (Vertex_t)p1.ring[4];
  dest1.ring[6] = dest0.ring[4] = (Vertex_t)p0.ring[p0.edge_valence-1];
  dest1.ring[5] = dest0.ring[3] = (Vertex_t)p0.ring[p0.edge_valence-2];
  dest1.ring[4] = dest0.ring[2] = (Vertex_t)p0.vtx;
  dest1.ring[3] = dest0.ring[1] = (Vertex_t)p0.ring[2];

  /* edge crease weights */
  dest1.crease_weight[1] = dest0.crease_weight[0] = 0.0f;
  dest1.crease_weight[0] = dest0.crease_weight[3] = p1.crease_weight[1];
  dest1.crease_weight[3] = dest0.crease_weight[2] = 0.0f;
  dest1.crease_weight[2] = dest0.crease_weight[1] = p0.crease_weight[0];

  /* evaluation order follows the source ring with the smaller identifier */
  const bool p0_first = p0.eval_unique_identifier <= p1.eval_unique_identifier;
  dest0.eval_start_index = p0_first ? 3 : 1;
  dest1.eval_start_index = p0_first ? 0 : 2;
  dest0.eval_unique_identifier = p0_first ? p0.eval_unique_identifier : p1.eval_unique_identifier;
  dest1.eval_unique_identifier = p0_first ? p0.eval_unique_identifier : p1.eval_unique_identifier;
}
|
||||
|
||||
/*! Initializes two rings (dest0 and dest1) centered at p0.ring[0] with
 *  face valence 3 and a border (border_index 2 for dest0, 4 for dest1).
 *  dest0 is assembled from the vertices of p0 and p1; dest1 receives the
 *  same data with the ring rotated by two entries and the crease weights
 *  rotated by one entry. */
static __forceinline void init_border(const CatmullClark1RingT<Vertex,Vertex_t> &p0,
                                      const CatmullClark1RingT<Vertex,Vertex_t> &p1,
                                      CatmullClark1RingT<Vertex,Vertex_t> &dest0,
                                      CatmullClark1RingT<Vertex,Vertex_t> &dest1)
{
  /* scalar attributes and center vertex of dest0 */
  dest0.vertex_level = p0.edge_level;
  dest0.face_valence = 3;
  dest0.edge_valence = 6;
  dest0.border_index = 2;
  dest0.vtx = (Vertex_t)p0.ring[0];
  dest0.vertex_crease_weight = 0.0f;

  /* 1-ring of dest0 */
  dest0.ring[0] = (Vertex_t)p0.ring[1];
  dest0.ring[1] = (Vertex_t)p0.ring[2];
  dest0.ring[2] = (Vertex_t)p0.vtx;
  dest0.ring[3] = (Vertex_t)p0.ring[p0.border_index+1]; // dummy
  dest0.ring[4] = (Vertex_t)p1.vtx;
  dest0.ring[5] = (Vertex_t)p1.ring[0];

  /* edge crease weights of dest0 */
  dest0.crease_weight[0] = 0.0f;
  dest0.crease_weight[1] = p0.crease_weight[0];
  dest0.crease_weight[2] = p1.crease_weight[1];

  /* dest1 is a rotated copy of dest0 with its own border index */
  dest1.vertex_level = dest0.vertex_level;
  dest1.face_valence = dest0.face_valence;
  dest1.edge_valence = dest0.edge_valence;
  dest1.border_index = 4;
  dest1.vtx = dest0.vtx;
  dest1.vertex_crease_weight = dest0.vertex_crease_weight;
  for (unsigned j=0; j<6; j++)
    dest1.ring[(j+2)%6] = dest0.ring[j];
  for (unsigned j=0; j<3; j++)
    dest1.crease_weight[(j+1)%3] = dest0.crease_weight[j];

  /* both rings inherit the smaller unique identifier; the start indices depend on which ring provided it */
  const bool p0_is_smaller = p0.eval_unique_identifier <= p1.eval_unique_identifier;
  dest0.eval_start_index = p0_is_smaller ? 1 : 2;
  dest1.eval_start_index = p0_is_smaller ? 2 : 0;
  dest0.eval_unique_identifier = p0_is_smaller ? p0.eval_unique_identifier : p1.eval_unique_identifier;
  dest1.eval_unique_identifier = p0_is_smaller ? p0.eval_unique_identifier : p1.eval_unique_identifier;
}
|
||||
|
||||
/*! Initializes dest as a ring with face valence 4 and no border around
 *  the vertex center.
 *  \param center       center vertex of the new ring
 *  \param center_ring  the 8 vertices surrounding center
 *  \param offset       rotation applied to center_ring: dest.ring[i] = center_ring[(offset+i)%8]
 *  \param dest         ring to initialize (edge_level is not written here) */
static __forceinline void init_regular(const Vertex_t &center, const Vertex_t center_ring[8], const unsigned int offset, CatmullClark1RingT<Vertex,Vertex_t> &dest)
{
  assert(offset <= 8); // (8-offset) below is unsigned and must not wrap around

  dest.vertex_level = 0.0f;
  dest.face_valence = 4;
  dest.edge_valence = 8;
  dest.border_index = -1;
  dest.vtx = (Vertex_t)center;
  dest.vertex_crease_weight = 0.0f;
  for (size_t i=0; i<8; i++)
    dest.ring[i] = (Vertex_t)center_ring[(offset+i)%8];
  for (size_t i=0; i<4; i++)
    dest.crease_weight[i] = 0.0f;

  /* the start face is rotated against the ring rotation and wrapped into [0,face_valence) */
  dest.eval_start_index = (8-offset)>>1;
  if (dest.eval_start_index >= dest.face_valence) dest.eval_start_index -= dest.face_valence;
  assert( dest.eval_start_index < dest.face_valence );
  dest.eval_unique_identifier = 0;
}
|
||||
|
||||
/*! Subdivides this patch into the four patches patch[0..3]. Ring k of
 *  this patch is subdivided into patch[k].ring[k]; the rings shared by
 *  neighboring child patches are built with init_regular/init_border and
 *  the ring around the averaged center vertex with the center variant of
 *  init_regular. */
__noinline void subdivide(array_t<CatmullClarkPatchT,4>& patch) const
{
  /* subdivide the four corner rings */
  for (unsigned k=0; k<4; k++)
    ring[k].subdivide(patch[k].ring[k]);

  /* edge levels of the child patches */
  const float half0 = 0.5f*ring[0].edge_level;
  const float half1 = 0.5f*ring[1].edge_level;
  const float half2 = 0.5f*ring[2].edge_level;
  const float half3 = 0.5f*ring[3].edge_level;
  const float avg02 = 0.25f*(ring[0].edge_level+ring[2].edge_level);
  const float avg13 = 0.25f*(ring[1].edge_level+ring[3].edge_level);

  patch[0].ring[0].edge_level = half0;
  patch[0].ring[1].edge_level = avg13;
  patch[0].ring[2].edge_level = avg02;
  patch[0].ring[3].edge_level = half3;

  patch[1].ring[0].edge_level = half0;
  patch[1].ring[1].edge_level = half1;
  patch[1].ring[2].edge_level = avg02;
  patch[1].ring[3].edge_level = avg13;

  patch[2].ring[0].edge_level = avg02;
  patch[2].ring[1].edge_level = half1;
  patch[2].ring[2].edge_level = half2;
  patch[2].ring[3].edge_level = avg13;

  patch[3].ring[0].edge_level = avg02;
  patch[3].ring[1].edge_level = avg13;
  patch[3].ring[2].edge_level = half2;
  patch[3].ring[3].edge_level = half3;

  /* rings shared by child patch a and its successor b */
  for (unsigned a=0; a<4; a++)
  {
    const unsigned b = (a+1)&3;
    const bool regular = ring[a].has_last_face() && ring[b].face_valence > 2;
    if (likely(regular))
      init_regular(patch[a].ring[a],patch[b].ring[b],patch[a].ring[b],patch[b].ring[a]);
    else
      init_border(patch[a].ring[a],patch[b].ring[b],patch[a].ring[b],patch[b].ring[a]);
  }

  /* center vertex and the ring around it */
  Vertex_t center = (ring[0].vtx + ring[1].vtx + ring[2].vtx + ring[3].vtx) * 0.25f;

  Vertex_t center_ring[8];
  for (unsigned k=0; k<4; k++)
  {
    center_ring[2*k+1]     = (Vertex_t)patch[k].ring[k].vtx;
    center_ring[(2*k+2)&7] = (Vertex_t)patch[k].ring[k].ring[0];
  }

  for (unsigned k=0; k<4; k++)
    init_regular(center,center_ring,2*k,patch[k].ring[(k+2)&3]);

  for (unsigned k=0; k<4; k++)
    assert(patch[k].verify());
}
|
||||
|
||||
bool verify() const {
|
||||
return ring[0].hasValidPositions() && ring[1].hasValidPositions() && ring[2].hasValidPositions() && ring[3].hasValidPositions();
|
||||
}
|
||||
|
||||
/*! copies the center vertices of the four rings into the quad */
__forceinline void init( FinalQuad& quad ) const
{
  for (size_t i=0; i<4; i++)
    quad.vtx[i] = (Vertex_t)ring[i].vtx;
}
|
||||
|
||||
/*! prints the four rings of the patch */
friend __forceinline embree_ostream operator<<(embree_ostream o, const CatmullClarkPatchT &p)
{
  o << "CatmullClarkPatch { " << embree_endl;
  for (size_t i=0; i<4; i++) {
    o << "ring" << i << ": ";
    o << p.ring[i] << embree_endl;
  }
  o << "}" << embree_endl;
  return o;
}
|
||||
};
|
||||
|
||||
/*! Catmull-Clark patch instantiated with Vec3fa as Vertex and Vec3fa_t as Vertex_t */
typedef CatmullClarkPatchT<Vec3fa,Vec3fa_t> CatmullClarkPatch3fa;
|
||||
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
class __aligned(64) GeneralCatmullClarkPatchT
|
||||
{
|
||||
public:
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
|
||||
typedef BezierCurveT<Vertex> BezierCurve;
|
||||
|
||||
static const unsigned SIZE = MAX_PATCH_VALENCE;
|
||||
DynamicStackArray<GeneralCatmullClark1RingT<Vertex,Vertex_t>,8,SIZE> ring;
|
||||
unsigned N;
|
||||
|
||||
/*! creates a patch with zero rings */
__forceinline GeneralCatmullClarkPatchT () : N(0)
{
}
|
||||
|
||||
/*! constructs the patch by forwarding the half edge, raw vertex buffer and stride to init() */
GeneralCatmullClarkPatchT (const HalfEdge* h, const char* vertices, size_t stride)
{
  init(h,vertices,stride);
}
|
||||
|
||||
/*! constructs the patch from a vertex buffer view (pointer and stride are taken from the view) */
__forceinline GeneralCatmullClarkPatchT (const HalfEdge* first_half_edge, const BufferView<Vec3fa>& vertices)
{
  init(first_half_edge,
       vertices.getPtr(),
       vertices.getStride());
}
|
||||
|
||||
/*! Initializes one ring per half edge by following next() from h until
 *  h is reached again or SIZE rings have been created; N is set to the
 *  number of rings initialized. */
__forceinline void init (const HalfEdge* h, const char* vertices, size_t stride)
{
  unsigned int count = 0;
  const HalfEdge* current = h;
  for (;;)
  {
    ring[count].init(current,vertices,stride);
    current = current->next();
    count++;
    if ((current == h) || (count >= SIZE)) break;
  }
  N = count;
}
|
||||
|
||||
/*! returns the number of rings stored in this patch */
__forceinline unsigned size() const
{
  return N;
}
|
||||
|
||||
/*! returns true if the patch has exactly four rings and each of them has only_quads set */
__forceinline bool isQuadPatch() const
{
  if (N != 4) return false;
  for (unsigned i=0; i<4; i++)
    if (!ring[i].only_quads)
      return false;
  return true;
}
|
||||
|
||||
/*! Initializes two rings (dest0 and dest1) centered at p0.ring[0] with
 *  face valence 4 and no border, built from the vertices of p0 and p1.
 *  The implementation is shared with the quad patch class instead of
 *  being duplicated here. */
static __forceinline void init_regular(const CatmullClark1RingT<Vertex,Vertex_t>& p0,
                                       const CatmullClark1RingT<Vertex,Vertex_t>& p1,
                                       CatmullClark1RingT<Vertex,Vertex_t>& dest0,
                                       CatmullClark1RingT<Vertex,Vertex_t>& dest1)
{
  CatmullClarkPatch::init_regular(p0,p1,dest0,dest1);
}
|
||||
|
||||
|
||||
/*! Initializes two rings (dest0 and dest1) centered at p0.ring[0] with
 *  face valence 3 and a border, built from the vertices of p0 and p1.
 *  The implementation is shared with the quad patch class instead of
 *  being duplicated here. */
static __forceinline void init_border(const CatmullClark1RingT<Vertex,Vertex_t> &p0,
                                      const CatmullClark1RingT<Vertex,Vertex_t> &p1,
                                      CatmullClark1RingT<Vertex,Vertex_t> &dest0,
                                      CatmullClark1RingT<Vertex,Vertex_t> &dest1)
{
  CatmullClarkPatch::init_border(p0,p1,dest0,dest1);
}
|
||||
|
||||
/*! Initializes dest as a ring with face valence N and no border around
 *  the vertex center.
 *  \param center        center vertex of the new ring
 *  \param center_ring   vertices surrounding center; the first 2*N entries are used
 *  \param vertex_level  value stored as dest.vertex_level
 *  \param N             face valence of the new ring
 *  \param offset        rotation applied to center_ring: dest.ring[i] = center_ring[(2*N+offset+i-1)%(2*N)]
 *  \param dest          ring to initialize (edge_level is not written here) */
static __forceinline void init_regular(const Vertex_t &center, const array_t<Vertex_t,2*SIZE>& center_ring, const float vertex_level, const unsigned int N, const unsigned int offset, CatmullClark1RingT<Vertex,Vertex_t> &dest)
{
  assert(N<(MAX_RING_FACE_VALENCE));
  assert(2*N<(MAX_RING_EDGE_VALENCE));
  assert(offset <= 2*N); // (2*N-offset) below is unsigned and must not wrap around

  dest.vertex_level = vertex_level;
  dest.face_valence = N;
  dest.edge_valence = 2*N;
  dest.border_index = -1;
  dest.vtx = (Vertex_t)center;
  dest.vertex_crease_weight = 0.0f;
  for (unsigned i=0; i<2*N; i++) {
    dest.ring[i] = (Vertex_t)center_ring[(2*N+offset+i-1)%(2*N)];
    assert(isvalid(dest.ring[i]));
  }
  for (unsigned i=0; i<N; i++)
    dest.crease_weight[i] = 0.0f;

  /* the start face is rotated against the ring rotation and wrapped into [0,face_valence) */
  dest.eval_start_index = (2*N-offset)>>1;
  if (dest.eval_start_index >= dest.face_valence) dest.eval_start_index -= dest.face_valence;

  assert( dest.eval_start_index < dest.face_valence );
  dest.eval_unique_identifier = 0;
}
|
||||
|
||||
/*! Subdivides this N-sided patch into N quad patches.
 *  \param patch  receives the N child patches in patch[0..N-1]
 *  \param N_o    receives the number of child patches written (N)
 *
 *  Ring i is subdivided into patch[i].ring[0]; the rings shared with the
 *  neighboring child patch are built with init_regular/init_border, and
 *  patch[i].ring[2] is the ring around the averaged center vertex. */
__noinline void subdivide(array_t<CatmullClarkPatch,SIZE>& patch, unsigned& N_o) const
{
  N_o = N;
  assert( N );

  /* subdivide the corner rings and set the levels of the split outer edges */
  for (unsigned i=0; i<N; i++) {
    const unsigned ip1 = (i+1 == N) ? 0 : i+1; // cyclic successor without a modulo
    ring[i].subdivide(patch[i].ring[0]);
    patch[i]  .ring[0].edge_level = 0.5f*ring[i].edge_level;
    patch[ip1].ring[3].edge_level = 0.5f*ring[i].edge_level;

    assert( patch[i].ring[0].hasValidPositions() );
  }
  assert(N < 2*SIZE);

  Vertex_t center = Vertex_t(0.0f);
  array_t<Vertex_t,2*SIZE> center_ring;
  float center_vertex_level = 2.0f; // guarantees that irregular vertices get always isolated also for non-quads

  for (unsigned i=0; i<N; i++)
  {
    const unsigned ip1 = (i+1 == N) ? 0   : i+1; // cyclic successor
    const unsigned im1 = (i   == 0) ? N-1 : i-1; // cyclic predecessor
    const bool regular = ring[i].has_last_face() && ring[ip1].face_valence > 2;
    if (likely(regular)) init_regular(patch[i].ring[0],patch[ip1].ring[0],patch[i].ring[1],patch[ip1].ring[3]);
    else                 init_border (patch[i].ring[0],patch[ip1].ring[0],patch[i].ring[1],patch[ip1].ring[3]);

    assert( patch[i].ring[1].hasValidPositions() );
    assert( patch[ip1].ring[3].hasValidPositions() );

    /* level of the new inner edge; the center vertex level is the maximum over all of them (and at least 2) */
    const float level = 0.25f*(ring[im1].edge_level+ring[ip1].edge_level);
    patch[i].ring[1].edge_level = patch[ip1].ring[2].edge_level = level;
    center_vertex_level = max(center_vertex_level,level);

    /* accumulate the center vertex and collect the ring around it */
    center += ring[i].vtx;
    center_ring[2*i+0] = (Vertex_t)patch[i].ring[0].vtx;
    center_ring[2*i+1] = (Vertex_t)patch[i].ring[0].ring[0];
  }
  center /= float(N);

  /* every child patch gets the center ring, rotated by two entries per patch */
  for (unsigned int i=0; i<N; i++) {
    init_regular(center,center_ring,center_vertex_level,N,2*i,patch[i].ring[2]);

    assert( patch[i].ring[2].hasValidPositions() );
  }
}
|
||||
|
||||
/*! converts the four rings of this patch into the rings of a quad patch (requires size() == 4) */
void init(CatmullClarkPatch& patch) const
{
  assert(size() == 4);
  for (unsigned i=0; i<4; i++)
    ring[i].convert(patch.ring[i]);
}
|
||||
|
||||
/*! Reorders the rings of patches[1], patches[2] and patches[3] in place;
 *  patches[0] is left untouched. */
static void fix_quad_ring_order (array_t<CatmullClarkPatch,GeneralCatmullClarkPatchT::SIZE>& patches)
{
  /* patch 1: old ring[k] moves to ring[(k+1)%4] */
  {
    CatmullClarkPatch& q = patches[1];
    CatmullClark1Ring saved1 = q.ring[1];
    q.ring[1] = q.ring[0]; // FIXME: optimize these assignments
    q.ring[0] = q.ring[3];
    q.ring[3] = q.ring[2];
    q.ring[2] = saved1;
  }

  /* patch 2: ring[0] <-> ring[2] and ring[1] <-> ring[3] are exchanged */
  {
    CatmullClarkPatch& q = patches[2];
    CatmullClark1Ring saved2 = q.ring[2];
    q.ring[2] = q.ring[0];
    q.ring[0] = saved2;
    CatmullClark1Ring saved3 = q.ring[3];
    q.ring[3] = q.ring[1];
    q.ring[1] = saved3;
  }

  /* patch 3: old ring[k] moves to ring[(k+3)%4] */
  {
    CatmullClarkPatch& q = patches[3];
    CatmullClark1Ring saved3 = q.ring[3];
    q.ring[3] = q.ring[0];
    q.ring[0] = q.ring[1];
    q.ring[1] = q.ring[2];
    q.ring[2] = saved3;
  }
}
|
||||
|
||||
/*! Constructs one cubic Bezier curve per ring in curves[0..N-1].
 *  Curve i runs from the limit vertex of ring i to the limit vertex of
 *  the next ring (cyclically); the inner control points are the end
 *  points offset by one third of the respective limit tangent. */
__forceinline void getLimitBorder(BezierCurve curves[GeneralCatmullClarkPatchT::SIZE]) const
{
  Vertex start = ring[0].getLimitVertex();
  for (unsigned cur=0; cur<N; cur++)
  {
    const unsigned nxt = (cur+1 == N) ? 0 : cur+1;
    const Vertex ctrl_start = madd(1.0f/3.0f,ring[cur].getLimitTangent(),start);
    const Vertex end        = ring[nxt].getLimitVertex();
    const Vertex ctrl_end   = madd(1.0f/3.0f,ring[nxt].getSecondLimitTangent(),end);
    new (&curves[cur]) BezierCurve(start,ctrl_start,ctrl_end,end);
    start = end; // the next curve starts where this one ends
  }
}
|
||||
|
||||
/*! Constructs the two cubic Bezier curves that meet at the limit vertex
 *  of ring subPatch.
 *  \param curves    curves[0] runs from ring subPatch to the next ring,
 *                   curves[1] runs from the previous ring to ring subPatch
 *  \param subPatch  index of the ring the two curves meet at
 *
 *  Inner control points are the end points offset by one third of a
 *  limit tangent. Only tangents that end up in one of the two curves are
 *  evaluated. */
__forceinline void getLimitBorder(BezierCurve curves[2], const unsigned subPatch) const
{
  /* this ring, its cyclic successor and its cyclic predecessor */
  const unsigned i0 = subPatch;
  const unsigned i1 = subPatch+1 == N ? 0 : subPatch+1;
  const unsigned i2 = subPatch == 0 ? N-1 : subPatch-1;

  const Vertex t0_p = ring[i0].getLimitTangent();
  const Vertex t0_m = ring[i0].getSecondLimitTangent();
  const Vertex t1_m = ring[i1].getSecondLimitTangent();
  const Vertex t2_p = ring[i2].getLimitTangent();

  const Vertex b00 = ring[i0].getLimitVertex();
  const Vertex b03 = ring[i1].getLimitVertex();
  const Vertex b33 = ring[i2].getLimitVertex();

  /* float literal 1.0f/3.0f, consistent with the overload that builds all curves */
  const Vertex b01 = madd(1.0f/3.0f,t0_p,b00);
  const Vertex b11 = madd(1.0f/3.0f,t0_m,b00);
  const Vertex b02 = madd(1.0f/3.0f,t1_m,b03);
  const Vertex b22 = madd(1.0f/3.0f,t2_p,b33);

  new (&curves[0]) BezierCurve(b00,b01,b02,b03);
  new (&curves[1]) BezierCurve(b33,b22,b11,b00);
}
|
||||
|
||||
/*! prints the p.N rings of the patch */
friend __forceinline embree_ostream operator<<(embree_ostream o, const GeneralCatmullClarkPatchT &p)
{
  o << "GeneralCatmullClarkPatch { " << embree_endl;
  for (unsigned i=0; i<p.N; i++) {
    o << "ring" << i << ": ";
    o << p.ring[i] << embree_endl;
  }
  o << "}" << embree_endl;
  return o;
}
|
||||
};
|
||||
|
||||
/*! general Catmull-Clark patch instantiated with Vec3fa as Vertex and Vec3fa_t as Vertex_t */
typedef GeneralCatmullClarkPatchT<Vec3fa,Vec3fa_t> GeneralCatmullClarkPatch3fa;
|
||||
}
|
||||
826
Framework/external/embree/kernels/subdiv/catmullclark_ring.h
vendored
Normal file
826
Framework/external/embree/kernels/subdiv/catmullclark_ring.h
vendored
Normal file
|
|
@ -0,0 +1,826 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>

#include "../common/geometry.h"
#include "../common/buffer.h"
#include "half_edge.h"
#include "catmullclark_coefficients.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! four vertices of a quad, aligned to 64 bytes */
struct __aligned(64) FinalQuad
{
  Vec3fa vtx[4];
};
|
||||
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
struct __aligned(64) CatmullClark1RingT
|
||||
{
|
||||
ALIGNED_STRUCT_(64);
|
||||
|
||||
int border_index; //!< edge index where border starts
|
||||
unsigned int face_valence; //!< number of adjacent quad faces
|
||||
unsigned int edge_valence; //!< number of adjacent edges (2*face_valence)
|
||||
float vertex_crease_weight; //!< weight of vertex crease (0 if no vertex crease)
|
||||
DynamicStackArray<float,16,MAX_RING_FACE_VALENCE> crease_weight; //!< edge crease weights for each adjacent edge
|
||||
float vertex_level; //!< maximum level of all adjacent edges
|
||||
float edge_level; //!< level of first edge
|
||||
unsigned int eval_start_index; //!< topology dependent index to start evaluation
|
||||
unsigned int eval_unique_identifier; //!< topology dependent unique identifier for this ring
|
||||
Vertex vtx; //!< center vertex
|
||||
DynamicStackArray<Vertex,32,MAX_RING_EDGE_VALENCE> ring; //!< ring of neighboring vertices
|
||||
|
||||
public:
|
||||
CatmullClark1RingT ()
|
||||
: eval_start_index(0), eval_unique_identifier(0) {} // FIXME: default constructor should be empty
|
||||
|
||||
/*! calculates number of bytes required to serialize this structure */
|
||||
__forceinline size_t bytes() const
|
||||
{
|
||||
size_t ofs = 0;
|
||||
ofs += sizeof(border_index);
|
||||
ofs += sizeof(face_valence);
|
||||
assert(2*face_valence == edge_valence);
|
||||
ofs += sizeof(vertex_crease_weight);
|
||||
ofs += face_valence*sizeof(float);
|
||||
ofs += sizeof(vertex_level);
|
||||
ofs += sizeof(edge_level);
|
||||
ofs += sizeof(eval_start_index);
|
||||
ofs += sizeof(eval_unique_identifier);
|
||||
ofs += sizeof(vtx);
|
||||
ofs += edge_valence*sizeof(Vertex);
|
||||
return ofs;
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
static __forceinline void store(char* ptr, size_t& ofs, const Ty& v) {
|
||||
*(Ty*)&ptr[ofs] = v; ofs += sizeof(Ty);
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
static __forceinline void load(char* ptr, size_t& ofs, Ty& v) {
|
||||
v = *(Ty*)&ptr[ofs]; ofs += sizeof(Ty);
|
||||
}
|
||||
|
||||
/*! serializes the ring to some memory location */
|
||||
__forceinline void serialize(char* ptr, size_t& ofs) const
|
||||
{
|
||||
store(ptr,ofs,border_index);
|
||||
store(ptr,ofs,face_valence);
|
||||
store(ptr,ofs,vertex_crease_weight);
|
||||
for (size_t i=0; i<face_valence; i++)
|
||||
store(ptr,ofs,crease_weight[i]);
|
||||
store(ptr,ofs,vertex_level);
|
||||
store(ptr,ofs,edge_level);
|
||||
store(ptr,ofs,eval_start_index);
|
||||
store(ptr,ofs,eval_unique_identifier);
|
||||
Vertex_t::storeu(&ptr[ofs],vtx); ofs += sizeof(Vertex);
|
||||
for (size_t i=0; i<edge_valence; i++) {
|
||||
Vertex_t::storeu(&ptr[ofs],ring[i]); ofs += sizeof(Vertex);
|
||||
}
|
||||
}
|
||||
|
||||
/*! deserializes the ring from some memory location */
|
||||
__forceinline void deserialize(char* ptr, size_t& ofs)
|
||||
{
|
||||
load(ptr,ofs,border_index);
|
||||
load(ptr,ofs,face_valence);
|
||||
edge_valence = 2*face_valence;
|
||||
load(ptr,ofs,vertex_crease_weight);
|
||||
for (size_t i=0; i<face_valence; i++)
|
||||
load(ptr,ofs,crease_weight[i]);
|
||||
load(ptr,ofs,vertex_level);
|
||||
load(ptr,ofs,edge_level);
|
||||
load(ptr,ofs,eval_start_index);
|
||||
load(ptr,ofs,eval_unique_identifier);
|
||||
vtx = Vertex_t::loadu(&ptr[ofs]); ofs += sizeof(Vertex);
|
||||
for (size_t i=0; i<edge_valence; i++) {
|
||||
ring[i] = Vertex_t::loadu(&ptr[ofs]); ofs += sizeof(Vertex);
|
||||
}
|
||||
}
|
||||
|
||||
/*! returns true if a border was recorded for this ring (border_index differs from -1) */
__forceinline bool hasBorder() const
{
  return !(border_index == -1);
}
|
||||
|
||||
/*! returns the i-th ring vertex counted from the start of the ring */
__forceinline const Vertex& front(size_t i) const
{
  assert(edge_valence>i);
  const Vertex& v = ring[i];
  return v;
}
|
||||
|
||||
/*! returns the i-th ring vertex counted from the end of the ring, i.e. ring[edge_valence-i] */
/* NOTE(review): the assertion admits i == 0, which addresses ring[edge_valence] -- confirm callers only pass i >= 1 */
__forceinline const Vertex& back(size_t i) const
{
  assert(edge_valence>=i);
  const size_t index = edge_valence-i;
  return ring[index];
}
|
||||
|
||||
/*! returns false only if the border starts at edge index edge_valence-2 */
__forceinline bool has_last_face() const
{
  const size_t last_face_start = (size_t)edge_valence-2;
  return (size_t)border_index != last_face_start;
}
|
||||
|
||||
/*! returns false only if the border starts at edge index 2*i */
__forceinline bool has_opposite_front(size_t i) const
{
  const size_t edge_index = 2*i;
  return (size_t)border_index != edge_index;
}
|
||||
|
||||
/*! returns false only if the border starts at edge index edge_valence-2-2*i */
__forceinline bool has_opposite_back(size_t i) const
{
  const size_t edge_index = ((size_t)edge_valence-2-2*i);
  return (size_t)border_index != edge_index;
}
|
||||
|
||||
/*! returns the bounding box of the center vertex and all ring vertices */
__forceinline BBox3fa bounds() const
{
  BBox3fa box ( vtx );
  for (size_t e=0; e<edge_valence; e++) {
    box.extend( ring[e] );
  }
  return box;
}
|
||||
|
||||
/*! initializes the ring from the half edge structure */
|
||||
__forceinline void init(const HalfEdge* const h, const char* vertices, size_t stride)
|
||||
{
|
||||
border_index = -1;
|
||||
vtx = Vertex_t::loadu(vertices+h->getStartVertexIndex()*stride);
|
||||
vertex_crease_weight = h->vertex_crease_weight;
|
||||
|
||||
HalfEdge* p = (HalfEdge*) h;
|
||||
|
||||
unsigned i=0;
|
||||
unsigned min_vertex_index = (unsigned)-1;
|
||||
unsigned min_vertex_index_face = (unsigned)-1;
|
||||
edge_level = p->edge_level;
|
||||
vertex_level = 0.0f;
|
||||
|
||||
do
|
||||
{
|
||||
vertex_level = max(vertex_level,p->edge_level);
|
||||
crease_weight[i/2] = p->edge_crease_weight;
|
||||
assert(p->hasOpposite() || p->edge_crease_weight == float(inf));
|
||||
|
||||
/* store first two vertices of face */
|
||||
p = p->next();
|
||||
const unsigned index0 = p->getStartVertexIndex();
|
||||
ring[i++] = Vertex_t::loadu(vertices+index0*stride);
|
||||
if (index0 < min_vertex_index) { min_vertex_index = index0; min_vertex_index_face = i>>1; }
|
||||
p = p->next();
|
||||
|
||||
const unsigned index1 = p->getStartVertexIndex();
|
||||
ring[i++] = Vertex_t::loadu(vertices+index1*stride);
|
||||
p = p->next();
|
||||
|
||||
/* continue with next face */
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else
|
||||
{
|
||||
/* find minimum start vertex */
|
||||
const unsigned index0 = p->getStartVertexIndex();
|
||||
if (index0 < min_vertex_index) { min_vertex_index = index0; min_vertex_index_face = i>>1; }
|
||||
|
||||
/*! mark first border edge and store dummy vertex for face between the two border edges */
|
||||
border_index = i;
|
||||
crease_weight[i/2] = inf;
|
||||
ring[i++] = Vertex_t::loadu(vertices+index0*stride);
|
||||
ring[i++] = vtx; // dummy vertex
|
||||
|
||||
/*! goto other side of border */
|
||||
p = (HalfEdge*) h;
|
||||
while (p->hasOpposite())
|
||||
p = p->opposite()->next();
|
||||
}
|
||||
|
||||
} while (p != h);
|
||||
|
||||
edge_valence = i;
|
||||
face_valence = i >> 1;
|
||||
eval_unique_identifier = min_vertex_index;
|
||||
eval_start_index = min_vertex_index_face;
|
||||
|
||||
assert( hasValidPositions() );
|
||||
}
|
||||
|
||||
/*! Performs one subdivision step of this ring and writes the result to dest.
 *  Levels are halved, crease weights are reduced by one (clamped at 0),
 *  face and edge points are written to dest.ring, and dest.vtx is computed
 *  with the smooth rule, the vertex crease rule, the edge crease rule or
 *  the corner rule, depending on the crease weights. Faces are processed
 *  starting at eval_start_index. */
__forceinline void subdivide(CatmullClark1RingT& dest) const
{
  dest.edge_level = 0.5f*edge_level;
  dest.vertex_level = 0.5f*vertex_level;
  dest.face_valence = face_valence;
  dest.edge_valence = edge_valence;
  dest.border_index = border_index;
  dest.vertex_crease_weight = max(0.0f,vertex_crease_weight-1.0f);
  dest.eval_start_index = eval_start_index;
  dest.eval_unique_identifier = eval_unique_identifier;

  /* calculate face points */
  Vertex_t S = Vertex_t(0.0f);
  for (size_t i=0; i<face_valence; i++)
  {
    size_t face = i + eval_start_index;
    if (face >= face_valence) face -= face_valence;
    assert(face < face_valence);

    size_t e0 = 2*face+0;
    if (e0 >= edge_valence) e0 -= edge_valence;
    assert(e0 < edge_valence);

    size_t e1 = 2*face+1;
    if (e1 >= edge_valence) e1 -= edge_valence;
    assert(e1 < edge_valence);

    size_t e2 = 2*face+2;
    if (e2 >= edge_valence) e2 -= edge_valence;
    assert(e2 < edge_valence);

    dest.ring[e1] = ((vtx + ring[e1]) + (ring[e0] + ring[e2])) * 0.25f;
    S += dest.ring[e1];
  }

  /* calculate new edge points */
  size_t num_creases = 0;
  array_t<size_t,MAX_RING_FACE_VALENCE> crease_id;

  for (size_t i=0; i<face_valence; i++)
  {
    size_t face = i + eval_start_index;
    if (face >= face_valence) face -= face_valence;

    const float edge_crease = crease_weight[face];
    dest.crease_weight[face] = max(edge_crease-1.0f,0.0f);

    const size_t index = 2*face;
    const size_t prev_index = face == 0 ? edge_valence-1 : 2*face-1;
    const size_t next_index = 2*face+1;

    const Vertex_t v = vtx + ring[index];
    const Vertex_t f = dest.ring[prev_index] + dest.ring[next_index];
    S += ring[index];

    /* fast path for regular edge points */
    if (likely(edge_crease <= 0.0f))
    {
      dest.ring[index] = (v+f) * 0.25f;
      continue;
    }

    /* slower path: the edge is a crease */
    crease_id[num_creases++] = face;

    /* even slower path for blended edge rule */
    if (unlikely(edge_crease < 1.0f))
      dest.ring[index] = lerp((v+f)*0.25f,v*0.5f,edge_crease);
    /* hard edge rule */
    else
      dest.ring[index] = v*0.5f;
  }

  /* compute new vertex using smooth rule */
  const float inv_face_valence = 1.0f / (float)face_valence;
  const Vertex_t v_smooth = (Vertex_t) madd(inv_face_valence,S,(float(face_valence)-2.0f)*vtx)*inv_face_valence;
  dest.vtx = v_smooth;

  /* compute new vertex using vertex_crease_weight rule */
  if (unlikely(vertex_crease_weight > 0.0f))
  {
    if (vertex_crease_weight >= 1.0f)
      dest.vtx = vtx;
    else
      dest.vtx = lerp(v_smooth,vtx,vertex_crease_weight);
    return;
  }

  /* no edge crease rule and dart rule */
  if (likely(num_creases <= 1))
    return;

  /* compute new vertex using corner rule */
  if (unlikely(num_creases != 2))
  {
    dest.vtx = vtx;
    return;
  }

  /* compute new vertex using crease rule */
  const size_t crease0 = crease_id[0];
  const size_t crease1 = crease_id[1];
  const Vertex_t v_sharp = (Vertex_t)(ring[2*crease0] + 6.0f*vtx + ring[2*crease1]) * (1.0f / 8.0f);
  dest.vtx = v_sharp;

  /* update crease_weights using chaikin rule */
  const float crease_weight0 = crease_weight[crease0];
  const float crease_weight1 = crease_weight[crease1];
  dest.crease_weight[crease0] = max(0.25f*(3.0f*crease_weight0 + crease_weight1)-1.0f,0.0f);
  dest.crease_weight[crease1] = max(0.25f*(3.0f*crease_weight1 + crease_weight0)-1.0f,0.0f);

  /* interpolate between sharp and smooth rule */
  const float v_blend = 0.5f*(crease_weight0+crease_weight1);
  if (unlikely(v_blend < 1.0f))
    dest.vtx = lerp(v_smooth,v_sharp,v_blend);
}
|
||||
|
||||
/*! Valence-only regularity test: an interior vertex (border_index == -1)
 *  is regular with exactly 4 faces, any other vertex with fewer than 4. */
__forceinline bool isRegular1() const
{
  const bool interior = border_index == -1;
  return interior ? (face_valence == 4) : (face_valence < 4);
}
|
||||
|
||||
/*! Counts the ring edges whose crease weight is strictly positive. */
__forceinline size_t numEdgeCreases() const
{
  size_t count = 0;
  for (size_t edge=0; edge<face_valence; ++edge)
    if (crease_weight[edge] > 0.0f) ++count;
  return count;
}
|
||||
|
||||
/*! Bit flags describing which patch types a ring is compatible with;
 *  type() combines them with bitwise or/and. */
enum Type {
  TYPE_NONE            = 0,       //!< invalid type
  TYPE_REGULAR         = 1 << 0,  //!< regular patch when ignoring creases
  TYPE_REGULAR_CREASES = 1 << 1,  //!< regular patch when considering creases
  TYPE_GREGORY         = 1 << 2,  //!< gregory patch when ignoring creases
  TYPE_GREGORY_CREASES = 1 << 3,  //!< gregory patch when considering creases
  TYPE_CREASES         = 1 << 4   //!< patch has crease features
};
|
||||
|
||||
/*! Classifies the ring as a combination of Type flags.
 *
 *  A crease-dependent mask is built first: TYPE_REGULAR|TYPE_GREGORY always,
 *  the *_CREASES variants only when no inner creases exist (exactly 2 creased
 *  edges on a border ring, 0 otherwise), and TYPE_CREASES when any edge is
 *  creased. The mask is then restricted according to valence, border state
 *  and vertex crease weight. */
__forceinline Type type() const
{
  /* check if there is an edge crease anywhere */
  const size_t crease_count = numEdgeCreases();
  const bool no_inner_creases = hasBorder() ? crease_count == 2 : crease_count == 0;

  int mask = TYPE_REGULAR | TYPE_GREGORY;
  if (no_inner_creases ) mask |= TYPE_REGULAR_CREASES | TYPE_GREGORY_CREASES;
  if (crease_count != 0) mask |= TYPE_CREASES;

  /* flag sets a regular / an irregular vertex may keep */
  const int regular_flags = TYPE_REGULAR | TYPE_REGULAR_CREASES | TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES;
  const int gregory_flags = TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES;

  const bool on_border = border_index != -1;

  /* border corner with two faces: regular only for weight 0 or infinity */
  if (face_valence == 2 && on_border) {
    const bool smooth_or_hard = vertex_crease_weight == 0.0f || vertex_crease_weight == float(inf);
    return smooth_or_hard ? (Type) (mask & regular_flags) : TYPE_CREASES;
  }

  /* any other vertex with a vertex crease is a crease feature */
  if (vertex_crease_weight != 0.0f)
    return TYPE_CREASES;

  /* regular valence: 3 faces on a border, 4 faces in the interior */
  const bool regular_valence = (face_valence == 3 && on_border) || (face_valence == 4 && !on_border);
  return (Type) (mask & (regular_valence ? regular_flags : gregory_flags));
}
|
||||
|
||||
/*! Returns true once the requested resolution res is at least vertex_level. */
__forceinline bool isFinalResolution(float res) const
{
  return res >= vertex_level;
}
|
||||
|
||||
/* computes the limit vertex */
|
||||
__forceinline Vertex getLimitVertex() const
|
||||
{
|
||||
/* return hard corner */
|
||||
if (unlikely(std::isinf(vertex_crease_weight)))
|
||||
return vtx;
|
||||
|
||||
/* border vertex rule */
|
||||
if (unlikely(border_index != -1))
|
||||
{
|
||||
const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
|
||||
return (4.0f * vtx + (ring[border_index] + ring[second_border_index])) * 1.0f/6.0f;
|
||||
}
|
||||
|
||||
Vertex_t F( 0.0f );
|
||||
Vertex_t E( 0.0f );
|
||||
|
||||
assert(eval_start_index < face_valence);
|
||||
|
||||
for (size_t i=0; i<face_valence; i++) {
|
||||
size_t index = i+eval_start_index;
|
||||
if (index >= face_valence) index -= face_valence;
|
||||
F += ring[2*index+1];
|
||||
E += ring[2*index];
|
||||
}
|
||||
|
||||
const float n = (float)face_valence;
|
||||
return (Vertex_t)(n*n*vtx+4.0f*E+F) / ((n+5.0f)*n);
|
||||
}
|
||||
|
||||
/* gets limit tangent in the direction of edge vtx -> ring[0] */
|
||||
__forceinline Vertex getLimitTangent() const
|
||||
{
|
||||
if (unlikely(std::isinf(vertex_crease_weight)))
|
||||
return ring[0] - vtx;
|
||||
|
||||
/* border vertex rule */
|
||||
if (unlikely(border_index != -1))
|
||||
{
|
||||
if (border_index != (int)edge_valence-2 ) {
|
||||
return ring[0] - vtx;
|
||||
}
|
||||
else
|
||||
{
|
||||
const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
|
||||
return (ring[second_border_index] - ring[border_index]) * 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
Vertex_t alpha( 0.0f );
|
||||
Vertex_t beta ( 0.0f );
|
||||
|
||||
const size_t n = face_valence;
|
||||
|
||||
assert(eval_start_index < face_valence);
|
||||
|
||||
Vertex_t q( 0.0f );
|
||||
for (size_t i=0; i<face_valence; i++)
|
||||
{
|
||||
size_t index = i+eval_start_index;
|
||||
if (index >= face_valence) index -= face_valence;
|
||||
const float a = CatmullClarkPrecomputedCoefficients::table.limittangent_a(index,n);
|
||||
const float b = CatmullClarkPrecomputedCoefficients::table.limittangent_b(index,n);
|
||||
alpha += a * ring[2*index];
|
||||
beta += b * ring[2*index+1];
|
||||
}
|
||||
|
||||
const float sigma = CatmullClarkPrecomputedCoefficients::table.limittangent_c(n);
|
||||
return sigma * (alpha + beta);
|
||||
}
|
||||
|
||||
/* gets limit tangent in the direction of edge vtx -> ring[edge_valence-2] */
|
||||
__forceinline Vertex getSecondLimitTangent() const
|
||||
{
|
||||
if (unlikely(std::isinf(vertex_crease_weight)))
|
||||
return ring[2] - vtx;
|
||||
|
||||
/* border vertex rule */
|
||||
if (unlikely(border_index != -1))
|
||||
{
|
||||
if (border_index != 2) {
|
||||
return ring[2] - vtx;
|
||||
}
|
||||
else {
|
||||
const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
|
||||
return (ring[border_index] - ring[second_border_index]) * 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
Vertex_t alpha( 0.0f );
|
||||
Vertex_t beta ( 0.0f );
|
||||
|
||||
const size_t n = face_valence;
|
||||
|
||||
assert(eval_start_index < face_valence);
|
||||
|
||||
for (size_t i=0; i<face_valence; i++)
|
||||
{
|
||||
size_t index = i+eval_start_index;
|
||||
if (index >= face_valence) index -= face_valence;
|
||||
|
||||
size_t prev_index = index == 0 ? face_valence-1 : index-1; // need to be bit-wise exact in cosf eval
|
||||
const float a = CatmullClarkPrecomputedCoefficients::table.limittangent_a(prev_index,n);
|
||||
const float b = CatmullClarkPrecomputedCoefficients::table.limittangent_b(prev_index,n);
|
||||
alpha += a * ring[2*index];
|
||||
beta += b * ring[2*index+1];
|
||||
}
|
||||
|
||||
const float sigma = CatmullClarkPrecomputedCoefficients::table.limittangent_c(n);
|
||||
return sigma* (alpha + beta);
|
||||
}
|
||||
|
||||
/* gets surface normal */
|
||||
const Vertex getNormal() const {
|
||||
return cross(getLimitTangent(),getSecondLimitTangent());
|
||||
}
|
||||
|
||||
/* returns center of the n-th quad in the 1-ring */
|
||||
__forceinline Vertex getQuadCenter(const size_t index) const
|
||||
{
|
||||
const Vertex_t &p0 = vtx;
|
||||
const Vertex_t &p1 = ring[2*index+0];
|
||||
const Vertex_t &p2 = ring[2*index+1];
|
||||
const Vertex_t &p3 = index == face_valence-1 ? ring[0] : ring[2*index+2];
|
||||
const Vertex p = (p0+p1+p2+p3) * 0.25f;
|
||||
return p;
|
||||
}
|
||||
|
||||
/* returns center of the n-th edge in the 1-ring */
|
||||
__forceinline Vertex getEdgeCenter(const size_t index) const {
|
||||
return (vtx + ring[index*2]) * 0.5f;
|
||||
}
|
||||
|
||||
bool hasValidPositions() const
|
||||
{
|
||||
for (size_t i=0; i<edge_valence; i++) {
|
||||
if (!isvalid(ring[i]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! Debug output: one header line with the ring parameters followed by one
 *  line per ring entry; even entries also print the crease weight of their
 *  edge. At most MAX_RING_FACE_VALENCE entries are printed. */
friend __forceinline embree_ostream operator<<(embree_ostream o, const CatmullClark1RingT &c)
{
  o << "vtx " << c.vtx << " size = " << c.edge_valence << ", ";
  o << "hard_edge = " << c.border_index << ", face_valence " << c.face_valence;
  o << ", edge_level = " << c.edge_level << ", vertex_level = " << c.vertex_level;
  o << ", eval_start_index: " << c.eval_start_index << ", ring: " << embree_endl;

  const unsigned int printed = min(c.edge_valence,(unsigned int)MAX_RING_FACE_VALENCE);
  for (unsigned int slot=0; slot<printed; slot++) {
    o << slot << " -> " << c.ring[slot];
    const bool is_edge_entry = slot % 2 == 0;
    if (is_edge_entry) o << " crease = " << c.crease_weight[slot/2];
    o << embree_endl;
  }
  return o;
}
|
||||
};
|
||||
|
||||
/* CatmullClark1RingT instantiated with Vertex = Vec3fa and Vertex_t = Vec3fa_t */
typedef CatmullClark1RingT<Vec3fa,Vec3fa_t> CatmullClark1Ring3fa;
|
||||
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
struct __aligned(64) GeneralCatmullClark1RingT
|
||||
{
|
||||
ALIGNED_STRUCT_(64);
|
||||
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
|
||||
|
||||
struct Face
|
||||
{
|
||||
__forceinline Face() {}
|
||||
__forceinline Face (int size, float crease_weight)
|
||||
: size(size), crease_weight(crease_weight) {}
|
||||
|
||||
// FIXME: add member that returns total number of vertices
|
||||
|
||||
int size; // number of vertices-2 of nth face in ring
|
||||
float crease_weight;
|
||||
};
|
||||
|
||||
Vertex vtx;
|
||||
DynamicStackArray<Vertex,32,MAX_RING_EDGE_VALENCE> ring;
|
||||
DynamicStackArray<Face,16,MAX_RING_FACE_VALENCE> faces;
|
||||
unsigned int face_valence;
|
||||
unsigned int edge_valence;
|
||||
int border_face;
|
||||
float vertex_crease_weight;
|
||||
float vertex_level; //!< maximum level of adjacent edges
|
||||
float edge_level; // level of first edge
|
||||
bool only_quads; // true if all faces are quads
|
||||
unsigned int eval_start_face_index;
|
||||
unsigned int eval_start_vertex_index;
|
||||
unsigned int eval_unique_identifier;
|
||||
|
||||
public:
|
||||
GeneralCatmullClark1RingT()
|
||||
: eval_start_face_index(0), eval_start_vertex_index(0), eval_unique_identifier(0) {}
|
||||
|
||||
__forceinline bool isRegular() const
|
||||
{
|
||||
if (border_face == -1 && face_valence == 4) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
__forceinline bool has_last_face() const {
|
||||
return border_face != (int)face_valence-1;
|
||||
}
|
||||
|
||||
__forceinline bool has_second_face() const {
|
||||
return (border_face == -1) || (border_face >= 2);
|
||||
}
|
||||
|
||||
bool hasValidPositions() const
|
||||
{
|
||||
for (size_t i=0; i<edge_valence; i++) {
|
||||
if (!isvalid(ring[i]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
__forceinline void init(const HalfEdge* const h, const char* vertices, size_t stride)
|
||||
{
|
||||
only_quads = true;
|
||||
border_face = -1;
|
||||
vtx = Vertex_t::loadu(vertices+h->getStartVertexIndex()*stride);
|
||||
vertex_crease_weight = h->vertex_crease_weight;
|
||||
HalfEdge* p = (HalfEdge*) h;
|
||||
|
||||
unsigned int e=0, f=0;
|
||||
unsigned min_vertex_index = (unsigned)-1;
|
||||
unsigned min_vertex_index_face = (unsigned)-1;
|
||||
unsigned min_vertex_index_vertex = (unsigned)-1;
|
||||
edge_level = p->edge_level;
|
||||
vertex_level = 0.0f;
|
||||
do
|
||||
{
|
||||
HalfEdge* p_prev = p->prev();
|
||||
HalfEdge* p_next = p->next();
|
||||
const float crease_weight = p->edge_crease_weight;
|
||||
assert(p->hasOpposite() || p->edge_crease_weight == float(inf));
|
||||
vertex_level = max(vertex_level,p->edge_level);
|
||||
|
||||
/* find minimum start vertex */
|
||||
unsigned vertex_index = p_next->getStartVertexIndex();
|
||||
if (vertex_index < min_vertex_index) { min_vertex_index = vertex_index; min_vertex_index_face = f; min_vertex_index_vertex = e; }
|
||||
|
||||
/* store first N-2 vertices of face */
|
||||
unsigned int vn = 0;
|
||||
for (p = p_next; p!=p_prev; p=p->next()) {
|
||||
ring[e++] = Vertex_t::loadu(vertices+p->getStartVertexIndex()*stride);
|
||||
vn++;
|
||||
}
|
||||
faces[f++] = Face(vn,crease_weight);
|
||||
only_quads &= (vn == 2);
|
||||
|
||||
/* continue with next face */
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else
|
||||
{
|
||||
/* find minimum start vertex */
|
||||
unsigned vertex_index = p->getStartVertexIndex();
|
||||
if (vertex_index < min_vertex_index) { min_vertex_index = vertex_index; min_vertex_index_face = f; min_vertex_index_vertex = e; }
|
||||
|
||||
/*! mark first border edge and store dummy vertex for face between the two border edges */
|
||||
border_face = f;
|
||||
faces[f++] = Face(2,inf);
|
||||
ring[e++] = Vertex_t::loadu(vertices+p->getStartVertexIndex()*stride);
|
||||
ring[e++] = vtx; // dummy vertex
|
||||
|
||||
/*! goto other side of border */
|
||||
p = (HalfEdge*) h;
|
||||
while (p->hasOpposite())
|
||||
p = p->opposite()->next();
|
||||
}
|
||||
|
||||
} while (p != h);
|
||||
|
||||
edge_valence = e;
|
||||
face_valence = f;
|
||||
eval_unique_identifier = min_vertex_index;
|
||||
eval_start_face_index = min_vertex_index_face;
|
||||
eval_start_vertex_index = min_vertex_index_vertex;
|
||||
|
||||
assert( hasValidPositions() );
|
||||
}
|
||||
|
||||
__forceinline void subdivide(CatmullClark1Ring& dest) const
|
||||
{
|
||||
dest.edge_level = 0.5f*edge_level;
|
||||
dest.vertex_level = 0.5f*vertex_level;
|
||||
dest.face_valence = face_valence;
|
||||
dest.edge_valence = 2*face_valence;
|
||||
dest.border_index = border_face == -1 ? -1 : 2*border_face; // FIXME:
|
||||
dest.vertex_crease_weight = max(0.0f,vertex_crease_weight-1.0f);
|
||||
dest.eval_start_index = eval_start_face_index;
|
||||
dest.eval_unique_identifier = eval_unique_identifier;
|
||||
assert(dest.face_valence <= MAX_RING_FACE_VALENCE);
|
||||
|
||||
/* calculate face points */
|
||||
Vertex_t S = Vertex_t(0.0f);
|
||||
for (size_t face=0, v=eval_start_vertex_index; face<face_valence; face++) {
|
||||
size_t f = (face + eval_start_face_index)%face_valence;
|
||||
|
||||
Vertex_t F = vtx;
|
||||
for (size_t k=v; k<=v+faces[f].size; k++) F += ring[k%edge_valence]; // FIXME: optimize
|
||||
S += dest.ring[2*f+1] = F/float(faces[f].size+2);
|
||||
v+=faces[f].size;
|
||||
v%=edge_valence;
|
||||
}
|
||||
|
||||
/* calculate new edge points */
|
||||
size_t num_creases = 0;
|
||||
array_t<size_t,MAX_RING_FACE_VALENCE> crease_id;
|
||||
Vertex_t C = Vertex_t(0.0f);
|
||||
for (size_t face=0, j=eval_start_vertex_index; face<face_valence; face++)
|
||||
{
|
||||
size_t i = (face + eval_start_face_index)%face_valence;
|
||||
|
||||
const Vertex_t v = vtx + ring[j];
|
||||
Vertex_t f = dest.ring[2*i+1];
|
||||
if (i == 0) f += dest.ring[dest.edge_valence-1];
|
||||
else f += dest.ring[2*i-1];
|
||||
S += ring[j];
|
||||
dest.crease_weight[i] = max(faces[i].crease_weight-1.0f,0.0f);
|
||||
|
||||
/* fast path for regular edge points */
|
||||
if (likely(faces[i].crease_weight <= 0.0f)) {
|
||||
dest.ring[2*i] = (v+f) * 0.25f;
|
||||
}
|
||||
|
||||
/* slower path for hard edge rule */
|
||||
else {
|
||||
C += ring[j]; crease_id[num_creases++] = i;
|
||||
dest.ring[2*i] = v*0.5f;
|
||||
|
||||
/* even slower path for blended edge rule */
|
||||
if (unlikely(faces[i].crease_weight < 1.0f)) {
|
||||
dest.ring[2*i] = lerp((v+f)*0.25f,v*0.5f,faces[i].crease_weight);
|
||||
}
|
||||
}
|
||||
j+=faces[i].size;
|
||||
j%=edge_valence;
|
||||
}
|
||||
|
||||
/* compute new vertex using smooth rule */
|
||||
const float inv_face_valence = 1.0f / (float)face_valence;
|
||||
const Vertex_t v_smooth = (Vertex_t) madd(inv_face_valence,S,(float(face_valence)-2.0f)*vtx)*inv_face_valence;
|
||||
dest.vtx = v_smooth;
|
||||
|
||||
/* compute new vertex using vertex_crease_weight rule */
|
||||
if (unlikely(vertex_crease_weight > 0.0f))
|
||||
{
|
||||
if (vertex_crease_weight >= 1.0f) {
|
||||
dest.vtx = vtx;
|
||||
} else {
|
||||
dest.vtx = lerp(vtx,v_smooth,vertex_crease_weight);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(num_creases <= 1))
|
||||
return;
|
||||
|
||||
/* compute new vertex using crease rule */
|
||||
if (likely(num_creases == 2)) {
|
||||
const Vertex_t v_sharp = (Vertex_t)(C + 6.0f * vtx) * (1.0f / 8.0f);
|
||||
const float crease_weight0 = faces[crease_id[0]].crease_weight;
|
||||
const float crease_weight1 = faces[crease_id[1]].crease_weight;
|
||||
dest.vtx = v_sharp;
|
||||
dest.crease_weight[crease_id[0]] = max(0.25f*(3.0f*crease_weight0 + crease_weight1)-1.0f,0.0f);
|
||||
dest.crease_weight[crease_id[1]] = max(0.25f*(3.0f*crease_weight1 + crease_weight0)-1.0f,0.0f);
|
||||
const float v_blend = 0.5f*(crease_weight0+crease_weight1);
|
||||
if (unlikely(v_blend < 1.0f)) {
|
||||
dest.vtx = lerp(v_sharp,v_smooth,v_blend);
|
||||
}
|
||||
}
|
||||
|
||||
/* compute new vertex using corner rule */
|
||||
else {
|
||||
dest.vtx = vtx;
|
||||
}
|
||||
}
|
||||
|
||||
void convert(CatmullClark1Ring& dst) const
|
||||
{
|
||||
dst.edge_level = edge_level;
|
||||
dst.vertex_level = vertex_level;
|
||||
dst.vtx = vtx;
|
||||
dst.face_valence = face_valence;
|
||||
dst.edge_valence = 2*face_valence;
|
||||
dst.border_index = border_face == -1 ? -1 : 2*border_face;
|
||||
for (size_t i=0; i<face_valence; i++)
|
||||
dst.crease_weight[i] = faces[i].crease_weight;
|
||||
dst.vertex_crease_weight = vertex_crease_weight;
|
||||
for (size_t i=0; i<edge_valence; i++) dst.ring[i] = ring[i];
|
||||
|
||||
dst.eval_start_index = eval_start_face_index;
|
||||
dst.eval_unique_identifier = eval_unique_identifier;
|
||||
|
||||
assert( dst.hasValidPositions() );
|
||||
}
|
||||
|
||||
|
||||
/* gets limit tangent in the direction of edge vtx -> ring[0] */
|
||||
__forceinline Vertex getLimitTangent() const
|
||||
{
|
||||
CatmullClark1Ring cc_vtx;
|
||||
|
||||
/* fast path for quad only rings */
|
||||
if (only_quads)
|
||||
{
|
||||
convert(cc_vtx);
|
||||
return cc_vtx.getLimitTangent();
|
||||
}
|
||||
|
||||
subdivide(cc_vtx);
|
||||
return 2.0f * cc_vtx.getLimitTangent();
|
||||
}
|
||||
|
||||
/* gets limit tangent in the direction of edge vtx -> ring[edge_valence-2] */
|
||||
__forceinline Vertex getSecondLimitTangent() const
|
||||
{
|
||||
CatmullClark1Ring cc_vtx;
|
||||
|
||||
/* fast path for quad only rings */
|
||||
if (only_quads)
|
||||
{
|
||||
convert(cc_vtx);
|
||||
return cc_vtx.getSecondLimitTangent();
|
||||
}
|
||||
|
||||
subdivide(cc_vtx);
|
||||
return 2.0f * cc_vtx.getSecondLimitTangent();
|
||||
}
|
||||
|
||||
|
||||
/* gets limit vertex */
|
||||
__forceinline Vertex getLimitVertex() const
|
||||
{
|
||||
CatmullClark1Ring cc_vtx;
|
||||
|
||||
/* fast path for quad only rings */
|
||||
if (only_quads)
|
||||
convert(cc_vtx);
|
||||
else
|
||||
subdivide(cc_vtx);
|
||||
return cc_vtx.getLimitVertex();
|
||||
}
|
||||
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream o, const GeneralCatmullClark1RingT &c)
|
||||
{
|
||||
o << "vtx " << c.vtx << " size = " << c.edge_valence << ", border_face = " << c.border_face << ", " << " face_valence = " << c.face_valence <<
|
||||
", edge_level = " << c.edge_level << ", vertex_level = " << c.vertex_level << ", ring: " << embree_endl;
|
||||
for (size_t v=0, f=0; f<c.face_valence; v+=c.faces[f++].size) {
|
||||
for (size_t i=v; i<v+c.faces[f].size; i++) {
|
||||
o << i << " -> " << c.ring[i];
|
||||
if (i == v) o << " crease = " << c.faces[f].crease_weight;
|
||||
o << embree_endl;
|
||||
}
|
||||
}
|
||||
return o;
|
||||
}
|
||||
};
|
||||
}
|
||||
30
Framework/external/embree/kernels/subdiv/catmullrom_curve.cpp
vendored
Normal file
30
Framework/external/embree/kernels/subdiv/catmullrom_curve.cpp
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "catmullrom_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Fills the lookup tables with the Catmull-Rom basis and its first
 *  derivative evaluated at u = (j+dj)/i for i in [1,N] and j in [0,N].
 *  Row i == 0 is not written by this constructor. */
PrecomputedCatmullRomBasis::PrecomputedCatmullRomBasis(int dj)
{
  for (size_t size=1; size<=N; size++)
  {
    const float denom = float(size);
    for (size_t ofs=0; ofs<=N; ofs++)
    {
      const float u = float(ofs+dj)/denom;

      /* basis weights of the four control points */
      const Vec4f basis = CatmullRomBasis::eval(u);
      c0[size][ofs] = basis.x;
      c1[size][ofs] = basis.y;
      c2[size][ofs] = basis.z;
      c3[size][ofs] = basis.w;

      /* first derivative of the basis weights */
      const Vec4f dbasis = CatmullRomBasis::derivative(u);
      d0[size][ofs] = dbasis.x;
      d1[size][ofs] = dbasis.y;
      d2[size][ofs] = dbasis.z;
      d3[size][ofs] = dbasis.w;
    }
  }
}
|
||||
/* table built with dj = 0: entry [i][j] holds the basis at u = j/i */
PrecomputedCatmullRomBasis catmullrom_basis0(0);
|
||||
/* table built with dj = 1: entry [i][j] holds the basis at u = (j+1)/i */
PrecomputedCatmullRomBasis catmullrom_basis1(1);
|
||||
}
|
||||
314
Framework/external/embree/kernels/subdiv/catmullrom_curve.h
vendored
Normal file
314
Framework/external/embree/kernels/subdiv/catmullrom_curve.h
vendored
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "bezier_curve.h"
|
||||
#include "../common/scene_curves.h"
|
||||
|
||||
/*
|
||||
|
||||
Implements Catmull-Rom curves with control points p0, p1, p2, p3. At
|
||||
t=0 the curve goes through p1, with tangent (p2-p0)/2, and for t=1
|
||||
the curve goes through p2 with tangent (p3-p1)/2.
|
||||
|
||||
*/
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class CatmullRomBasis
|
||||
{
|
||||
public:
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> eval(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T s = T(1.0f) - u;
|
||||
const T n0 = - t * s * s;
|
||||
const T n1 = 2.0f + t * t * (3.0f * t - 5.0f);
|
||||
const T n2 = 2.0f + s * s * (3.0f * s - 5.0f);
|
||||
const T n3 = - s * t * t;
|
||||
return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T s = 1.0f - u;
|
||||
const T n0 = - s * s + 2.0f * s * t;
|
||||
const T n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t;
|
||||
const T n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s;
|
||||
const T n3 = -2.0f * s * t + t * t;
|
||||
return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __forceinline Vec4<T> derivative2(const T& u)
|
||||
{
|
||||
const T t = u;
|
||||
const T n0 = -3.0f * t + 2.0f;
|
||||
const T n1 = 9.0f * t - 5.0f;
|
||||
const T n2 = -9.0f * t + 4.0f;
|
||||
const T n3 = 3.0f * t - 1.0f;
|
||||
return Vec4<T>(n0, n1, n2, n3);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Lookup tables of the Catmull-Rom basis, indexed as [size][ofs].
 *  The tables are filled by the constructor taking a shift. */
struct PrecomputedCatmullRomBasis
{
  enum { N = 16 };  // largest supported table index in both dimensions

  /*! Leaves the tables uninitialized. */
  PrecomputedCatmullRomBasis() {}

  /*! Fills the tables; shift is added to the sample index before it is
   *  divided by the row index. */
  PrecomputedCatmullRomBasis(int shift);

  /* basis for Catmull-Rom evaluation, one table per control point */
  float c0[N+1][N+1];
  float c1[N+1][N+1];
  float c2[N+1][N+1];
  float c3[N+1][N+1];

  /* basis for Catmull-Rom derivative evaluation, one table per control point */
  float d0[N+1][N+1];
  float d1[N+1][N+1];
  float d2[N+1][N+1];
  float d3[N+1][N+1];
};
|
||||
/* precomputed basis table read by CatmullRomCurveT::eval0 and derivative0 */
extern PrecomputedCatmullRomBasis catmullrom_basis0;
|
||||
/* precomputed basis table read by CatmullRomCurveT::eval1 and derivative1 */
extern PrecomputedCatmullRomBasis catmullrom_basis1;
|
||||
|
||||
template<typename Vertex>
|
||||
struct CatmullRomCurveT
|
||||
{
|
||||
Vertex v0,v1,v2,v3;
|
||||
|
||||
__forceinline CatmullRomCurveT() {}
|
||||
|
||||
__forceinline CatmullRomCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
|
||||
: v0(v0), v1(v1), v2(v2), v3(v3) {}
|
||||
|
||||
__forceinline Vertex begin() const {
|
||||
return v1;
|
||||
}
|
||||
|
||||
__forceinline Vertex end() const {
|
||||
return v2;
|
||||
}
|
||||
|
||||
__forceinline Vertex center() const {
|
||||
return 0.5f*(v0+v1);
|
||||
}
|
||||
|
||||
__forceinline BBox<Vertex> bounds() const {
|
||||
return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
|
||||
}
|
||||
|
||||
__forceinline friend CatmullRomCurveT operator -( const CatmullRomCurveT& a, const Vertex& b ) {
|
||||
return CatmullRomCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
|
||||
}
|
||||
|
||||
__forceinline CatmullRomCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
|
||||
{
|
||||
const Vec3ff q0(xfmVector(space,v0-p), v0.w);
|
||||
const Vec3ff q1(xfmVector(space,v1-p), v1.w);
|
||||
const Vec3ff q2(xfmVector(space,v2-p), v2.w);
|
||||
const Vec3ff q3(xfmVector(space,v3-p), v3.w);
|
||||
return CatmullRomCurveT<Vec3ff>(q0,q1,q2,q3);
|
||||
}
|
||||
|
||||
__forceinline Vertex eval(const float t) const
|
||||
{
|
||||
const Vec4<float> b = CatmullRomBasis::eval(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_du(const float t) const
|
||||
{
|
||||
const Vec4<float> b = CatmullRomBasis::derivative(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline Vertex eval_dudu(const float t) const
|
||||
{
|
||||
const Vec4<float> b = CatmullRomBasis::derivative2(t);
|
||||
return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
|
||||
}
|
||||
|
||||
__forceinline void eval(const float t, Vertex& p, Vertex& dp) const
|
||||
{
|
||||
p = eval(t);
|
||||
dp = eval_du(t);
|
||||
}
|
||||
|
||||
__forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
|
||||
{
|
||||
p = eval(t);
|
||||
dp = eval_du(t);
|
||||
ddp = eval_dudu(t);
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = CatmullRomBasis::eval(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = CatmullRomBasis::derivative(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
|
||||
{
|
||||
const Vec4vf<M> b = CatmullRomBasis::derivative2(t);
|
||||
return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
|
||||
{
|
||||
p = veval<M>(t);
|
||||
dp = veval_du<M>(t);
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> eval0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedCatmullRomBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&catmullrom_basis0.c0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis0.c1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis0.c2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&catmullrom_basis0.c3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> eval1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedCatmullRomBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&catmullrom_basis1.c0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis1.c1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis1.c2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&catmullrom_basis1.c3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> derivative0(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedCatmullRomBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&catmullrom_basis0.d0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis0.d1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis0.d2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&catmullrom_basis0.d3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
template<int M>
|
||||
__forceinline Vec4vf<M> derivative1(const int ofs, const int size) const
|
||||
{
|
||||
assert(size <= PrecomputedCatmullRomBasis::N);
|
||||
assert(ofs <= size);
|
||||
return madd(vfloat<M>::loadu(&catmullrom_basis1.d0[size][ofs]), Vec4vf<M>(v0),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis1.d1[size][ofs]), Vec4vf<M>(v1),
|
||||
madd(vfloat<M>::loadu(&catmullrom_basis1.d2[size][ofs]), Vec4vf<M>(v2),
|
||||
vfloat<M>::loadu(&catmullrom_basis1.d3[size][ofs]) * Vec4vf<M>(v3))));
|
||||
}
|
||||
|
||||
/* calculates bounds of catmull-rom curve geometry */
|
||||
__forceinline BBox3fa accurateRoundBounds() const
|
||||
{
|
||||
const int N = 7;
|
||||
const float scale = 1.0f/(3.0f*(N-1));
|
||||
Vec4vfx pl(pos_inf), pu(neg_inf);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vintx vi = vintx(i)+vintx(step);
|
||||
vboolx valid = vi <= vintx(N);
|
||||
const Vec4vfx p = eval0<VSIZEX>(i,N);
|
||||
const Vec4vfx dp = derivative0<VSIZEX>(i,N);
|
||||
const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
|
||||
const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
|
||||
pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
|
||||
pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const float r_min = reduce_min(pl.w);
|
||||
const float r_max = reduce_max(pu.w);
|
||||
const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
|
||||
return enlarge(BBox3fa(lower,upper),upper_r);
|
||||
}
|
||||
|
||||
/* calculates bounds when tessellated into N line segments */
|
||||
__forceinline BBox3fa accurateFlatBounds(int N) const
|
||||
{
|
||||
if (likely(N == 4))
|
||||
{
|
||||
const Vec4vf4 pi = eval0<4>(0,4);
|
||||
const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
|
||||
const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
|
||||
const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
|
||||
const Vec3ff pe = end();
|
||||
return enlarge(BBox3fa(min(lower,pe),max(upper,pe)),max(upper_r,Vec3fa(abs(pe.w))));
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
|
||||
for (int i=0; i<=N; i+=VSIZEX)
|
||||
{
|
||||
vboolx valid = vintx(i)+vintx(step) <= vintx(N);
|
||||
const Vec4vfx pi = eval0<VSIZEX>(i,N);
|
||||
|
||||
pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
|
||||
pl.y = select(valid,min(pl.y,pi.y),pl.y);
|
||||
pl.z = select(valid,min(pl.z,pi.z),pl.z);
|
||||
|
||||
pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
|
||||
pu.y = select(valid,max(pu.y,pi.y),pu.y);
|
||||
pu.z = select(valid,max(pu.z,pi.z),pu.z);
|
||||
|
||||
ru = select(valid,max(ru,abs(pi.w)),ru);
|
||||
}
|
||||
const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
|
||||
const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
|
||||
const Vec3fa upper_r(reduce_max(ru));
|
||||
return enlarge(BBox3fa(lower,upper),upper_r);
|
||||
}
|
||||
}
|
||||
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const CatmullRomCurveT& curve) {
|
||||
return cout << "CatmullRomCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline void convert(const CatmullRomCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve)
|
||||
{
|
||||
const Vertex v0 = icurve.v1;
|
||||
const Vertex v1 = icurve.v1+(icurve.v2-icurve.v0)*(1.0f/6.0f);
|
||||
const Vertex v2 = icurve.v2+(icurve.v1-icurve.v3)*(1.0f/6.0f);
|
||||
const Vertex v3 = icurve.v2;
|
||||
ocurve = BezierCurveT<Vertex>(v0,v1,v2,v3);
|
||||
}
|
||||
|
||||
template<typename CurveGeometry>
|
||||
__forceinline CatmullRomCurveT<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CatmullRomCurveT<Vec3ff>& curve)
|
||||
{
|
||||
return CatmullRomCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
|
||||
enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
|
||||
}
|
||||
|
||||
typedef CatmullRomCurveT<Vec3fa> CatmullRomCurve3fa;
|
||||
}
|
||||
|
||||
226
Framework/external/embree/kernels/subdiv/feature_adaptive_eval.h
vendored
Normal file
226
Framework/external/embree/kernels/subdiv/feature_adaptive_eval.h
vendored
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
struct FeatureAdaptiveEval
|
||||
{
|
||||
public:
|
||||
|
||||
typedef PatchT<Vertex,Vertex_t> Patch;
|
||||
typedef typename Patch::Ref Ref;
|
||||
typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
typedef BSplinePatchT<Vertex,Vertex_t> BSplinePatch;
|
||||
typedef BezierPatchT<Vertex,Vertex_t> BezierPatch;
|
||||
typedef GregoryPatchT<Vertex,Vertex_t> GregoryPatch;
|
||||
typedef BilinearPatchT<Vertex,Vertex_t> BilinearPatch;
|
||||
typedef BezierCurveT<Vertex> BezierCurve;
|
||||
|
||||
public:
|
||||
|
||||
FeatureAdaptiveEval (const HalfEdge* edge, const char* vertices, size_t stride, const float u, const float v,
|
||||
Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
|
||||
: P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv)
|
||||
{
|
||||
switch (edge->patch_type) {
|
||||
case HalfEdge::BILINEAR_PATCH: BilinearPatch(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
|
||||
case HalfEdge::REGULAR_QUAD_PATCH: RegularPatchT(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
case HalfEdge::IRREGULAR_QUAD_PATCH: GregoryPatch(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
|
||||
#endif
|
||||
default: {
|
||||
GeneralCatmullClarkPatch patch(edge,vertices,stride);
|
||||
eval(patch,Vec2f(u,v),0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Evaluates a Catmull-Clark patch at location (u,v), starting the
 *  feature adaptive evaluation with the given derivative scale and
 *  subdivision depth. Results are written through the output pointers.
 *  The patch is passed on by non-const reference to eval(). */
FeatureAdaptiveEval (CatmullClarkPatch& patch, const float u, const float v, float dscale, size_t depth,
                     Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
  : P(P),
    dPdu(dPdu),
    dPdv(dPdv),
    ddPdudu(ddPdudu),
    ddPdvdv(ddPdvdv),
    ddPdudv(ddPdudv)
{
  const Vec2f uv(u,v);
  eval(patch,uv,dscale,depth);
}
|
||||
|
||||
void eval_general_quad(const GeneralCatmullClarkPatch& patch, array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE>& patches, const Vec2f& uv, size_t depth)
|
||||
{
|
||||
float u = uv.x, v = uv.y;
|
||||
if (v < 0.5f) {
|
||||
if (u < 0.5f) {
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,0);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[0],Vec2f(2.0f*u,2.0f*v),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[0],Vec2f(2.0f*u,2.0f*v),2.0f,depth+1);
|
||||
#endif
|
||||
if (dPdu && dPdv) {
|
||||
const Vertex dpdx = *dPdu, dpdy = *dPdv;
|
||||
*dPdu = dpdx; *dPdv = dpdy;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,1);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[1],Vec2f(2.0f*v,2.0f-2.0f*u),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[1],Vec2f(2.0f*v,2.0f-2.0f*u),2.0f,depth+1);
|
||||
#endif
|
||||
if (dPdu && dPdv) {
|
||||
const Vertex dpdx = *dPdu, dpdy = *dPdv;
|
||||
*dPdu = -dpdy; *dPdv = dpdx;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (u > 0.5f) {
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,2);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[2],Vec2f(2.0f-2.0f*u,2.0f-2.0f*v),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[2],Vec2f(2.0f-2.0f*u,2.0f-2.0f*v),2.0f,depth+1);
|
||||
#endif
|
||||
if (dPdu && dPdv) {
|
||||
const Vertex dpdx = *dPdu, dpdy = *dPdv;
|
||||
*dPdu = -dpdx; *dPdv = -dpdy;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,3);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[3],Vec2f(2.0f-2.0f*v,2.0f*u),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[3],Vec2f(2.0f-2.0f*v,2.0f*u),2.0f,depth+1);
|
||||
#endif
|
||||
if (dPdu && dPdv) {
|
||||
const Vertex dpdx = *dPdu, dpdy = *dPdv;
|
||||
*dPdu = dpdy; *dPdv = -dpdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Returns true once depth has reached the maximum evaluation depth.
 *  The limit is PATCH_MAX_EVAL_DEPTH_CREASE when the ring type has
 *  TYPE_CREASES bits set and PATCH_MAX_EVAL_DEPTH_IRREGULAR otherwise.
 *  The patch argument is not inspected; a resolution based criterion
 *  (PATCH_MIN_RESOLUTION) is not active here. */
__forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
{
  int max_eval_depth;
  if (type & CatmullClarkRing::TYPE_CREASES)
    max_eval_depth = PATCH_MAX_EVAL_DEPTH_CREASE;
  else
    max_eval_depth = PATCH_MAX_EVAL_DEPTH_IRREGULAR;

  return depth >= (size_t)max_eval_depth;
}
|
||||
|
||||
/*! Feature adaptive evaluation of a Catmull-Clark patch at uv.
 *
 *  Each iteration classifies the patch. If the depth limit is reached,
 *  or the patch type permits direct evaluation, the matching patch
 *  representation is evaluated and the function returns. Otherwise the
 *  patch is subdivided, replaced by the child containing uv, uv is
 *  remapped to the child, dscale is doubled and depth is incremented.
 *  The borders are forwarded to whichever patch representation is
 *  finally evaluated. */
void eval(CatmullClarkPatch& patch, Vec2f uv, float dscale, size_t depth,
          BezierCurve* border0 = nullptr, BezierCurve* border1 = nullptr, BezierCurve* border2 = nullptr, BezierCurve* border3 = nullptr)
{
  for (;;)
  {
    typename CatmullClarkPatch::Type ty = patch.type();

    /* depth limit reached: evaluate with a regular or fill patch */
    if (unlikely(final(patch,ty,depth)))
    {
      if (ty & CatmullClarkRing::TYPE_REGULAR) {
        RegularPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
        PATCH_DEBUG_SUBDIVISION(234423,c,c,-1);
      } else {
        IrregularFillPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
        PATCH_DEBUG_SUBDIVISION(34534,c,-1,c);
      }
      return;
    }

    if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
      assert(depth > 0);
      RegularPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
      PATCH_DEBUG_SUBDIVISION(43524,c,c,-1);
      return;
    }

#if PATCH_USE_GREGORY == 2
    if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
      assert(depth > 0);
      GregoryPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
      PATCH_DEBUG_SUBDIVISION(23498,c,-1,c);
      return;
    }
#endif

    /* subdivide and descend into the child that contains uv */
    array_t<CatmullClarkPatch,4> patches;
    patch.subdivide(patches); // FIXME: only have to generate one of the patches

    const float u = uv.x, v = uv.y;
    const bool lower_half = v < 0.5f;
    const bool right_half = lower_half ? !(u < 0.5f) : (u > 0.5f);
    const unsigned child  = lower_half ? (right_half ? 1 : 0) : (right_half ? 2 : 3);
    const float cu = right_half ? 2.0f*u-1.0f : 2.0f*u;
    const float cv = lower_half ? 2.0f*v      : 2.0f*v-1.0f;

    patch = patches[child];
    uv = Vec2f(cu,cv);
    dscale *= 2.0f;
    depth++;
  }
}
|
||||
|
||||
void eval(const GeneralCatmullClarkPatch& patch, const Vec2f& uv, const size_t depth)
|
||||
{
|
||||
/* convert into standard quad patch if possible */
|
||||
if (likely(patch.isQuadPatch()))
|
||||
{
|
||||
CatmullClarkPatch qpatch; patch.init(qpatch);
|
||||
return eval(qpatch,uv,1.0f,depth);
|
||||
}
|
||||
|
||||
/* subdivide patch */
|
||||
unsigned N;
|
||||
array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
|
||||
patch.subdivide(patches,N); // FIXME: only have to generate one of the patches
|
||||
|
||||
/* parametrization for quads */
|
||||
if (N == 4)
|
||||
eval_general_quad(patch,patches,uv,depth);
|
||||
|
||||
/* parametrization for arbitrary polygons */
|
||||
else
|
||||
{
|
||||
const unsigned l = (unsigned) floor(0.5f*uv.x); const float u = 2.0f*frac(0.5f*uv.x)-0.5f;
|
||||
const unsigned h = (unsigned) floor(0.5f*uv.y); const float v = 2.0f*frac(0.5f*uv.y)-0.5f;
|
||||
const unsigned i = 4*h+l; assert(i<N);
|
||||
if (i >= N) return;
|
||||
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,i);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[i],Vec2f(u,v),1.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[i],Vec2f(u,v),1.0f,depth+1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Vertex* const P;
|
||||
Vertex* const dPdu;
|
||||
Vertex* const dPdv;
|
||||
Vertex* const ddPdudu;
|
||||
Vertex* const ddPdvdv;
|
||||
Vertex* const ddPdudv;
|
||||
};
|
||||
}
|
||||
}
|
||||
359
Framework/external/embree/kernels/subdiv/feature_adaptive_eval_grid.h
vendored
Normal file
359
Framework/external/embree/kernels/subdiv/feature_adaptive_eval_grid.h
vendored
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bspline_patch.h"
|
||||
#include "gregory_patch.h"
|
||||
#include "tessellation.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct FeatureAdaptiveEvalGrid
|
||||
{
|
||||
typedef CatmullClark1Ring3fa CatmullClarkRing;
|
||||
typedef CatmullClarkPatch3fa CatmullClarkPatch;
|
||||
typedef BilinearPatch3fa BilinearPatch;
|
||||
typedef BSplinePatch3fa BSplinePatch;
|
||||
typedef BezierPatch3fa BezierPatch;
|
||||
typedef GregoryPatch3fa GregoryPatch;
|
||||
|
||||
private:
|
||||
const unsigned x0,x1;
|
||||
const unsigned y0,y1;
|
||||
const unsigned swidth,sheight;
|
||||
const float rcp_swidth, rcp_sheight;
|
||||
float* const Px;
|
||||
float* const Py;
|
||||
float* const Pz;
|
||||
float* const U;
|
||||
float* const V;
|
||||
float* const Nx;
|
||||
float* const Ny;
|
||||
float* const Nz;
|
||||
const unsigned dwidth;
|
||||
//const unsigned dheight;
|
||||
unsigned count;
|
||||
|
||||
|
||||
public:
|
||||
FeatureAdaptiveEvalGrid (const GeneralCatmullClarkPatch3fa& patch, unsigned subPatch,
|
||||
const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
|
||||
float* Px, float* Py, float* Pz, float* U, float* V,
|
||||
float* Nx, float* Ny, float* Nz,
|
||||
const unsigned dwidth, const unsigned dheight)
|
||||
: x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
|
||||
Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
|
||||
{
|
||||
assert(swidth < (2<<20) && sheight < (2<<20));
|
||||
const BBox2f srange(Vec2f(0.0f,0.0f),Vec2f(float(swidth-1),float(sheight-1)));
|
||||
const BBox2f erange(Vec2f((float)x0,(float)y0),Vec2f((float)x1,(float)y1));
|
||||
|
||||
/* convert into standard quad patch if possible */
|
||||
if (likely(patch.isQuadPatch()))
|
||||
{
|
||||
CatmullClarkPatch3fa qpatch; patch.init(qpatch);
|
||||
eval(qpatch, srange, erange, 0);
|
||||
assert(count == (x1-x0+1)*(y1-y0+1));
|
||||
return;
|
||||
}
|
||||
|
||||
/* subdivide patch */
|
||||
unsigned N;
|
||||
array_t<CatmullClarkPatch3fa,GeneralCatmullClarkPatch3fa::SIZE> patches;
|
||||
patch.subdivide(patches,N);
|
||||
|
||||
if (N == 4)
|
||||
{
|
||||
const Vec2f c = srange.center();
|
||||
const BBox2f srange0(srange.lower,c);
|
||||
const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
|
||||
const BBox2f srange2(c,srange.upper);
|
||||
const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
|
||||
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve3fa borders[GeneralCatmullClarkPatch3fa::SIZE]; patch.getLimitBorder(borders);
|
||||
BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve3fa border1l,border1r; borders[1].subdivide(border1l,border1r);
|
||||
BezierCurve3fa border2l,border2r; borders[2].subdivide(border2l,border2r);
|
||||
BezierCurve3fa border3l,border3r; borders[3].subdivide(border3l,border3r);
|
||||
GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
|
||||
eval(patches[0],srange0,intersect(srange0,erange),1,&border0l,nullptr,nullptr,&border3r);
|
||||
eval(patches[1],srange1,intersect(srange1,erange),1,&border0r,&border1l,nullptr,nullptr);
|
||||
eval(patches[2],srange2,intersect(srange2,erange),1,nullptr,&border1r,&border2l,nullptr);
|
||||
eval(patches[3],srange3,intersect(srange3,erange),1,nullptr,nullptr,&border2r,&border3l);
|
||||
#else
|
||||
GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
|
||||
eval(patches[0],srange0,intersect(srange0,erange),1);
|
||||
eval(patches[1],srange1,intersect(srange1,erange),1);
|
||||
eval(patches[2],srange2,intersect(srange2,erange),1);
|
||||
eval(patches[3],srange3,intersect(srange3,erange),1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(subPatch < N);
|
||||
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve3fa borders[2]; patch.getLimitBorder(borders,subPatch);
|
||||
BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve3fa border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval(patches[subPatch], srange, erange, 1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval(patches[subPatch], srange, erange, 1);
|
||||
#endif
|
||||
|
||||
}
|
||||
assert(count == (x1-x0+1)*(y1-y0+1));
|
||||
}
|
||||
|
||||
FeatureAdaptiveEvalGrid (const CatmullClarkPatch3fa& patch,
|
||||
const BBox2f& srange, const BBox2f& erange, const unsigned depth,
|
||||
const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
|
||||
float* Px, float* Py, float* Pz, float* U, float* V,
|
||||
float* Nx, float* Ny, float* Nz,
|
||||
const unsigned dwidth, const unsigned dheight)
|
||||
: x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
|
||||
Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
|
||||
{
|
||||
eval(patch,srange,erange,depth);
|
||||
}
|
||||
|
||||
template<typename Patch>
|
||||
void evalLocalGrid(const Patch& patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1)
|
||||
{
|
||||
const float scale_x = rcp(srange.upper.x-srange.lower.x);
|
||||
const float scale_y = rcp(srange.upper.y-srange.lower.y);
|
||||
count += (lx1-lx0)*(ly1-ly0);
|
||||
|
||||
#if 0
|
||||
for (unsigned iy=ly0; iy<ly1; iy++) {
|
||||
for (unsigned ix=lx0; ix<lx1; ix++) {
|
||||
const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x);
|
||||
const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y);
|
||||
const Vec3fa p = patch.eval(lu,lv);
|
||||
const float u = float(ix)*rcp_swidth;
|
||||
const float v = float(iy)*rcp_sheight;
|
||||
const int ofs = (iy-y0)*dwidth+(ix-x0);
|
||||
Px[ofs] = p.x;
|
||||
Py[ofs] = p.y;
|
||||
Pz[ofs] = p.z;
|
||||
U[ofs] = u;
|
||||
V[ofs] = v;
|
||||
}
|
||||
}
|
||||
#else
|
||||
foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) {
|
||||
const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x);
|
||||
const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y);
|
||||
const Vec3vfx p = patch.eval(lu,lv);
|
||||
Vec3vfx n = zero;
|
||||
if (unlikely(Nx != nullptr)) n = normalize_safe(patch.normal(lu,lv));
|
||||
const vfloatx u = vfloatx(ix)*rcp_swidth;
|
||||
const vfloatx v = vfloatx(iy)*rcp_sheight;
|
||||
const vintx ofs = (iy-y0)*dwidth+(ix-x0);
|
||||
if (likely(all(valid)) && all(iy==iy[0])) {
|
||||
const unsigned ofs2 = ofs[0];
|
||||
vfloatx::storeu(Px+ofs2,p.x);
|
||||
vfloatx::storeu(Py+ofs2,p.y);
|
||||
vfloatx::storeu(Pz+ofs2,p.z);
|
||||
vfloatx::storeu(U+ofs2,u);
|
||||
vfloatx::storeu(V+ofs2,v);
|
||||
if (unlikely(Nx != nullptr)) {
|
||||
vfloatx::storeu(Nx+ofs2,n.x);
|
||||
vfloatx::storeu(Ny+ofs2,n.y);
|
||||
vfloatx::storeu(Nz+ofs2,n.z);
|
||||
}
|
||||
} else {
|
||||
foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) {
|
||||
const unsigned ofs2 = ofs[j]-j;
|
||||
vfloatx::storeu(valid,Px+ofs2,p.x);
|
||||
vfloatx::storeu(valid,Py+ofs2,p.y);
|
||||
vfloatx::storeu(valid,Pz+ofs2,p.z);
|
||||
vfloatx::storeu(valid,U+ofs2,u);
|
||||
vfloatx::storeu(valid,V+ofs2,v);
|
||||
if (unlikely(Nx != nullptr)) {
|
||||
vfloatx::storeu(valid,Nx+ofs2,n.x);
|
||||
vfloatx::storeu(valid,Ny+ofs2,n.y);
|
||||
vfloatx::storeu(valid,Nz+ofs2,n.z);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Returns true once depth has reached the maximum evaluation depth.
 *  The limit is PATCH_MAX_EVAL_DEPTH_CREASE when the ring type has
 *  TYPE_CREASES bits set and PATCH_MAX_EVAL_DEPTH_IRREGULAR otherwise.
 *  The patch argument is not inspected; a resolution based criterion
 *  (PATCH_MIN_RESOLUTION) is not active here. */
__forceinline bool final(const CatmullClarkPatch3fa& patch, const CatmullClarkRing::Type type, unsigned depth)
{
  unsigned max_eval_depth;
  if (type & CatmullClarkRing::TYPE_CREASES)
    max_eval_depth = PATCH_MAX_EVAL_DEPTH_CREASE;
  else
    max_eval_depth = PATCH_MAX_EVAL_DEPTH_IRREGULAR;

  return depth >= max_eval_depth;
}
|
||||
|
||||
void eval(const CatmullClarkPatch3fa& patch, const BBox2f& srange, const BBox2f& erange, const unsigned depth,
|
||||
const BezierCurve3fa* border0 = nullptr, const BezierCurve3fa* border1 = nullptr, const BezierCurve3fa* border2 = nullptr, const BezierCurve3fa* border3 = nullptr)
|
||||
{
|
||||
if (erange.empty())
|
||||
return;
|
||||
|
||||
int lx0 = (int) ceilf(erange.lower.x);
|
||||
int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0));
|
||||
int ly0 = (int) ceilf(erange.lower.y);
|
||||
int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0));
|
||||
if (lx0 >= lx1 || ly0 >= ly1) return;
|
||||
|
||||
CatmullClarkPatch::Type ty = patch.type();
|
||||
|
||||
if (unlikely(final(patch,ty,depth)))
|
||||
{
|
||||
if (ty & CatmullClarkRing::TYPE_REGULAR) {
|
||||
RegularPatch rpatch(patch,border0,border1,border2,border3);
|
||||
evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
|
||||
return;
|
||||
} else {
|
||||
IrregularFillPatch ipatch(patch,border0,border1,border2,border3);
|
||||
evalLocalGrid(ipatch,srange,lx0,lx1,ly0,ly1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
|
||||
assert(depth > 0);
|
||||
RegularPatch rpatch(patch,border0,border1,border2,border3);
|
||||
evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
|
||||
return;
|
||||
}
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
|
||||
assert(depth > 0);
|
||||
GregoryPatch gpatch(patch,border0,border1,border2,border3);
|
||||
evalLocalGrid(gpatch,srange,lx0,lx1,ly0,ly1);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
array_t<CatmullClarkPatch3fa,4> patches;
|
||||
patch.subdivide(patches);
|
||||
|
||||
const Vec2f c = srange.center();
|
||||
const BBox2f srange0(srange.lower,c);
|
||||
const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
|
||||
const BBox2f srange2(c,srange.upper);
|
||||
const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
|
||||
|
||||
eval(patches[0],srange0,intersect(srange0,erange),depth+1);
|
||||
eval(patches[1],srange1,intersect(srange1,erange),depth+1);
|
||||
eval(patches[2],srange2,intersect(srange2,erange),depth+1);
|
||||
eval(patches[3],srange3,intersect(srange3,erange),depth+1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Eval, typename Patch>
|
||||
bool stitch_col(const Patch& patch, int subPatch,
|
||||
const bool right, const unsigned y0, const unsigned y1, const int fine_y, const int coarse_y,
|
||||
float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dx0, const unsigned dwidth, const unsigned dheight)
|
||||
{
|
||||
assert(coarse_y <= fine_y);
|
||||
if (likely(fine_y == coarse_y))
|
||||
return false;
|
||||
|
||||
const unsigned y0s = stitch(y0,fine_y,coarse_y);
|
||||
const unsigned y1s = stitch(y1,fine_y,coarse_y);
|
||||
const unsigned M = y1s-y0s+1 + VSIZEX;
|
||||
|
||||
dynamic_large_stack_array(float,px,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,py,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,pz,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,u,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,v,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,nx,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,ny,M,64*sizeof(float));
|
||||
dynamic_large_stack_array(float,nz,M,64*sizeof(float));
|
||||
const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
|
||||
Eval(patch,subPatch, right,right, y0s,y1s, 2,coarse_y+1, px,py,pz,u,v,
|
||||
has_Nxyz ? (float*)nx : nullptr,has_Nxyz ? (float*)ny : nullptr ,has_Nxyz ? (float*)nz : nullptr, 1,4097);
|
||||
|
||||
for (unsigned y=y0; y<=y1; y++)
|
||||
{
|
||||
const unsigned ys = stitch(y,fine_y,coarse_y)-y0s;
|
||||
Px[(y-y0)*dwidth+dx0] = px[ys];
|
||||
Py[(y-y0)*dwidth+dx0] = py[ys];
|
||||
Pz[(y-y0)*dwidth+dx0] = pz[ys];
|
||||
U [(y-y0)*dwidth+dx0] = u[ys];
|
||||
V [(y-y0)*dwidth+dx0] = v[ys];
|
||||
if (unlikely(has_Nxyz)) {
|
||||
Nx[(y-y0)*dwidth+dx0] = nx[ys];
|
||||
Ny[(y-y0)*dwidth+dx0] = ny[ys];
|
||||
Nz[(y-y0)*dwidth+dx0] = nz[ys];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename Eval, typename Patch>
|
||||
bool stitch_row(const Patch& patch, int subPatch,
|
||||
const bool bottom, const unsigned x0, const unsigned x1, const int fine_x, const int coarse_x,
|
||||
float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dy0, const unsigned dwidth, const unsigned dheight)
|
||||
{
|
||||
assert(coarse_x <= fine_x);
|
||||
if (likely(fine_x == coarse_x))
|
||||
return false;
|
||||
|
||||
const unsigned x0s = stitch(x0,fine_x,coarse_x);
|
||||
const unsigned x1s = stitch(x1,fine_x,coarse_x);
|
||||
const unsigned M = x1s-x0s+1 + VSIZEX;
|
||||
|
||||
dynamic_large_stack_array(float,px,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,py,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,pz,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,u,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,v,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,nx,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,ny,M,32*sizeof(float));
|
||||
dynamic_large_stack_array(float,nz,M,32*sizeof(float));
|
||||
const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
|
||||
Eval(patch,subPatch, x0s,x1s, bottom,bottom, coarse_x+1,2, px,py,pz,u,v,
|
||||
has_Nxyz ? (float*)nx :nullptr, has_Nxyz ? (float*)ny : nullptr , has_Nxyz ? (float*)nz : nullptr, 4097,1);
|
||||
|
||||
for (unsigned x=x0; x<=x1; x++)
|
||||
{
|
||||
const unsigned xs = stitch(x,fine_x,coarse_x)-x0s;
|
||||
Px[dy0*dwidth+x-x0] = px[xs];
|
||||
Py[dy0*dwidth+x-x0] = py[xs];
|
||||
Pz[dy0*dwidth+x-x0] = pz[xs];
|
||||
U [dy0*dwidth+x-x0] = u[xs];
|
||||
V [dy0*dwidth+x-x0] = v[xs];
|
||||
if (unlikely(has_Nxyz)) {
|
||||
Nx[dy0*dwidth+x-x0] = nx[xs];
|
||||
Ny[dy0*dwidth+x-x0] = ny[xs];
|
||||
Nz[dy0*dwidth+x-x0] = nz[xs];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename Eval, typename Patch>
|
||||
void feature_adaptive_eval_grid (const Patch& patch, unsigned subPatch, const float levels[4],
|
||||
const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
|
||||
float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dwidth, const unsigned dheight)
|
||||
{
|
||||
bool sl = false, sr = false, st = false, sb = false;
|
||||
if (levels) {
|
||||
sl = x0 == 0 && stitch_col<Eval,Patch>(patch,subPatch,0,y0,y1,sheight-1,int(levels[3]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
|
||||
sr = x1 == swidth-1 && stitch_col<Eval,Patch>(patch,subPatch,1,y0,y1,sheight-1,int(levels[1]), Px,Py,Pz,U,V,Nx,Ny,Nz, x1-x0,dwidth,dheight);
|
||||
st = y0 == 0 && stitch_row<Eval,Patch>(patch,subPatch,0,x0,x1,swidth-1,int(levels[0]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
|
||||
sb = y1 == sheight-1 && stitch_row<Eval,Patch>(patch,subPatch,1,x0,x1,swidth-1,int(levels[2]), Px,Py,Pz,U,V,Nx,Ny,Nz, y1-y0,dwidth,dheight);
|
||||
}
|
||||
const unsigned ofs = st*dwidth+sl;
|
||||
Eval(patch,subPatch,x0+sl,x1-sr,y0+st,y1-sb, swidth,sheight, Px+ofs,Py+ofs,Pz+ofs,U+ofs,V+ofs,Nx?Nx+ofs:nullptr,Ny?Ny+ofs:nullptr,Nz?Nz+ofs:nullptr, dwidth,dheight);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
186
Framework/external/embree/kernels/subdiv/feature_adaptive_eval_simd.h
vendored
Normal file
186
Framework/external/embree/kernels/subdiv/feature_adaptive_eval_simd.h
vendored
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename vbool, typename vint, typename vfloat, typename Vertex, typename Vertex_t = Vertex>
|
||||
struct FeatureAdaptiveEvalSimd
|
||||
{
|
||||
public:
|
||||
|
||||
typedef PatchT<Vertex,Vertex_t> Patch;
|
||||
typedef typename Patch::Ref Ref;
|
||||
typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
typedef BSplinePatchT<Vertex,Vertex_t> BSplinePatch;
|
||||
typedef BezierPatchT<Vertex,Vertex_t> BezierPatch;
|
||||
typedef GregoryPatchT<Vertex,Vertex_t> GregoryPatch;
|
||||
typedef BilinearPatchT<Vertex,Vertex_t> BilinearPatch;
|
||||
typedef BezierCurveT<Vertex> BezierCurve;
|
||||
|
||||
FeatureAdaptiveEvalSimd (const HalfEdge* edge, const char* vertices, size_t stride, const vbool& valid, const vfloat& u, const vfloat& v,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
|
||||
: P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
|
||||
{
|
||||
switch (edge->patch_type) {
|
||||
case HalfEdge::BILINEAR_PATCH: BilinearPatch(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
|
||||
case HalfEdge::REGULAR_QUAD_PATCH: RegularPatchT(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
case HalfEdge::IRREGULAR_QUAD_PATCH: GregoryPatchT<Vertex,Vertex_t>(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
|
||||
#endif
|
||||
default: {
|
||||
GeneralCatmullClarkPatch patch(edge,vertices,stride);
|
||||
eval_direct(valid,patch,Vec2<vfloat>(u,v),0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FeatureAdaptiveEvalSimd (const CatmullClarkPatch& patch, const vbool& valid, const vfloat& u, const vfloat& v, float dscale, size_t depth,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
|
||||
: P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
|
||||
{
|
||||
eval_direct(valid,patch,Vec2<vfloat>(u,v),dscale,depth);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
__forceinline void eval_quad_direct(const vbool& valid, array_t<CatmullClarkPatch,N>& patches, const Vec2<vfloat>& uv, float dscale, size_t depth)
|
||||
{
|
||||
const vfloat u = uv.x, v = uv.y;
|
||||
const vbool u0_mask = u < 0.5f, u1_mask = u >= 0.5f;
|
||||
const vbool v0_mask = v < 0.5f, v1_mask = v >= 0.5f;
|
||||
const vbool u0v0_mask = valid & u0_mask & v0_mask;
|
||||
const vbool u0v1_mask = valid & u0_mask & v1_mask;
|
||||
const vbool u1v0_mask = valid & u1_mask & v0_mask;
|
||||
const vbool u1v1_mask = valid & u1_mask & v1_mask;
|
||||
if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1);
|
||||
if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1);
|
||||
if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1);
|
||||
if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
__forceinline void eval_general_quad_direct(const vbool& valid, const GeneralCatmullClarkPatch& patch, array_t<CatmullClarkPatch,N>& patches, const Vec2<vfloat>& uv, float dscale, size_t depth)
|
||||
{
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[GeneralCatmullClarkPatch::SIZE]; patch.getLimitBorder(borders);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border1l,border1r; borders[1].subdivide(border1l,border1r);
|
||||
BezierCurve border2l,border2r; borders[2].subdivide(border2l,border2r);
|
||||
BezierCurve border3l,border3r; borders[3].subdivide(border3l,border3r);
|
||||
#endif
|
||||
GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
|
||||
const vfloat u = uv.x, v = uv.y;
|
||||
const vbool u0_mask = u < 0.5f, u1_mask = u >= 0.5f;
|
||||
const vbool v0_mask = v < 0.5f, v1_mask = v >= 0.5f;
|
||||
const vbool u0v0_mask = valid & u0_mask & v0_mask;
|
||||
const vbool u0v1_mask = valid & u0_mask & v1_mask;
|
||||
const vbool u1v0_mask = valid & u1_mask & v0_mask;
|
||||
const vbool u1v1_mask = valid & u1_mask & v1_mask;
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1,&border0l,nullptr,nullptr,&border3r);
|
||||
if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1,&border0r,&border1l,nullptr,nullptr);
|
||||
if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1,nullptr,&border1r,&border2l,nullptr);
|
||||
if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1,nullptr,nullptr,&border2r,&border3l);
|
||||
#else
|
||||
if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1);
|
||||
if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1);
|
||||
if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1);
|
||||
if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Returns true once the recursion depth has reached the limit that applies to
   the given ring type: PATCH_MAX_EVAL_DEPTH_CREASE when the type has any of
   the TYPE_CREASES bits set, PATCH_MAX_EVAL_DEPTH_IRREGULAR otherwise.
   The patch argument is currently unused; a resolution based criterion
   (patch.isFinalResolution(PATCH_MIN_RESOLUTION)) is disabled in this version. */
__forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
{
  size_t depth_limit = PATCH_MAX_EVAL_DEPTH_IRREGULAR;
  if (type & CatmullClarkRing::TYPE_CREASES)
    depth_limit = PATCH_MAX_EVAL_DEPTH_CREASE;

  return depth >= depth_limit;
}
|
||||
|
||||
void eval_direct(const vbool& valid, const CatmullClarkPatch& patch, const Vec2<vfloat>& uv, float dscale, size_t depth,
|
||||
BezierCurve* border0 = nullptr, BezierCurve* border1 = nullptr, BezierCurve* border2 = nullptr, BezierCurve* border3 = nullptr)
|
||||
{
|
||||
typename CatmullClarkPatch::Type ty = patch.type();
|
||||
|
||||
if (unlikely(final(patch,ty,depth)))
|
||||
{
|
||||
if (ty & CatmullClarkRing::TYPE_REGULAR) {
|
||||
RegularPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
|
||||
} else {
|
||||
IrregularFillPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
|
||||
}
|
||||
}
|
||||
else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
|
||||
assert(depth > 0); RegularPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
|
||||
}
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
|
||||
assert(depth > 0); GregoryPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
array_t<CatmullClarkPatch,4> patches;
|
||||
patch.subdivide(patches); // FIXME: only have to generate one of the patches
|
||||
eval_quad_direct(valid,patches,uv,dscale,depth);
|
||||
}
|
||||
}
|
||||
|
||||
void eval_direct(const vbool& valid, const GeneralCatmullClarkPatch& patch, const Vec2<vfloat>& uv, const size_t depth)
|
||||
{
|
||||
/* convert into standard quad patch if possible */
|
||||
if (likely(patch.isQuadPatch())) {
|
||||
CatmullClarkPatch qpatch; patch.init(qpatch);
|
||||
return eval_direct(valid,qpatch,uv,1.0f,depth);
|
||||
}
|
||||
|
||||
/* subdivide patch */
|
||||
unsigned Nc;
|
||||
array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
|
||||
patch.subdivide(patches,Nc); // FIXME: only have to generate one of the patches
|
||||
|
||||
/* parametrization for quads */
|
||||
if (Nc == 4)
|
||||
eval_general_quad_direct(valid,patch,patches,uv,1.0f,depth);
|
||||
|
||||
/* parametrization for arbitrary polygons */
|
||||
else
|
||||
{
|
||||
const vint l = (vint)floor(0.5f*uv.x); const vfloat u = 2.0f*frac(0.5f*uv.x)-0.5f;
|
||||
const vint h = (vint)floor(0.5f*uv.y); const vfloat v = 2.0f*frac(0.5f*uv.y)-0.5f;
|
||||
const vint i = (h<<2)+l; assert(all(valid,i<Nc));
|
||||
foreach_unique(valid,i,[&](const vbool& valid, const int i) {
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[2]; patch.getLimitBorder(borders,i);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
|
||||
eval_direct(valid,patches[i],Vec2<vfloat>(u,v),1.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
#else
|
||||
eval_direct(valid,patches[i],Vec2<vfloat>(u,v),1.0f,depth+1);
|
||||
#endif
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
float* const P;
|
||||
float* const dPdu;
|
||||
float* const dPdv;
|
||||
float* const ddPdudu;
|
||||
float* const ddPdvdv;
|
||||
float* const ddPdudv;
|
||||
const size_t dstride;
|
||||
const size_t N;
|
||||
};
|
||||
}
|
||||
}
|
||||
893
Framework/external/embree/kernels/subdiv/gregory_patch.h
vendored
Normal file
893
Framework/external/embree/kernels/subdiv/gregory_patch.h
vendored
Normal file
|
|
@ -0,0 +1,893 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bezier_patch.h"
|
||||
#include "bezier_curve.h"
|
||||
#include "catmullclark_coefficients.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
class __aligned(64) GregoryPatchT
|
||||
{
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
|
||||
typedef BezierCurveT<Vertex> BezierCurve;
|
||||
|
||||
public:
|
||||
Vertex v[4][4];
|
||||
Vertex f[2][2];
|
||||
|
||||
/* Leaves all control points uninitialized. */
__forceinline GregoryPatchT() {}

/* Builds the control points from a Catmull-Clark patch. */
__forceinline GregoryPatchT(const CatmullClarkPatch& patch)
{
  init(patch);
}

/* Builds the control points from a Catmull-Clark patch; every non-null
   border curve replaces the control points of the corresponding boundary. */
__forceinline GregoryPatchT(const CatmullClarkPatch& patch,
                            const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
{
  init_crackfix(patch,border0,border1,border2,border3);
}

/* Builds the control points from the Catmull-Clark patch constructed for the
   given half edge and vertex buffer. */
__forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
{
  const CatmullClarkPatch patch(edge,vertices,stride);
  init(patch);
}
|
||||
|
||||
/* mutable access: corners of the 4x4 control array */
__forceinline Vertex& p0()   { return v[0][0]; }
__forceinline Vertex& p1()   { return v[0][3]; }
__forceinline Vertex& p2()   { return v[3][3]; }
__forceinline Vertex& p3()   { return v[3][0]; }

/* mutable access: boundary entries next to each corner */
__forceinline Vertex& e0_p() { return v[0][1]; }
__forceinline Vertex& e0_m() { return v[1][0]; }
__forceinline Vertex& e1_p() { return v[1][3]; }
__forceinline Vertex& e1_m() { return v[0][2]; }
__forceinline Vertex& e2_p() { return v[3][2]; }
__forceinline Vertex& e2_m() { return v[2][3]; }
__forceinline Vertex& e3_p() { return v[2][0]; }
__forceinline Vertex& e3_m() { return v[3][1]; }

/* mutable access: interior entries; the "_p" points live in the inner 2x2 of v,
   the "_m" points in the separate array f */
__forceinline Vertex& f0_p() { return v[1][1]; }
__forceinline Vertex& f1_p() { return v[1][2]; }
__forceinline Vertex& f2_p() { return v[2][2]; }
__forceinline Vertex& f3_p() { return v[2][1]; }
__forceinline Vertex& f0_m() { return f[0][0]; }
__forceinline Vertex& f1_m() { return f[0][1]; }
__forceinline Vertex& f2_m() { return f[1][1]; }
__forceinline Vertex& f3_m() { return f[1][0]; }
|
||||
|
||||
/* read-only access: corners of the 4x4 control array */
__forceinline const Vertex& p0()   const { return v[0][0]; }
__forceinline const Vertex& p1()   const { return v[0][3]; }
__forceinline const Vertex& p2()   const { return v[3][3]; }
__forceinline const Vertex& p3()   const { return v[3][0]; }

/* read-only access: boundary entries next to each corner */
__forceinline const Vertex& e0_p() const { return v[0][1]; }
__forceinline const Vertex& e0_m() const { return v[1][0]; }
__forceinline const Vertex& e1_p() const { return v[1][3]; }
__forceinline const Vertex& e1_m() const { return v[0][2]; }
__forceinline const Vertex& e2_p() const { return v[3][2]; }
__forceinline const Vertex& e2_m() const { return v[2][3]; }
__forceinline const Vertex& e3_p() const { return v[2][0]; }
__forceinline const Vertex& e3_m() const { return v[3][1]; }

/* read-only access: interior entries; the "_p" points live in the inner 2x2 of v,
   the "_m" points in the separate array f */
__forceinline const Vertex& f0_p() const { return v[1][1]; }
__forceinline const Vertex& f1_p() const { return v[1][2]; }
__forceinline const Vertex& f2_p() const { return v[2][2]; }
__forceinline const Vertex& f3_p() const { return v[2][1]; }
__forceinline const Vertex& f0_m() const { return f[0][0]; }
__forceinline const Vertex& f1_m() const { return f[0][1]; }
__forceinline const Vertex& f2_m() const { return f[1][1]; }
__forceinline const Vertex& f3_m() const { return f[1][0]; }
|
||||
|
||||
/* Corner control point: the limit vertex of ring 'index' of the patch. */
__forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index)
{
  const auto& ring = irreg_patch.ring[index];
  return ring.getLimitVertex();
}
|
||||
|
||||
/* Edge control point: p_vtx displaced by one third of the limit tangent of
   ring 'index'. */
__forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
{
  const auto& ring = irreg_patch.ring[index];
  return madd(1.0f/3.0f, ring.getLimitTangent(), p_vtx);
}
|
||||
|
||||
/* Edge control point: p_vtx displaced by one third of the second limit
   tangent of ring 'index'. */
__forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
{
  const auto& ring = irreg_patch.ring[index];
  return madd(1.0f/3.0f, ring.getSecondLimitTangent(), p_vtx);
}
|
||||
|
||||
/* Variant of initPositiveEdgeVertex that subdivides ring 'index' three times
   and uses the limit tangent of the finest ring, weighted by 8/3. */
__forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
{
  CatmullClark1Ring3fa level1;
  CatmullClark1Ring3fa level2;
  CatmullClark1Ring3fa level3;

  irreg_patch.ring[index].subdivide(level1);
  level1.subdivide(level2);
  level2.subdivide(level3);

  return madd(8.0f/3.0f, level3.getLimitTangent(), p_vtx);
}
|
||||
|
||||
/* Variant of initNegativeEdgeVertex that subdivides ring 'index' three times
   and uses the second limit tangent of the finest ring, weighted by 8/3. */
__forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
{
  CatmullClark1Ring3fa level1;
  CatmullClark1Ring3fa level2;
  CatmullClark1Ring3fa level3;

  irreg_patch.ring[index].subdivide(level1);
  level1.subdivide(level2);
  level2.subdivide(level3);

  return madd(8.0f/3.0f, level3.getSecondLimitTangent(), p_vtx);
}
|
||||
|
||||
void initFaceVertex(const CatmullClarkPatch& irreg_patch,
|
||||
const size_t index,
|
||||
const Vertex& p_vtx,
|
||||
const Vertex& e0_p_vtx,
|
||||
const Vertex& e1_m_vtx,
|
||||
const unsigned int face_valence_p1,
|
||||
const Vertex& e0_m_vtx,
|
||||
const Vertex& e3_p_vtx,
|
||||
const unsigned int face_valence_p3,
|
||||
Vertex& f_p_vtx,
|
||||
Vertex& f_m_vtx)
|
||||
{
|
||||
const unsigned int face_valence = irreg_patch.ring[index].face_valence;
|
||||
const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
|
||||
const unsigned int border_index = irreg_patch.ring[index].border_index;
|
||||
|
||||
const Vertex& vtx = irreg_patch.ring[index].vtx;
|
||||
const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0);
|
||||
const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0);
|
||||
const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1);
|
||||
|
||||
Vertex c_i, e_i_p_1;
|
||||
const bool hasHardEdge0 =
|
||||
std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
|
||||
std::isinf(irreg_patch.ring[index].crease_weight[0]);
|
||||
|
||||
if (unlikely((border_index == edge_valence-2) || hasHardEdge0))
|
||||
{
|
||||
/* mirror quad center and edge mid-point */
|
||||
c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1);
|
||||
e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1);
|
||||
}
|
||||
else
|
||||
{
|
||||
c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 );
|
||||
e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 );
|
||||
}
|
||||
|
||||
Vertex c_i_m_2, e_i_m_2;
|
||||
const bool hasHardEdge1 =
|
||||
std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
|
||||
std::isinf(irreg_patch.ring[index].crease_weight[1]);
|
||||
|
||||
if (unlikely(border_index == 2 || hasHardEdge1))
|
||||
{
|
||||
/* mirror quad center and edge mid-point */
|
||||
c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1);
|
||||
e_i_m_2 = madd(2.0f, vtx - e_i, + e_i);
|
||||
}
|
||||
else
|
||||
{
|
||||
c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 );
|
||||
e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 );
|
||||
}
|
||||
|
||||
const float d = 3.0f;
|
||||
//const float c = cosf(2.0f*M_PI/(float)face_valence);
|
||||
//const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1);
|
||||
//const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3);
|
||||
|
||||
const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
|
||||
const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
|
||||
const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
|
||||
|
||||
const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i);
|
||||
const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2);
|
||||
|
||||
f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
|
||||
f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
|
||||
}
|
||||
|
||||
/* Initializes all control points from a Catmull-Clark patch: corners from the
   limit vertices, edge points from the limit tangents, and the interior face
   points through initFaceVertex. */
__noinline void init(const CatmullClarkPatch& patch)
{
  assert( patch.ring[0].hasValidPositions() );
  assert( patch.ring[1].hasValidPositions() );
  assert( patch.ring[2].hasValidPositions() );
  assert( patch.ring[3].hasValidPositions() );

  /* corners */
  p0() = initCornerVertex(patch,0);
  p1() = initCornerVertex(patch,1);
  p2() = initCornerVertex(patch,2);
  p3() = initCornerVertex(patch,3);

  /* edge points along the first limit tangent */
  e0_p() = initPositiveEdgeVertex(patch,0, p0());
  e1_p() = initPositiveEdgeVertex(patch,1, p1());
  e2_p() = initPositiveEdgeVertex(patch,2, p2());
  e3_p() = initPositiveEdgeVertex(patch,3, p3());

  /* edge points along the second limit tangent */
  e0_m() = initNegativeEdgeVertex(patch,0, p0());
  e1_m() = initNegativeEdgeVertex(patch,1, p1());
  e2_m() = initNegativeEdgeVertex(patch,2, p2());
  e3_m() = initNegativeEdgeVertex(patch,3, p3());

  const unsigned int face_valence_p0 = patch.ring[0].face_valence;
  const unsigned int face_valence_p1 = patch.ring[1].face_valence;
  const unsigned int face_valence_p2 = patch.ring[2].face_valence;
  const unsigned int face_valence_p3 = patch.ring[3].face_valence;

  /* Each valence argument belongs to the corner that owns the edge point
     passed right before it (e1_m -> corner 1, e3_p -> corner 3, ...).
     For corner 3 the second edge point is e2_p, so the matching valence is
     that of corner 2 (the previous code passed face_valence_p3 here). */
  initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
  initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
  initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
  initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p2,f3_p(),f3_m() );
}
|
||||
|
||||
/* Same as init(const CatmullClarkPatch&), but every non-null border curve
   overrides the four control points of the corresponding patch boundary
   before the interior face points are derived. Borders are applied in the
   order 0..3, so a corner shared by two supplied borders ends up with the
   value of the border applied last. */
__noinline void init_crackfix(const CatmullClarkPatch& patch,
                              const BezierCurve* border0,
                              const BezierCurve* border1,
                              const BezierCurve* border2,
                              const BezierCurve* border3)
{
  assert( patch.ring[0].hasValidPositions() );
  assert( patch.ring[1].hasValidPositions() );
  assert( patch.ring[2].hasValidPositions() );
  assert( patch.ring[3].hasValidPositions() );

  /* corners */
  p0() = initCornerVertex(patch,0);
  p1() = initCornerVertex(patch,1);
  p2() = initCornerVertex(patch,2);
  p3() = initCornerVertex(patch,3);

  /* edge points along the first limit tangent */
  e0_p() = initPositiveEdgeVertex(patch,0, p0());
  e1_p() = initPositiveEdgeVertex(patch,1, p1());
  e2_p() = initPositiveEdgeVertex(patch,2, p2());
  e3_p() = initPositiveEdgeVertex(patch,3, p3());

  /* edge points along the second limit tangent */
  e0_m() = initNegativeEdgeVertex(patch,0, p0());
  e1_m() = initNegativeEdgeVertex(patch,1, p1());
  e2_m() = initNegativeEdgeVertex(patch,2, p2());
  e3_m() = initNegativeEdgeVertex(patch,3, p3());

  /* boundary p0 -> p1 */
  if (unlikely(border0 != nullptr))
  {
    p0()   = border0->v0;
    e0_p() = border0->v1;
    e1_m() = border0->v2;
    p1()   = border0->v3;
  }

  /* boundary p1 -> p2 */
  if (unlikely(border1 != nullptr))
  {
    p1()   = border1->v0;
    e1_p() = border1->v1;
    e2_m() = border1->v2;
    p2()   = border1->v3;
  }

  /* boundary p2 -> p3 */
  if (unlikely(border2 != nullptr))
  {
    p2()   = border2->v0;
    e2_p() = border2->v1;
    e3_m() = border2->v2;
    p3()   = border2->v3;
  }

  /* boundary p3 -> p0 */
  if (unlikely(border3 != nullptr))
  {
    p3()   = border3->v0;
    e3_p() = border3->v1;
    e0_m() = border3->v2;
    p0()   = border3->v3;
  }

  const unsigned int face_valence_p0 = patch.ring[0].face_valence;
  const unsigned int face_valence_p1 = patch.ring[1].face_valence;
  const unsigned int face_valence_p2 = patch.ring[2].face_valence;
  const unsigned int face_valence_p3 = patch.ring[3].face_valence;

  /* Each valence argument belongs to the corner that owns the edge point
     passed right before it (e1_m -> corner 1, e3_p -> corner 3, ...).
     For corner 3 the second edge point is e2_p, so the matching valence is
     that of corner 2 (the previous code passed face_valence_p3 here). */
  initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
  initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
  initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
  initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p2,f3_p(),f3_m() );
}
|
||||
|
||||
|
||||
void computeGregoryPatchFacePoints(const unsigned int face_valence,
|
||||
const Vertex& r_e_p,
|
||||
const Vertex& r_e_m,
|
||||
const Vertex& p_vtx,
|
||||
const Vertex& e0_p_vtx,
|
||||
const Vertex& e1_m_vtx,
|
||||
const unsigned int face_valence_p1,
|
||||
const Vertex& e0_m_vtx,
|
||||
const Vertex& e3_p_vtx,
|
||||
const unsigned int face_valence_p3,
|
||||
Vertex& f_p_vtx,
|
||||
Vertex& f_m_vtx,
|
||||
const float d = 3.0f)
|
||||
{
|
||||
//const float c = cosf(2.0*M_PI/(float)face_valence);
|
||||
//const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1);
|
||||
//const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3);
|
||||
|
||||
const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
|
||||
const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
|
||||
const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
|
||||
|
||||
|
||||
f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
|
||||
f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
|
||||
f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
|
||||
f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
|
||||
}
|
||||
|
||||
/* Initializes all control points from a general Catmull-Clark patch with
   exactly four corners: corner and edge points come from
   computeGregoryPatchEdgePoints of each ring, the interior face points from
   computeGregoryPatchFacePoints. */
__noinline void init(const GeneralCatmullClarkPatch& patch)
{
  assert(patch.size() == 4);
#if 0
  CatmullClarkPatch qpatch; patch.init(qpatch);
  init(qpatch);
#else
  /* valences are kept integral: computeGregoryPatchFacePoints takes them as
     unsigned int, so storing them as float only added a round trip */
  const unsigned int face_valence_p0 = patch.ring[0].face_valence;
  const unsigned int face_valence_p1 = patch.ring[1].face_valence;
  const unsigned int face_valence_p2 = patch.ring[2].face_valence;
  const unsigned int face_valence_p3 = patch.ring[3].face_valence;

  Vertex p0_r_p, p0_r_m;
  patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );

  Vertex p1_r_p, p1_r_m;
  patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );

  Vertex p2_r_p, p2_r_m;
  patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );

  Vertex p3_r_p, p3_r_m;
  patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );

  /* Each neighbour valence belongs to the corner that owns the edge point
     passed right before it. For corner 3 the second edge point is e2_p, so
     the matching valence is that of corner 2 (the previous code passed
     face_valence_p3 here). */
  computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
  computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
  computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
  computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p2, f3_p(), f3_m() );
#endif
}
|
||||
|
||||
|
||||
/* Replaces every interior "_p" point by the average of its "_p"/"_m" pair
   and clears the "_m" points to zero. The pair for v[1+r][1+c] is f[r][c]. */
__forceinline void convert_to_bezier()
{
  for (size_t r=0; r<2; r++)
  {
    for (size_t c=0; c<2; c++)
    {
      Vertex& inner = v[1+r][1+c];
      inner = (inner + f[r][c]) * 0.5f;
      f[r][c] = Vertex( zero );
    }
  }
}
|
||||
|
||||
/* Computes the four interior control points to use at parameter (uu,vv).
   On the boundary of the unit square (uu or vv exactly 0 or 1) the interior
   entries of 'matrix' are returned unchanged; everywhere else each interior
   point is a parameter-weighted blend of its matrix entry and its f_m entry. */
static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv,
                                               Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
{
  const bool on_border = uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f;
  if (unlikely(on_border))
  {
    matrix_11 = matrix[1][1];
    matrix_12 = matrix[1][2];
    matrix_22 = matrix[2][2];
    matrix_21 = matrix[2][1];
    return;
  }

  /* interior points stored in the matrix ... */
  const Vertex_t plus0 = matrix[1][1];
  const Vertex_t plus1 = matrix[1][2];
  const Vertex_t plus2 = matrix[2][2];
  const Vertex_t plus3 = matrix[2][1];

  /* ... and their counterparts stored separately */
  const Vertex_t minus0 = f_m[0][0];
  const Vertex_t minus1 = f_m[0][1];
  const Vertex_t minus2 = f_m[1][1];
  const Vertex_t minus3 = f_m[1][0];

  matrix_11 = (      uu  * plus0  +       vv  * minus0)*rcp(uu+vv);
  matrix_12 = ((1.0f-uu) * minus1 +       vv  * plus1 )*rcp(1.0f-uu+vv);
  matrix_22 = ((1.0f-uu) * plus2  + (1.0f-vv) * minus2)*rcp(2.0f-uu-vv);
  matrix_21 = (      uu  * minus3 + (1.0f-vv) * plus3 )*rcp(1.0f+uu-vv);
}
|
||||
|
||||
template<typename vfloat>
|
||||
static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2],
|
||||
size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
|
||||
{
|
||||
const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
|
||||
|
||||
const vfloat f0_p = v[1][1][i];
|
||||
const vfloat f1_p = v[1][2][i];
|
||||
const vfloat f2_p = v[2][2][i];
|
||||
const vfloat f3_p = v[2][1][i];
|
||||
|
||||
const vfloat f0_m = f[0][0][i];
|
||||
const vfloat f1_m = f[0][1][i];
|
||||
const vfloat f2_m = f[1][1][i];
|
||||
const vfloat f3_m = f[1][0][i];
|
||||
|
||||
const vfloat one_minus_uu = vfloat(1.0f) - uu;
|
||||
const vfloat one_minus_vv = vfloat(1.0f) - vv;
|
||||
|
||||
const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
|
||||
const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
|
||||
const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
|
||||
const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
|
||||
|
||||
matrix_11 = select(m_border,f0_p,f0_i);
|
||||
matrix_12 = select(m_border,f1_p,f1_i);
|
||||
matrix_22 = select(m_border,f2_p,f2_i);
|
||||
matrix_21 = select(m_border,f3_p,f3_i);
|
||||
}
|
||||
|
||||
/* Shared implementation of the static scalar evaluators below: blends the
   four interior control points for (uu,vv) and returns the tensor-product sum
   of the 4x4 control net weighted by Bu along the columns and Bv along the
   rows. The callers only differ in which basis (value, first or second
   derivative) they pass for each direction. */
static __forceinline Vertex evalWithBasis(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv,
                                          const Vec4<float>& Bu, const Vec4<float>& Bv)
{
  Vertex_t v_11, v_12, v_22, v_21;
  computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);

  return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
         madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
         madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
              Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
}

/* position at (uu,vv) */
static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv)
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::eval(uu),BezierBasis::eval(vv));
}

/* The derivative evaluators differentiate only the basis functions and treat
   the blended interior points as constants, hence "approximative". */

static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::derivative(uu),BezierBasis::eval(vv));
}

static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::eval(uu),BezierBasis::derivative(vv));
}

static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::derivative2(uu),BezierBasis::eval(vv));
}

static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::eval(uu),BezierBasis::derivative2(vv));
}

static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
{
  return evalWithBasis(matrix,f,uu,vv,BezierBasis::derivative(uu),BezierBasis::derivative(vv));
}
|
||||
|
||||
/* Member conveniences: forward to the static evaluators of the same name
   using this patch's own control points (v, f). */

__forceinline Vertex eval(const float uu, const float vv) const
{
  return GregoryPatchT::eval(v,f,uu,vv);
}

__forceinline Vertex eval_du( const float uu, const float vv) const
{
  return GregoryPatchT::eval_du(v,f,uu,vv);
}

__forceinline Vertex eval_dv( const float uu, const float vv) const
{
  return GregoryPatchT::eval_dv(v,f,uu,vv);
}

__forceinline Vertex eval_dudu( const float uu, const float vv) const
{
  return GregoryPatchT::eval_dudu(v,f,uu,vv);
}

__forceinline Vertex eval_dvdv( const float uu, const float vv) const
{
  return GregoryPatchT::eval_dvdv(v,f,uu,vv);
}

__forceinline Vertex eval_dudv( const float uu, const float vv) const
{
  return GregoryPatchT::eval_dudv(v,f,uu,vv);
}
|
||||
|
||||
/* Returns the (unnormalized) cross product of the u- and v-tangents at
   (uu,vv), both obtained with de Casteljau evaluation on the control net
   with blended interior points. */
static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions
{
  /* interpolate inner vertices */
  Vertex_t in_11, in_12, in_22, in_21;
  computeInnerVertices(matrix,f_m,uu,vv,in_11, in_12, in_22, in_21);

  /* tangent in u: collapse every column along v, then differentiate along u */
  const Vertex_t c0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]);
  const Vertex_t c1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)in_11       , (Vertex_t)in_21       , (Vertex_t)matrix[3][1]);
  const Vertex_t c2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)in_12       , (Vertex_t)in_22       , (Vertex_t)matrix[3][2]);
  const Vertex_t c3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]);
  const Vertex_t du = deCasteljau_tangent(uu, c0, c1, c2, c3);

  /* tangent in v: collapse every row along u, then differentiate along v */
  const Vertex_t r0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]);
  const Vertex_t r1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)in_11       , (Vertex_t)in_12       , (Vertex_t)matrix[1][3]);
  const Vertex_t r2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)in_21       , (Vertex_t)in_22       , (Vertex_t)matrix[2][3]);
  const Vertex_t r3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]);
  const Vertex_t dv = deCasteljau_tangent(vv, r0, r1, r2, r3);

  /* normal = tangentU x tangentV */
  const Vertex_t n = cross(du,dv);
  return n;
}
|
||||
|
||||
/* Normal of this patch at (uu,vv); forwards to the static overload using the
   patch's own control points. */
__forceinline Vertex normal( const float uu, const float vv) const
{
  return GregoryPatchT::normal(v,f,uu,vv);
}
|
||||
|
||||
/* Evaluates this patch at (u,v) and writes the requested outputs.
   P        written when non-null
   dPdu     when non-null, dPdu and dPdv are both written (scaled by dscale)
   ddPdudu  when non-null, ddPdudu, ddPdvdv and ddPdudv are all written
            (scaled by dscale squared)
   Note that the parameter 'v' hides the control point member of the same
   name inside this function. */
__forceinline void eval(const float u, const float v,
                        Vertex* P, Vertex* dPdu, Vertex* dPdv,
                        Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
                        const float dscale = 1.0f) const
{
  if (P)
    *P = eval(u,v);

  if (dPdu)
  {
    /* first derivatives are requested as a pair */
    assert(dPdv);
    *dPdu = eval_du(u,v)*dscale;
    *dPdv = eval_dv(u,v)*dscale;
  }

  if (ddPdudu)
  {
    /* second derivatives are requested as a triple */
    assert(ddPdvdv);
    assert(ddPdudv);
    const float dscale2 = sqr(dscale);
    *ddPdudu = eval_dudu(u,v)*dscale2;
    *ddPdvdv = eval_dvdv(u,v)*dscale2;
    *ddPdudv = eval_dudv(u,v)*dscale2;
  }
}
|
||||
|
||||
template<class vfloat>
|
||||
static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2],
|
||||
const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
|
||||
vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
|
||||
{
|
||||
const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
|
||||
const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i]))));
|
||||
const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i]))));
|
||||
const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
|
||||
return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
|
||||
}
|
||||
|
||||
template<typename vbool, typename vfloat>
|
||||
static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2],
|
||||
const vbool& valid, const vfloat& uu, const vfloat& vv,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
|
||||
const float dscale, const size_t dstride, const size_t N)
|
||||
{
|
||||
if (P) {
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
|
||||
}
|
||||
}
|
||||
if (dPdu)
|
||||
{
|
||||
{
|
||||
assert(dPdu);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
|
||||
}
|
||||
}
|
||||
{
|
||||
assert(dPdv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ddPdudu)
|
||||
{
|
||||
{
|
||||
assert(ddPdudu);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::eval(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
{
|
||||
assert(ddPdvdv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::eval(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
{
|
||||
assert(ddPdudv);
|
||||
const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
|
||||
const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
vfloat matrix_11, matrix_12, matrix_22, matrix_21;
|
||||
computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
|
||||
vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vbool, typename vfloat>
|
||||
__forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
|
||||
float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
|
||||
const float dscale, const size_t dstride, const size_t N) const {
|
||||
eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
|
||||
{
|
||||
typedef typename T::Bool M;
|
||||
const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
|
||||
|
||||
const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
|
||||
const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
|
||||
const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
|
||||
const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
|
||||
|
||||
const Vec3<T> f0_m = f[0][0];
|
||||
const Vec3<T> f1_m = f[0][1];
|
||||
const Vec3<T> f2_m = f[1][1];
|
||||
const Vec3<T> f3_m = f[1][0];
|
||||
|
||||
const T one_minus_uu = T(1.0f) - uu;
|
||||
const T one_minus_vv = T(1.0f) - vv;
|
||||
|
||||
const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
|
||||
const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
|
||||
const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
|
||||
const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
|
||||
|
||||
const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
|
||||
const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
|
||||
const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
|
||||
const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
|
||||
|
||||
const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
|
||||
const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
|
||||
const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
|
||||
const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
|
||||
const T B2_u = 3.0f * (uu * one_minus_uu * uu);
|
||||
const T B2_v = 3.0f * (vv * one_minus_vv * vv);
|
||||
const T B3_u = uu * uu * uu;
|
||||
const T B3_v = vv * vv * vv;
|
||||
|
||||
const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))),
|
||||
B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x))))));
|
||||
|
||||
const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))),
|
||||
B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y))))));
|
||||
|
||||
const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))),
|
||||
madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))),
|
||||
madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))),
|
||||
B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z))))));
|
||||
|
||||
return Vec3<T>(x,y,z);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__forceinline Vec3<T> eval(const T& uu, const T& vv) const
|
||||
{
|
||||
Vec3<T> ff[2][2];
|
||||
ff[0][0] = Vec3<T>(f[0][0]);
|
||||
ff[0][1] = Vec3<T>(f[0][1]);
|
||||
ff[1][1] = Vec3<T>(f[1][1]);
|
||||
ff[1][0] = Vec3<T>(f[1][0]);
|
||||
return eval_t(v,ff,uu,vv);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
|
||||
{
|
||||
typedef typename T::Bool M;
|
||||
|
||||
const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
|
||||
const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
|
||||
const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
|
||||
const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
|
||||
|
||||
const Vec3<T> f0_m = f[0][0];
|
||||
const Vec3<T> f1_m = f[0][1];
|
||||
const Vec3<T> f2_m = f[1][1];
|
||||
const Vec3<T> f3_m = f[1][0];
|
||||
|
||||
const T one_minus_uu = T(1.0f) - uu;
|
||||
const T one_minus_vv = T(1.0f) - vv;
|
||||
|
||||
const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
|
||||
const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
|
||||
const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
|
||||
const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
|
||||
|
||||
#if 1
|
||||
const M m_corner0 = (uu == 0.0f) & (vv == 0.0f);
|
||||
const M m_corner1 = (uu == 1.0f) & (vv == 0.0f);
|
||||
const M m_corner2 = (uu == 1.0f) & (vv == 1.0f);
|
||||
const M m_corner3 = (uu == 0.0f) & (vv == 1.0f);
|
||||
const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
|
||||
const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
|
||||
const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
|
||||
const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
|
||||
#else
|
||||
const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
|
||||
const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
|
||||
const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
|
||||
const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
|
||||
const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
|
||||
#endif
|
||||
|
||||
const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
|
||||
const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
|
||||
const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
|
||||
const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
|
||||
|
||||
const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
|
||||
const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
|
||||
const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
|
||||
|
||||
const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
|
||||
const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
|
||||
const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
|
||||
|
||||
const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
|
||||
const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
|
||||
|
||||
/* tangentU */
|
||||
const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
|
||||
const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
|
||||
const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
|
||||
const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
|
||||
|
||||
const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
|
||||
|
||||
/* tangentV */
|
||||
const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
|
||||
const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
|
||||
const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
|
||||
const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
|
||||
|
||||
const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
|
||||
|
||||
/* normal = tangentU x tangentV */
|
||||
const Vec3<T> n = cross(tangentU,tangentV);
|
||||
return n;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__forceinline Vec3<T> normal(const T& uu, const T& vv) const
|
||||
{
|
||||
Vec3<T> ff[2][2];
|
||||
ff[0][0] = Vec3<T>(f[0][0]);
|
||||
ff[0][1] = Vec3<T>(f[0][1]);
|
||||
ff[1][1] = Vec3<T>(f[1][1]);
|
||||
ff[1][0] = Vec3<T>(f[1][0]);
|
||||
return normal_t(v,ff,uu,vv);
|
||||
}
|
||||
|
||||
/*! Returns a bounding box that encloses all 16 control points v and all
 *  four face points f of the patch. */
__forceinline BBox<Vertex> bounds() const
{
  /* walk the 4x4 control point matrix as a flat array of 16 entries */
  const Vertex *const cv = &v[0][0];
  BBox<Vertex> bounds (cv[0]);
  for (size_t i=1; i<16; i++)
    bounds.extend( cv[i] );

  /* the face points are not part of v and have to be added separately;
   * f[0][1] was previously skipped while f[1][1] was added twice */
  bounds.extend(f[0][0]);
  bounds.extend(f[0][1]);
  bounds.extend(f[1][0]);
  bounds.extend(f[1][1]);
  return bounds;
}
|
||||
|
||||
/*! Stream output: prints the 16 control points v row by row, followed by
 *  the four face points f, one entry per line. */
friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p)
{
  for (size_t i=0; i<16; i++) {
    const size_t row = i/4, col = i%4;
    o << "v[" << row << "][" << col << "] " << p.v[row][col] << embree_endl;
  }

  for (size_t i=0; i<4; i++) {
    const size_t row = i/2, col = i%2;
    o << "f[" << row << "][" << col << "] " << p.f[row][col] << embree_endl;
  }
  return o;
}
|
||||
};
|
||||
|
||||
typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
|
||||
|
||||
template<typename Vertex, typename Vertex_t>
|
||||
__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
{
|
||||
CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
|
||||
GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
|
||||
gpatch.convert_to_bezier();
|
||||
for (size_t y=0; y<4; y++)
|
||||
for (size_t x=0; x<4; x++)
|
||||
matrix[y][x] = (Vertex_t)gpatch.v[y][x];
|
||||
}
|
||||
|
||||
template<typename Vertex, typename Vertex_t>
|
||||
__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch)
|
||||
{
|
||||
GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
|
||||
gpatch.convert_to_bezier();
|
||||
for (size_t y=0; y<4; y++)
|
||||
for (size_t x=0; x<4; x++)
|
||||
matrix[y][x] = (Vertex_t)gpatch.v[y][x];
|
||||
}
|
||||
|
||||
template<typename Vertex, typename Vertex_t>
|
||||
__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
|
||||
const BezierCurveT<Vertex>* border0,
|
||||
const BezierCurveT<Vertex>* border1,
|
||||
const BezierCurveT<Vertex>* border2,
|
||||
const BezierCurveT<Vertex>* border3)
|
||||
{
|
||||
GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3);
|
||||
gpatch.convert_to_bezier();
|
||||
for (size_t y=0; y<4; y++)
|
||||
for (size_t x=0; x<4; x++)
|
||||
matrix[y][x] = (Vertex_t)gpatch.v[y][x];
|
||||
}
|
||||
}
|
||||
113
Framework/external/embree/kernels/subdiv/gregory_patch_dense.h
vendored
Normal file
113
Framework/external/embree/kernels/subdiv/gregory_patch_dense.h
vendored
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gregory_patch.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Compact storage of a Gregory patch in a single 4x4 matrix of Vec3ff.
 *
 *  The xyz components of matrix hold the 16 control points patch.v. The
 *  four face points patch.f are packed into the w components, one face
 *  point per matrix row (x,y,z in columns 0,1,2, column 3 is zero):
 *    row 0 <- f[0][0],  row 1 <- f[0][1],  row 2 <- f[1][1],  row 3 <- f[1][0]
 *  All evaluation functions unpack the face points again and forward to
 *  the static evaluators of GregoryPatch3fa, reinterpreting the Vec3ff
 *  matrix as a Vec3fa matrix. */
class __aligned(64) DenseGregoryPatch3fa
{
  typedef Vec3fa Vec3fa_4x4[4][4];
public:

  /*! packs the control points and face points of a Gregory patch */
  __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch)
  {
    for (size_t y=0; y<4; y++)
      for (size_t x=0; x<4; x++)
        matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f);

    matrix[0][0].w = patch.f[0][0].x;
    matrix[0][1].w = patch.f[0][0].y;
    matrix[0][2].w = patch.f[0][0].z;
    matrix[0][3].w = 0.0f;

    matrix[1][0].w = patch.f[0][1].x;
    matrix[1][1].w = patch.f[0][1].y;
    matrix[1][2].w = patch.f[0][1].z;
    matrix[1][3].w = 0.0f;

    matrix[2][0].w = patch.f[1][1].x;
    matrix[2][1].w = patch.f[1][1].y;
    matrix[2][2].w = patch.f[1][1].z;
    matrix[2][3].w = 0.0f;

    matrix[3][0].w = patch.f[1][0].x;
    matrix[3][1].w = patch.f[1][0].y;
    matrix[3][2].w = patch.f[1][0].z;
    matrix[3][3].w = 0.0f;
  }

  /*! unpacks the four face points from the w components */
  __forceinline void extract_f_m(Vec3fa f_m[2][2]) const
  {
    f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
    f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
    f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
    f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
  }

  /*! scalar position evaluation */
  __forceinline Vec3fa eval(const float uu, const float vv) const
  {
    __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
    return GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  }

  /*! scalar normal evaluation */
  __forceinline Vec3fa normal(const float uu, const float vv) const
  {
    __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
    return GregoryPatch3fa::normal(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  }

  /*! packet position evaluation */
  template<class T>
  __forceinline Vec3<T> eval(const T &uu, const T &vv) const
  {
    Vec3<T> f_m[2][2]; broadcast_f_m(f_m);
    return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  }

  /*! packet normal evaluation */
  template<class T>
  __forceinline Vec3<T> normal(const T &uu, const T &vv) const
  {
    Vec3<T> f_m[2][2]; broadcast_f_m(f_m);
    return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  }

  /*! Scalar evaluation of position and derivatives. First derivatives are
   *  written (scaled by dscale) when dPdu is non-null, second derivatives
   *  (scaled by dscale^2) when ddPdudu is non-null; the other pointers of
   *  each group are only asserted. */
  __forceinline void eval(const float u, const float v,
                          Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv,
                          const float dscale = 1.0f) const
  {
    __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
    if (P) {
      *P = GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,u,v);
    }
    if (dPdu) {
      assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
      assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
    }
    if (ddPdudu) {
      assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
      assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
      assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
    }
  }

  /*! Packet evaluation of position and first derivatives for N channels.
   *  The static GregoryPatch3fa packet evaluator expects a Vec3fa matrix
   *  and three additional second-derivative buffers; the matrix is
   *  therefore reinterpreted like in all other functions of this class and
   *  null is passed for the second derivatives, which are not requested
   *  here. */
  template<typename vbool, typename vfloat>
  __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const
  {
    __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
    GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,valid,uu,vv,P,dPdu,dPdv,nullptr,nullptr,nullptr,dscale,dstride,N);
  }

private:

  /*! unpacks the four face points from the w components and broadcasts
   *  them into packet vectors */
  template<class T>
  __forceinline void broadcast_f_m(Vec3<T> f_m[2][2]) const
  {
    f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
    f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
    f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
    f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
  }

private:
  Vec3ff matrix[4][4]; // xyz: control points, w: packed face points (see class comment)
};
|
||||
}
|
||||
96
Framework/external/embree/kernels/subdiv/gridrange.h
vendored
Normal file
96
Framework/external/embree/kernels/subdiv/gridrange.h
vendored
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Rectangular range [u_start,u_end] x [v_start,v_end] of grid points.
 *  Both end indices are inclusive, so a range always spans at least one
 *  point per dimension. Ranges of at most 3x3 points are leaves; larger
 *  ranges are split in the middle of their longer dimension, where the
 *  two halves share the split row/column. */
struct __aligned(16) GridRange
{
  unsigned int u_start;
  unsigned int u_end;
  unsigned int v_start;
  unsigned int v_end;

  /*! leaves all members uninitialized */
  __forceinline GridRange() {}

  __forceinline GridRange(unsigned int u_start, unsigned int u_end, unsigned int v_start, unsigned int v_end)
    : u_start(u_start), u_end(u_end), v_start(v_start), v_end(v_end) {}

  /*! number of grid points in u direction */
  __forceinline unsigned int width() const {
    return u_end-u_start+1;
  }

  /*! number of grid points in v direction */
  __forceinline unsigned int height() const {
    return v_end-v_start+1;
  }

  /*! tests if the range is small enough (at most 3x3 points) to become a leaf */
  __forceinline bool hasLeafSize() const
  {
    const unsigned int u_size = width();
    const unsigned int v_size = height();
    assert(u_size >= 1);
    assert(v_size >= 1);
    return u_size <= 3 && v_size <= 3;
  }

  /*! Returns the split position strictly between start and end. The
   *  midpoint is computed relative to start so that start+end cannot wrap
   *  around for large indices. */
  static __forceinline unsigned int split(unsigned int start,unsigned int end)
  {
    const unsigned int center = start + (end-start)/2;
    assert (center > start);
    assert (center < end);
    return center;
  }

  /*! Splits a non-leaf range into r0 and r1 along its longer dimension
   *  (u on ties). Both halves contain the split position. */
  __forceinline void split(GridRange& r0, GridRange& r1) const
  {
    assert( hasLeafSize() == false );
    const unsigned int u_size = width();
    const unsigned int v_size = height();
    r0 = *this;
    r1 = *this;

    if (u_size >= v_size)
    {
      const unsigned int u_mid = split(u_start,u_end);
      r0.u_end   = u_mid;
      r1.u_start = u_mid;
    }
    else
    {
      const unsigned int v_mid = split(v_start,v_end);
      r0.v_end   = v_mid;
      r1.v_start = v_mid;
    }
  }

  /*! Splits a non-leaf range into 2 to 4 sub-ranges written to r[0..]:
   *  the range is split once and each half that is not yet a leaf is split
   *  a second time. Returns the number of sub-ranges written. */
  __forceinline unsigned int splitIntoSubRanges(GridRange r[4]) const
  {
    assert( !hasLeafSize() );
    unsigned int children = 0;
    GridRange first,second;
    split(first,second);

    if (first.hasLeafSize()) {
      r[0] = first;
      children++;
    }
    else {
      first.split(r[0],r[1]);
      children += 2;
    }

    if (second.hasLeafSize()) {
      r[children] = second;
      children++;
    }
    else {
      second.split(r[children+0],r[children+1]);
      children += 2;
    }
    return children;
  }
};
|
||||
}
|
||||
371
Framework/external/embree/kernels/subdiv/half_edge.h
vendored
Normal file
371
Framework/external/embree/kernels/subdiv/half_edge.h
vendored
Normal file
|
|
@ -0,0 +1,371 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_coefficients.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class __aligned(32) HalfEdge
|
||||
{
|
||||
friend class SubdivMesh;
|
||||
public:
|
||||
|
||||
/*! Classification of a face. The numeric order matters: results are
 *  combined with max(), so the larger value wins. */
enum PatchType : char {
  /*! a bilinear patch */
  BILINEAR_PATCH = 0,
  /*! a regular quad patch can be represented as a B-Spline */
  REGULAR_QUAD_PATCH = 1,
  /*! an irregular quad patch can be represented as a Gregory patch */
  IRREGULAR_QUAD_PATCH = 2,
  /*! these patches need subdivision and cannot be processed by the above fast code paths */
  COMPLEX_PATCH = 3
};
|
||||
|
||||
/*! Classification of the start vertex of a half edge. */
enum VertexType : char {
  /*! regular vertex */
  REGULAR_VERTEX = 0,
  /*! vertex of a non-manifold edge */
  NON_MANIFOLD_EDGE_VERTEX = 1,
};
|
||||
|
||||
/*! returns the patch type with the larger numeric value */
__forceinline friend PatchType max( const PatchType& ty0, const PatchType& ty1)
{
  const int a = (int)ty0;
  const int b = (int)ty1;
  return (PatchType)((a < b) ? b : a);
}
|
||||
|
||||
struct Edge
|
||||
{
|
||||
/*! edge constructor */
|
||||
__forceinline Edge(const uint32_t v0, const uint32_t v1)
|
||||
: v0(v0), v1(v1) {}
|
||||
|
||||
/*! create an 64 bit identifier that is unique for the not oriented edge */
|
||||
__forceinline operator uint64_t() const
|
||||
{
|
||||
uint32_t p0 = v0, p1 = v1;
|
||||
if (p0<p1) std::swap(p0,p1);
|
||||
return (((uint64_t)p0) << 32) | (uint64_t)p1;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t v0,v1; //!< start and end vertex of the edge
|
||||
};
|
||||
|
||||
HalfEdge ()
|
||||
: vtx_index(-1), next_half_edge_ofs(0), prev_half_edge_ofs(0), opposite_half_edge_ofs(0), edge_crease_weight(0),
|
||||
vertex_crease_weight(0), edge_level(0), patch_type(COMPLEX_PATCH), vertex_type(REGULAR_VERTEX)
|
||||
{
|
||||
static_assert(sizeof(HalfEdge) == 32, "invalid half edge size");
|
||||
}
|
||||
|
||||
/*! tests if an opposite half edge is linked; an offset of 0 means there is none */
__forceinline bool hasOpposite() const
{
  return opposite_half_edge_ofs != 0;
}

/*! links the opposite half edge by storing its element offset relative to this half edge */
__forceinline void setOpposite(HalfEdge* opposite)
{
  opposite_half_edge_ofs = int(opposite-this);
}
|
||||
|
||||
/* Neighbors are addressed through element offsets relative to this half
 * edge. An offset of 0 means "not linked", which the accessors assert
 * against. */

/*! next half edge of the face */
__forceinline HalfEdge* next()
{
  assert( next_half_edge_ofs != 0 );
  return this + next_half_edge_ofs;
}
__forceinline const HalfEdge* next() const
{
  assert( next_half_edge_ofs != 0 );
  return this + next_half_edge_ofs;
}

/*! previous half edge of the face */
__forceinline HalfEdge* prev()
{
  assert( prev_half_edge_ofs != 0 );
  return this + prev_half_edge_ofs;
}
__forceinline const HalfEdge* prev() const
{
  assert( prev_half_edge_ofs != 0 );
  return this + prev_half_edge_ofs;
}

/*! opposite half edge; must only be called if hasOpposite() */
__forceinline HalfEdge* opposite()
{
  assert( opposite_half_edge_ofs != 0 );
  return this + opposite_half_edge_ofs;
}
__forceinline const HalfEdge* opposite() const
{
  assert( opposite_half_edge_ofs != 0 );
  return this + opposite_half_edge_ofs;
}

/*! steps to the successor of the opposite half edge */
__forceinline HalfEdge* rotate()
{
  return opposite()->next();
}
__forceinline const HalfEdge* rotate() const
{
  return opposite()->next();
}
|
||||
|
||||
/*! index of the vertex this half edge starts at */
__forceinline unsigned int getStartVertexIndex() const
{
  return vtx_index;
}

/*! index of the vertex this half edge ends at, i.e. the start vertex of the next half edge */
__forceinline unsigned int getEndVertexIndex () const
{
  return next()->vtx_index;
}

/*! start/end vertex index pair of this half edge */
__forceinline Edge getEdge () const
{
  const unsigned int start = getStartVertexIndex();
  const unsigned int end   = getEndVertexIndex();
  return Edge(start,end);
}
|
||||
|
||||
|
||||
/*! tests if the start vertex of the edge is regular */
|
||||
__forceinline PatchType vertexType() const
|
||||
{
|
||||
const HalfEdge* p = this;
|
||||
size_t face_valence = 0;
|
||||
bool hasBorder = false;
|
||||
|
||||
do
|
||||
{
|
||||
/* we need subdivision to handle edge creases */
|
||||
if (p->hasOpposite() && p->edge_crease_weight > 0.0f)
|
||||
return COMPLEX_PATCH;
|
||||
|
||||
face_valence++;
|
||||
|
||||
/* test for quad */
|
||||
const HalfEdge* pp = p;
|
||||
pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
|
||||
pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
|
||||
pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
|
||||
pp = pp->next(); if (pp != p) return COMPLEX_PATCH;
|
||||
|
||||
/* continue with next face */
|
||||
p = p->prev();
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else
|
||||
{
|
||||
face_valence++;
|
||||
hasBorder = true;
|
||||
p = this;
|
||||
while (p->hasOpposite())
|
||||
p = p->rotate();
|
||||
}
|
||||
} while (p != this);
|
||||
|
||||
/* calculate vertex type */
|
||||
if (face_valence == 2 && hasBorder) {
|
||||
if (vertex_crease_weight == 0.0f ) return REGULAR_QUAD_PATCH;
|
||||
else if (vertex_crease_weight == float(inf)) return REGULAR_QUAD_PATCH;
|
||||
else return COMPLEX_PATCH;
|
||||
}
|
||||
else if (vertex_crease_weight != 0.0f) return COMPLEX_PATCH;
|
||||
else if (face_valence == 3 && hasBorder) return REGULAR_QUAD_PATCH;
|
||||
else if (face_valence == 4 && !hasBorder) return REGULAR_QUAD_PATCH;
|
||||
else return IRREGULAR_QUAD_PATCH;
|
||||
}
|
||||
|
||||
/*! tests if this edge is part of a bilinear patch */
|
||||
__forceinline bool bilinearVertex() const {
|
||||
return vertex_crease_weight == float(inf) && edge_crease_weight == float(inf);
|
||||
}
|
||||
|
||||
/*! calculates the type of the patch */
|
||||
__forceinline PatchType patchType() const
|
||||
{
|
||||
const HalfEdge* p = this;
|
||||
PatchType ret = REGULAR_QUAD_PATCH;
|
||||
bool bilinear = true;
|
||||
|
||||
ret = max(ret,p->vertexType());
|
||||
bilinear &= p->bilinearVertex();
|
||||
if ((p = p->next()) == this) return COMPLEX_PATCH;
|
||||
|
||||
ret = max(ret,p->vertexType());
|
||||
bilinear &= p->bilinearVertex();
|
||||
if ((p = p->next()) == this) return COMPLEX_PATCH;
|
||||
|
||||
ret = max(ret,p->vertexType());
|
||||
bilinear &= p->bilinearVertex();
|
||||
if ((p = p->next()) == this) return COMPLEX_PATCH;
|
||||
|
||||
ret = max(ret,p->vertexType());
|
||||
bilinear &= p->bilinearVertex();
|
||||
if ((p = p->next()) != this) return COMPLEX_PATCH;
|
||||
|
||||
if (bilinear) return BILINEAR_PATCH;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! tests if the face is a regular b-spline face */
|
||||
__forceinline bool isRegularFace() const {
|
||||
return patch_type == REGULAR_QUAD_PATCH;
|
||||
}
|
||||
|
||||
/*! tests if the face can be diced (using bspline or gregory patch) */
|
||||
__forceinline bool isGregoryFace() const {
|
||||
return patch_type == IRREGULAR_QUAD_PATCH || patch_type == REGULAR_QUAD_PATCH;
|
||||
}
|
||||
|
||||
/*! tests if the base vertex of this half edge is a corner vertex */
|
||||
__forceinline bool isCorner() const {
|
||||
return !hasOpposite() && !prev()->hasOpposite();
|
||||
}
|
||||
|
||||
/*! tests if the vertex is attached to any border */
|
||||
__forceinline bool vertexHasBorder() const
|
||||
{
|
||||
const HalfEdge* p = this;
|
||||
do {
|
||||
if (!p->hasOpposite()) return true;
|
||||
p = p->rotate();
|
||||
} while (p != this);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*! tests if the face this half edge belongs to has some border */
|
||||
__forceinline bool faceHasBorder() const
|
||||
{
|
||||
const HalfEdge* p = this;
|
||||
do {
|
||||
if (p->vertexHasBorder() && (p->vertex_type != HalfEdge::NON_MANIFOLD_EDGE_VERTEX)) return true;
|
||||
p = p->next();
|
||||
} while (p != this);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*! calculates conservative bounds of a catmull clark subdivision face */
|
||||
__forceinline BBox3fa bounds(const BufferView<Vec3fa>& vertices) const
|
||||
{
|
||||
BBox3fa bounds = this->get1RingBounds(vertices);
|
||||
for (const HalfEdge* p=this->next(); p!=this; p=p->next())
|
||||
bounds.extend(p->get1RingBounds(vertices));
|
||||
return bounds;
|
||||
}
|
||||
|
||||
/*! tests if this is a valid patch */
|
||||
__forceinline bool valid(const BufferView<Vec3fa>& vertices) const
|
||||
{
|
||||
size_t N = 1;
|
||||
if (!this->validRing(vertices)) return false;
|
||||
for (const HalfEdge* p=this->next(); p!=this; p=p->next(), N++) {
|
||||
if (!p->validRing(vertices)) return false;
|
||||
}
|
||||
return N >= 3 && N <= MAX_PATCH_VALENCE;
|
||||
}
|
||||
|
||||
/*! counts number of polygon edges */
|
||||
__forceinline unsigned int numEdges() const
|
||||
{
|
||||
unsigned int N = 1;
|
||||
for (const HalfEdge* p=this->next(); p!=this; p=p->next(), N++);
|
||||
return N;
|
||||
}
|
||||
|
||||
/*! calculates face and edge valence */
|
||||
__forceinline void calculateFaceValenceAndEdgeValence(size_t& faceValence, size_t& edgeValence) const
|
||||
{
|
||||
faceValence = 0;
|
||||
edgeValence = 0;
|
||||
|
||||
const HalfEdge* p = this;
|
||||
do
|
||||
{
|
||||
/* calculate bounds of current face */
|
||||
unsigned int numEdges = p->numEdges();
|
||||
assert(numEdges >= 3);
|
||||
edgeValence += numEdges-2;
|
||||
|
||||
faceValence++;
|
||||
p = p->prev();
|
||||
|
||||
/* continue with next face */
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else {
|
||||
faceValence++;
|
||||
edgeValence++;
|
||||
p = this;
|
||||
while (p->hasOpposite())
|
||||
p = p->opposite()->next();
|
||||
}
|
||||
|
||||
} while (p != this);
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend __forceinline std::ostream &operator<<(std::ostream &o, const HalfEdge &h)
|
||||
{
|
||||
return o << "{ " <<
|
||||
"vertex = " << h.vtx_index << ", " << //" -> " << h.next()->vtx_index << ", " <<
|
||||
"prev = " << h.prev_half_edge_ofs << ", " <<
|
||||
"next = " << h.next_half_edge_ofs << ", " <<
|
||||
"opposite = " << h.opposite_half_edge_ofs << ", " <<
|
||||
"edge_crease = " << h.edge_crease_weight << ", " <<
|
||||
"vertex_crease = " << h.vertex_crease_weight << ", " <<
|
||||
//"edge_level = " << h.edge_level <<
|
||||
" }";
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
/*! calculates the bounds of the face associated with the half-edge */
|
||||
__forceinline BBox3fa getFaceBounds(const BufferView<Vec3fa>& vertices) const
|
||||
{
|
||||
BBox3fa b = vertices[getStartVertexIndex()];
|
||||
for (const HalfEdge* p = next(); p!=this; p=p->next()) {
|
||||
b.extend(vertices[p->getStartVertexIndex()]);
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/*! calculates the bounds of the 1-ring associated with the vertex of the half-edge */
|
||||
__forceinline BBox3fa get1RingBounds(const BufferView<Vec3fa>& vertices) const
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
const HalfEdge* p = this;
|
||||
do
|
||||
{
|
||||
/* calculate bounds of current face */
|
||||
bounds.extend(p->getFaceBounds(vertices));
|
||||
p = p->prev();
|
||||
|
||||
/* continue with next face */
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else {
|
||||
p = this;
|
||||
while (p->hasOpposite())
|
||||
p = p->opposite()->next();
|
||||
}
|
||||
|
||||
} while (p != this);
|
||||
|
||||
return bounds;
|
||||
}
|
||||
|
||||
/*! tests if this is a valid face */
|
||||
__forceinline bool validFace(const BufferView<Vec3fa>& vertices, size_t& N) const
|
||||
{
|
||||
const Vec3fa v = vertices[getStartVertexIndex()];
|
||||
if (!isvalid(v)) return false;
|
||||
size_t n = 1;
|
||||
for (const HalfEdge* p = next(); p!=this; p=p->next(), n++) {
|
||||
const Vec3fa v = vertices[p->getStartVertexIndex()];
|
||||
if (!isvalid(v)) return false;
|
||||
}
|
||||
N += n-2;
|
||||
return n >= 3 && n <= MAX_PATCH_VALENCE;
|
||||
}
|
||||
|
||||
/*! tests if this is a valid ring */
|
||||
__forceinline bool validRing(const BufferView<Vec3fa>& vertices) const
|
||||
{
|
||||
size_t faceValence = 0;
|
||||
size_t edgeValence = 0;
|
||||
|
||||
const HalfEdge* p = this;
|
||||
do
|
||||
{
|
||||
/* calculate bounds of current face */
|
||||
if (!p->validFace(vertices,edgeValence))
|
||||
return false;
|
||||
|
||||
faceValence++;
|
||||
p = p->prev();
|
||||
|
||||
/* continue with next face */
|
||||
if (likely(p->hasOpposite()))
|
||||
p = p->opposite();
|
||||
|
||||
/* if there is no opposite go the long way to the other side of the border */
|
||||
else {
|
||||
faceValence++;
|
||||
edgeValence++;
|
||||
p = this;
|
||||
while (p->hasOpposite())
|
||||
p = p->opposite()->next();
|
||||
}
|
||||
|
||||
} while (p != this);
|
||||
|
||||
return faceValence <= MAX_RING_FACE_VALENCE && edgeValence <= MAX_RING_EDGE_VALENCE;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned int vtx_index; //!< index of edge start vertex
|
||||
int next_half_edge_ofs; //!< relative offset to next half edge of face
|
||||
int prev_half_edge_ofs; //!< relative offset to previous half edge of face
|
||||
int opposite_half_edge_ofs; //!< relative offset to opposite half edge
|
||||
|
||||
public:
|
||||
float edge_crease_weight; //!< crease weight attached to edge
|
||||
float vertex_crease_weight; //!< crease weight attached to start vertex
|
||||
float edge_level; //!< subdivision factor for edge
|
||||
PatchType patch_type; //!< stores type of subdiv patch
|
||||
VertexType vertex_type; //!< stores type of the start vertex
|
||||
char align[2];
|
||||
};
|
||||
}
|
||||
45
Framework/external/embree/kernels/subdiv/hermite_curve.h
vendored
Normal file
45
Framework/external/embree/kernels/subdiv/hermite_curve.h
vendored
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex>
|
||||
struct HermiteCurveT : BezierCurveT<Vertex>
|
||||
{
|
||||
__forceinline HermiteCurveT() {}
|
||||
|
||||
__forceinline HermiteCurveT(const BezierCurveT<Vertex>& curve)
|
||||
: BezierCurveT<Vertex>(curve) {}
|
||||
|
||||
__forceinline HermiteCurveT(const Vertex& v0, const Vertex& t0, const Vertex& v1, const Vertex& t1)
|
||||
: BezierCurveT<Vertex>(v0,madd(1.0f/3.0f,t0,v0),nmadd(1.0f/3.0f,t1,v1),v1) {}
|
||||
|
||||
__forceinline HermiteCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
|
||||
{
|
||||
const Vec3ff q0(xfmVector(space,this->v0-p), this->v0.w);
|
||||
const Vec3ff q1(xfmVector(space,this->v1-p), this->v1.w);
|
||||
const Vec3ff q2(xfmVector(space,this->v2-p), this->v2.w);
|
||||
const Vec3ff q3(xfmVector(space,this->v3-p), this->v3.w);
|
||||
return BezierCurveT<Vec3ff>(q0,q1,q2,q3);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Vertex>
|
||||
__forceinline void convert(const HermiteCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve)
|
||||
{
|
||||
ocurve = BezierCurveT<Vertex>(icurve.v0,icurve.v1,icurve.v2,icurve.v3);
|
||||
}
|
||||
|
||||
template<typename CurveGeometry>
|
||||
__forceinline HermiteCurveT<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const HermiteCurveT<Vec3ff>& curve) {
|
||||
return HermiteCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,BezierCurveT<Vec3ff>(curve)));
|
||||
}
|
||||
|
||||
typedef HermiteCurveT<Vec3fa> HermiteCurve3fa;
|
||||
}
|
||||
|
||||
443
Framework/external/embree/kernels/subdiv/linear_bezier_patch.h
vendored
Normal file
443
Framework/external/embree/kernels/subdiv/linear_bezier_patch.h
vendored
Normal file
|
|
@ -0,0 +1,443 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bezier_curve.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename V>
|
||||
struct TensorLinearQuadraticBezierSurface
|
||||
{
|
||||
QuadraticBezierCurve<V> L;
|
||||
QuadraticBezierCurve<V> R;
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface() {}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<V>& curve)
|
||||
: L(curve.L), R(curve.R) {}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) {
|
||||
L = other.L; R = other.R; return *this;
|
||||
}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<V>& L, const QuadraticBezierCurve<V>& R)
|
||||
: L(L), R(R) {}
|
||||
|
||||
__forceinline BBox<V> bounds() const {
|
||||
return merge(L.bounds(),R.bounds());
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
template<>
|
||||
struct TensorLinearQuadraticBezierSurface<Vec2fa>
|
||||
{
|
||||
QuadraticBezierCurve<vfloat4> LR;
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface() {}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<Vec2fa>& curve)
|
||||
: LR(curve.LR) {}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) {
|
||||
LR = other.LR; return *this;
|
||||
}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<vfloat4>& LR)
|
||||
: LR(LR) {}
|
||||
|
||||
__forceinline BBox<Vec2fa> bounds() const
|
||||
{
|
||||
const BBox<vfloat4> b = LR.bounds();
|
||||
const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper));
|
||||
const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(b.lower)),Vec2fa(shuffle<2,3,2,3>(b.upper)));
|
||||
return merge(bl,br);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename V>
|
||||
struct TensorLinearCubicBezierSurface
|
||||
{
|
||||
CubicBezierCurve<V> L;
|
||||
CubicBezierCurve<V> R;
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface() {}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve)
|
||||
: L(curve.L), R(curve.R) {}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) {
|
||||
L = other.L; R = other.R; return *this;
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<V>& L, const CubicBezierCurve<V>& R)
|
||||
: L(L), R(R) {}
|
||||
|
||||
template<template<typename T> class SourceCurve>
|
||||
__forceinline static TensorLinearCubicBezierSurface fromCenterAndNormalCurve(const SourceCurve<Vec3ff>& center, const SourceCurve<Vec3fa>& normal)
|
||||
{
|
||||
SourceCurve<Vec3ff> vcurve = center;
|
||||
SourceCurve<Vec3fa> ncurve = normal;
|
||||
|
||||
/* here we construct a patch which follows the curve l(t) =
|
||||
* p(t) +/- r(t)*normalize(cross(n(t),dp(t))) */
|
||||
|
||||
const Vec3ff p0 = vcurve.eval(0.0f);
|
||||
const Vec3ff dp0 = vcurve.eval_du(0.0f);
|
||||
//const Vec3ff ddp0 = vcurve.eval_dudu(0.0f); // ddp0 is assumed to be 0
|
||||
|
||||
const Vec3fa n0 = ncurve.eval(0.0f);
|
||||
const Vec3fa dn0 = ncurve.eval_du(0.0f);
|
||||
|
||||
const Vec3ff p1 = vcurve.eval(1.0f);
|
||||
const Vec3ff dp1 = vcurve.eval_du(1.0f);
|
||||
//const Vec3ff ddp1 = vcurve.eval_dudu(1.0f); // ddp1 is assumed to be 0
|
||||
|
||||
const Vec3fa n1 = ncurve.eval(1.0f);
|
||||
const Vec3fa dn1 = ncurve.eval_du(1.0f);
|
||||
|
||||
const Vec3fa bt0 = cross(n0,dp0);
|
||||
const Vec3fa dbt0 = cross(dn0,dp0);// + cross(n0,ddp0);
|
||||
|
||||
const Vec3fa bt1 = cross(n1,dp1);
|
||||
const Vec3fa dbt1 = cross(dn1,dp1);// + cross(n1,ddp1);
|
||||
|
||||
const Vec3fa k0 = normalize(bt0);
|
||||
const Vec3fa dk0 = dnormalize(bt0,dbt0);
|
||||
|
||||
const Vec3fa k1 = normalize(bt1);
|
||||
const Vec3fa dk1 = dnormalize(bt1,dbt1);
|
||||
|
||||
const Vec3fa l0 = p0 - p0.w*k0;
|
||||
const Vec3fa dl0 = dp0 - (dp0.w*k0 + p0.w*dk0);
|
||||
|
||||
const Vec3fa r0 = p0 + p0.w*k0;
|
||||
const Vec3fa dr0 = dp0 + (dp0.w*k0 + p0.w*dk0);
|
||||
|
||||
const Vec3fa l1 = p1 - p1.w*k1;
|
||||
const Vec3fa dl1 = dp1 - (dp1.w*k1 + p1.w*dk1);
|
||||
|
||||
const Vec3fa r1 = p1 + p1.w*k1;
|
||||
const Vec3fa dr1 = dp1 + (dp1.w*k1 + p1.w*dk1);
|
||||
|
||||
const float scale = 1.0f/3.0f;
|
||||
CubicBezierCurve<V> L(l0,l0+scale*dl0,l1-scale*dl1,l1);
|
||||
CubicBezierCurve<V> R(r0,r0+scale*dr0,r1-scale*dr1,r1);
|
||||
return TensorLinearCubicBezierSurface(L,R);
|
||||
}
|
||||
|
||||
__forceinline BBox<V> bounds() const {
|
||||
return merge(L.bounds(),R.bounds());
|
||||
}
|
||||
|
||||
__forceinline BBox3fa accurateBounds() const {
|
||||
return merge(L.accurateBounds(),R.accurateBounds());
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Interval1f> reduce_v() const {
|
||||
return merge(CubicBezierCurve<Interval<V>>(L),CubicBezierCurve<Interval<V>>(R));
|
||||
}
|
||||
|
||||
__forceinline LinearBezierCurve<Interval1f> reduce_u() const {
|
||||
return LinearBezierCurve<Interval1f>(L.bounds(),R.bounds());
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx) const {
|
||||
return TensorLinearCubicBezierSurface<float>(L.xfm(dx),R.xfm(dx));
|
||||
}
|
||||
|
||||
template<int W>
|
||||
__forceinline TensorLinearCubicBezierSurface<vfloat<W>> vxfm(const V& dx) const {
|
||||
return TensorLinearCubicBezierSurface<vfloat<W>>(L.template vxfm<W>(dx),R.template vxfm<W>(dx));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx, const V& p) const {
|
||||
return TensorLinearCubicBezierSurface<float>(L.xfm(dx,p),R.xfm(dx,p));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space) const {
|
||||
return TensorLinearCubicBezierSurface(L.xfm(space),R.xfm(space));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const {
|
||||
return TensorLinearCubicBezierSurface(L.xfm(space,p),R.xfm(space,p));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const {
|
||||
return TensorLinearCubicBezierSurface(L.xfm(space,p,s),R.xfm(space,p,s));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const {
|
||||
return TensorLinearCubicBezierSurface(L.clip(u),R.clip(u));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const {
|
||||
return TensorLinearCubicBezierSurface(clerp(L,R,V(v.lower)),clerp(L,R,V(v.upper)));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const {
|
||||
return clip_v(v).clip_u(u);
|
||||
}
|
||||
|
||||
__forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const
|
||||
{
|
||||
CubicBezierCurve<V> L0,L1; L.split(L0,L1,u);
|
||||
CubicBezierCurve<V> R0,R1; R.split(R0,R1,u);
|
||||
new (&left ) TensorLinearCubicBezierSurface(L0,R0);
|
||||
new (&right) TensorLinearCubicBezierSurface(L1,R1);
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const {
|
||||
valid = true; clear(valid,VSIZEX-1);
|
||||
return TensorLinearCubicBezierSurface<Vec2vfx>(L.split(u),R.split(u));
|
||||
}
|
||||
|
||||
template<int W>
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2vf<W>> vsplit_u(vbool<W>& valid, const BBox1f& u, int& i, int N) const
|
||||
{
|
||||
valid = true; clear(valid,W-1);
|
||||
auto r = TensorLinearCubicBezierSurface<Vec2vf<W>>(L.template split<W>(u,i,N),R.template split<W>(u,i,N));
|
||||
i += W-1;
|
||||
return r;
|
||||
}
|
||||
|
||||
__forceinline V eval(const float u, const float v) const {
|
||||
return clerp(L,R,V(v)).eval(u);
|
||||
}
|
||||
|
||||
__forceinline V eval_du(const float u, const float v) const {
|
||||
return clerp(L,R,V(v)).eval_dt(u);
|
||||
}
|
||||
|
||||
__forceinline V eval_dv(const float u, const float v) const {
|
||||
return (R-L).eval(u);
|
||||
}
|
||||
|
||||
__forceinline void eval(const float u, const float v, V& p, V& dpdu, V& dpdv) const
|
||||
{
|
||||
V p0, dp0du; L.eval(u,p0,dp0du);
|
||||
V p1, dp1du; R.eval(u,p1,dp1du);
|
||||
p = lerp(p0,p1,v);
|
||||
dpdu = lerp(dp0du,dp1du,v);
|
||||
dpdv = p1-p0;
|
||||
}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface<V> derivative_u() const {
|
||||
return TensorLinearQuadraticBezierSurface<V>(L.derivative(),R.derivative());
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<V> derivative_v() const {
|
||||
return R-L;
|
||||
}
|
||||
|
||||
__forceinline V axis_u() const {
|
||||
return (L.end()-L.begin())+(R.end()-R.begin());
|
||||
}
|
||||
|
||||
__forceinline V axis_v() const {
|
||||
return (R.begin()-L.begin())+(R.end()-L.end());
|
||||
}
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a)
|
||||
{
|
||||
return cout << "TensorLinearCubicBezierSurface" << embree_endl
|
||||
<< "{" << embree_endl
|
||||
<< " L = " << a.L << ", " << embree_endl
|
||||
<< " R = " << a.R << embree_endl
|
||||
<< "}";
|
||||
}
|
||||
|
||||
friend __forceinline TensorLinearCubicBezierSurface clerp(const TensorLinearCubicBezierSurface& a, const TensorLinearCubicBezierSurface& b, const float t) {
|
||||
return TensorLinearCubicBezierSurface(clerp(a.L,b.L,V(t)), clerp(a.R,b.R,V(t)));
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
template<>
|
||||
struct TensorLinearCubicBezierSurface<Vec2fa>
|
||||
{
|
||||
CubicBezierCurve<vfloat4> LR;
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface() {}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve)
|
||||
: LR(curve.LR) {}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) {
|
||||
LR = other.LR; return *this;
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<vfloat4>& LR)
|
||||
: LR(LR) {}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<Vec2fa>& L, const CubicBezierCurve<Vec2fa>& R)
|
||||
: LR(shuffle<0,1,0,1>(vfloat4(L.v0),vfloat4(R.v0)),shuffle<0,1,0,1>(vfloat4(L.v1),vfloat4(R.v1)),shuffle<0,1,0,1>(vfloat4(L.v2),vfloat4(R.v2)),shuffle<0,1,0,1>(vfloat4(L.v3),vfloat4(R.v3))) {}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2fa> getL() const {
|
||||
return CubicBezierCurve<Vec2fa>(Vec2fa(LR.v0),Vec2fa(LR.v1),Vec2fa(LR.v2),Vec2fa(LR.v3));
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2fa> getR() const {
|
||||
return CubicBezierCurve<Vec2fa>(Vec2fa(shuffle<2,3,2,3>(LR.v0)),Vec2fa(shuffle<2,3,2,3>(LR.v1)),Vec2fa(shuffle<2,3,2,3>(LR.v2)),Vec2fa(shuffle<2,3,2,3>(LR.v3)));
|
||||
}
|
||||
|
||||
__forceinline BBox<Vec2fa> bounds() const
|
||||
{
|
||||
const BBox<vfloat4> b = LR.bounds();
|
||||
const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper));
|
||||
const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(b.lower)),Vec2fa(shuffle<2,3,2,3>(b.upper)));
|
||||
return merge(bl,br);
|
||||
}
|
||||
|
||||
__forceinline BBox1f bounds(const Vec2fa& axis) const
|
||||
{
|
||||
const CubicBezierCurve<vfloat4> LRx = LR;
|
||||
const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
|
||||
const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(axis)),LRx,shuffle<1>(vfloat4(axis))*LRy);
|
||||
const BBox<vfloat4> Lb = LRa.bounds();
|
||||
const BBox<vfloat4> Rb(shuffle<3>(Lb.lower),shuffle<3>(Lb.upper));
|
||||
const BBox<vfloat4> b = merge(Lb,Rb);
|
||||
return BBox1f(b.lower[0],b.upper[0]);
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx) const
|
||||
{
|
||||
const CubicBezierCurve<vfloat4> LRx = LR;
|
||||
const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
|
||||
const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(dx)),LRx,shuffle<1>(vfloat4(dx))*LRy);
|
||||
return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]),
|
||||
CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2]));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx, const Vec2fa& p) const
|
||||
{
|
||||
const vfloat4 pxyxy = shuffle<0,1,0,1>(vfloat4(p));
|
||||
const CubicBezierCurve<vfloat4> LRx = LR-pxyxy;
|
||||
const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
|
||||
const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(dx)),LRx,shuffle<1>(vfloat4(dx))*LRy);
|
||||
return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]),
|
||||
CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2]));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const {
|
||||
return TensorLinearCubicBezierSurface(LR.clip(u));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const
|
||||
{
|
||||
const CubicBezierCurve<vfloat4> LL(shuffle<0,1,0,1>(LR.v0),shuffle<0,1,0,1>(LR.v1),shuffle<0,1,0,1>(LR.v2),shuffle<0,1,0,1>(LR.v3));
|
||||
const CubicBezierCurve<vfloat4> RR(shuffle<2,3,2,3>(LR.v0),shuffle<2,3,2,3>(LR.v1),shuffle<2,3,2,3>(LR.v2),shuffle<2,3,2,3>(LR.v3));
|
||||
return TensorLinearCubicBezierSurface(clerp(LL,RR,vfloat4(v.lower,v.lower,v.upper,v.upper)));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const {
|
||||
return clip_v(v).clip_u(u);
|
||||
}
|
||||
|
||||
__forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const
|
||||
{
|
||||
CubicBezierCurve<vfloat4> LR0,LR1; LR.split(LR0,LR1,u);
|
||||
new (&left ) TensorLinearCubicBezierSurface(LR0);
|
||||
new (&right) TensorLinearCubicBezierSurface(LR1);
|
||||
}
|
||||
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const {
|
||||
valid = true; clear(valid,VSIZEX-1);
|
||||
return TensorLinearCubicBezierSurface<Vec2vfx>(getL().split(u),getR().split(u));
|
||||
}
|
||||
|
||||
template<int W>
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2vf<W>> vsplit_u(vbool<W>& valid, const BBox1f& u, int& i, int N) const {
|
||||
valid = true; clear(valid,W-1);
|
||||
auto r = TensorLinearCubicBezierSurface<Vec2vf<W>>(getL().split<W>(u,i,N),getR().split<W>(u,i,N));
|
||||
i += W-1;
|
||||
return r;
|
||||
}
|
||||
|
||||
__forceinline Vec2fa eval(const float u, const float v) const
|
||||
{
|
||||
const vfloat4 p = LR.eval(u);
|
||||
return Vec2fa(lerp(shuffle<0,1,0,1>(p),shuffle<2,3,2,3>(p),v));
|
||||
}
|
||||
|
||||
__forceinline Vec2fa eval_du(const float u, const float v) const
|
||||
{
|
||||
const vfloat4 dpdu = LR.eval_dt(u);
|
||||
return Vec2fa(lerp(shuffle<0,1,0,1>(dpdu),shuffle<2,3,2,3>(dpdu),v));
|
||||
}
|
||||
|
||||
__forceinline Vec2fa eval_dv(const float u, const float v) const
|
||||
{
|
||||
const vfloat4 p = LR.eval(u);
|
||||
return Vec2fa(shuffle<2,3,2,3>(p)-shuffle<0,1,0,1>(p));
|
||||
}
|
||||
|
||||
__forceinline void eval(const float u, const float v, Vec2fa& p, Vec2fa& dpdu, Vec2fa& dpdv) const
|
||||
{
|
||||
vfloat4 p0, dp0du; LR.eval(u,p0,dp0du);
|
||||
p = Vec2fa(lerp(shuffle<0,1,0,1>(p0),shuffle<2,3,2,3>(p0),v));
|
||||
dpdu = Vec2fa(lerp(shuffle<0,1,0,1>(dp0du),shuffle<2,3,2,3>(dp0du),v));
|
||||
dpdv = Vec2fa(shuffle<2,3,2,3>(p0)-shuffle<0,1,0,1>(p0));
|
||||
}
|
||||
|
||||
__forceinline TensorLinearQuadraticBezierSurface<Vec2fa> derivative_u() const {
|
||||
return TensorLinearQuadraticBezierSurface<Vec2fa>(LR.derivative());
|
||||
}
|
||||
|
||||
__forceinline CubicBezierCurve<Vec2fa> derivative_v() const {
|
||||
return getR()-getL();
|
||||
}
|
||||
|
||||
__forceinline Vec2fa axis_u() const
|
||||
{
|
||||
const CubicBezierCurve<Vec2fa> L = getL();
|
||||
const CubicBezierCurve<Vec2fa> R = getR();
|
||||
return (L.end()-L.begin())+(R.end()-R.begin());
|
||||
}
|
||||
|
||||
__forceinline Vec2fa axis_v() const
|
||||
{
|
||||
const CubicBezierCurve<Vec2fa> L = getL();
|
||||
const CubicBezierCurve<Vec2fa> R = getR();
|
||||
return (R.begin()-L.begin())+(R.end()-L.end());
|
||||
}
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a)
|
||||
{
|
||||
return cout << "TensorLinearCubicBezierSurface" << embree_endl
|
||||
<< "{" << embree_endl
|
||||
<< " L = " << a.getL() << ", " << embree_endl
|
||||
<< " R = " << a.getR() << embree_endl
|
||||
<< "}";
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2f> TensorLinearCubicBezierSurface<Vec2fa>::vsplit_u<1>(bool& valid, const BBox1f& u, int& i, int N) const {
|
||||
auto r = TensorLinearCubicBezierSurface<Vec2f>(getL().split1(u,i,N),getR().split1(u,i,N));
|
||||
valid = true; i += 1;
|
||||
return r;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<> template<>
|
||||
__forceinline TensorLinearCubicBezierSurface<Vec2f> TensorLinearCubicBezierSurface<Vec2fa>::vsplit_u<1>(bool& valid, const BBox1f& u, int& i, int N) const {
|
||||
auto r = TensorLinearCubicBezierSurface<Vec2f>(L.split1(u,i,N),R.split1(u,i,N));
|
||||
valid = true; i += 1;
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
typedef TensorLinearCubicBezierSurface<float> TensorLinearCubicBezierSurface1f;
|
||||
typedef TensorLinearCubicBezierSurface<Vec2fa> TensorLinearCubicBezierSurface2fa;
|
||||
typedef TensorLinearCubicBezierSurface<Vec3fa> TensorLinearCubicBezierSurface3fa;
|
||||
}
|
||||
}
|
||||
371
Framework/external/embree/kernels/subdiv/patch.h
vendored
Normal file
371
Framework/external/embree/kernels/subdiv/patch.h
vendored
Normal file
|
|
@ -0,0 +1,371 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "catmullclark_patch.h"
|
||||
#include "bilinear_patch.h"
|
||||
#include "bspline_patch.h"
|
||||
#include "bezier_patch.h"
|
||||
#include "gregory_patch.h"
|
||||
#include "tessellation_cache.h"
|
||||
|
||||
#if 1
|
||||
#define PATCH_DEBUG_SUBDIVISION(ptr,x,y,z)
|
||||
#else
|
||||
#define PATCH_DEBUG_SUBDIVISION(ptr,x,y,z) \
|
||||
{ \
|
||||
size_t hex = (size_t)ptr; \
|
||||
for (size_t i=0; i<4; i++) hex = hex ^ (hex >> 8); \
|
||||
const float c = (float)(((hex >> 0) ^ (hex >> 4) ^ (hex >> 8) ^ (hex >> 12) ^ (hex >> 16))&0xf)/15.0f; \
|
||||
if (P) *P = Vertex(0.5f+0.5f*x,0.5f+0.5f*y,0.5f+0.5f*z,0.0f); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define PATCH_MAX_CACHE_DEPTH 2
|
||||
//#define PATCH_MIN_RESOLUTION 1 // FIXME: not yet completely implemented
|
||||
#define PATCH_MAX_EVAL_DEPTH_IRREGULAR 10 // maximum evaluation depth at irregular vertices (has to be larger or equal than PATCH_MAX_CACHE_DEPTH)
|
||||
#define PATCH_MAX_EVAL_DEPTH_CREASE 10 // maximum evaluation depth at crease features (has to be larger or equal than PATCH_MAX_CACHE_DEPTH)
|
||||
#define PATCH_USE_GREGORY 1 // 0 = no gregory, 1 = fill, 2 = as early as possible
|
||||
|
||||
#if PATCH_USE_GREGORY==2
|
||||
#define PATCH_USE_BEZIER_PATCH 1 // enable use of bezier instead of b-spline patches
|
||||
#else
|
||||
#define PATCH_USE_BEZIER_PATCH 0 // enable use of bezier instead of b-spline patches
|
||||
#endif
|
||||
|
||||
#if PATCH_USE_BEZIER_PATCH
|
||||
# define RegularPatch BezierPatch
|
||||
# define RegularPatchT BezierPatchT<Vertex,Vertex_t>
|
||||
#else
|
||||
# define RegularPatch BSplinePatch
|
||||
# define RegularPatchT BSplinePatchT<Vertex,Vertex_t>
|
||||
#endif
|
||||
|
||||
#if PATCH_USE_GREGORY
|
||||
#define IrregularFillPatch GregoryPatch
|
||||
#define IrregularFillPatchT GregoryPatchT<Vertex,Vertex_t>
|
||||
#else
|
||||
#define IrregularFillPatch BilinearPatch
|
||||
#define IrregularFillPatchT BilinearPatchT<Vertex,Vertex_t>
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
struct __aligned(64) PatchT
|
||||
{
|
||||
public:
|
||||
|
||||
typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
|
||||
typedef BezierCurveT<Vertex> BezierCurve;
|
||||
|
||||
/*! Kind of object a Ref points to. Ref stores this tag in the low
 *  4 bits of the pointer, so every value has to stay below 16. */
enum Type
{
  INVALID_PATCH            = 0,
  BILINEAR_PATCH           = 1,
  BSPLINE_PATCH            = 2,
  BEZIER_PATCH             = 3,
  GREGORY_PATCH            = 4,
  SUBDIVIDED_GENERAL_PATCH = 7,
  SUBDIVIDED_QUAD_PATCH    = 8,
  EVAL_PATCH               = 9,
};
|
||||
|
||||
struct Ref
|
||||
{
|
||||
__forceinline Ref(void* p = nullptr)
|
||||
: ptr((size_t)p) {}
|
||||
|
||||
__forceinline operator bool() const { return ptr != 0; }
|
||||
__forceinline operator size_t() const { return ptr; }
|
||||
|
||||
__forceinline Ref (Type ty, void* in)
|
||||
: ptr(((size_t)in)+ty) { assert((((size_t)in) & 0xF) == 0); }
|
||||
|
||||
__forceinline Type type () const { return (Type)(ptr & 0xF); }
|
||||
__forceinline void* object() const { return (void*) (ptr & ~0xF); }
|
||||
|
||||
size_t ptr;
|
||||
};
|
||||
|
||||
struct EvalPatch
|
||||
{
|
||||
/* creates EvalPatch from a CatmullClarkPatch */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch)
|
||||
{
|
||||
size_t ofs = 0, bytes = patch.bytes();
|
||||
void* ptr = alloc(bytes);
|
||||
patch.serialize(ptr,ofs);
|
||||
assert(ofs == bytes);
|
||||
return Ref(EVAL_PATCH, ptr);
|
||||
}
|
||||
};
|
||||
|
||||
struct BilinearPatch
|
||||
{
|
||||
/* creates BilinearPatch from a CatmullClarkPatch */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
|
||||
const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
|
||||
return Ref(BILINEAR_PATCH, new (alloc(sizeof(BilinearPatch))) BilinearPatch(patch));
|
||||
}
|
||||
|
||||
__forceinline BilinearPatch (const CatmullClarkPatch& patch)
|
||||
: patch(patch) {}
|
||||
|
||||
/* creates BilinearPatch from 4 vertices */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
return Ref(BILINEAR_PATCH, new (alloc(sizeof(BilinearPatch))) BilinearPatch(edge,vertices,stride));
|
||||
}
|
||||
|
||||
__forceinline BilinearPatch (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
: patch(edge,vertices,stride) {}
|
||||
|
||||
public:
|
||||
BilinearPatchT<Vertex,Vertex_t> patch;
|
||||
};
|
||||
|
||||
struct BSplinePatch
|
||||
{
|
||||
/* creates BSplinePatch from a half edge */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
return Ref(BSPLINE_PATCH, new (alloc(sizeof(BSplinePatch))) BSplinePatch(edge,vertices,stride));
|
||||
}
|
||||
|
||||
__forceinline BSplinePatch (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
: patch(edge,vertices,stride) {}
|
||||
|
||||
/* creates BSplinePatch from a CatmullClarkPatch */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
|
||||
const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
|
||||
return Ref(BSPLINE_PATCH, new (alloc(sizeof(BSplinePatch))) BSplinePatch(patch,border0,border1,border2,border3));
|
||||
}
|
||||
|
||||
__forceinline BSplinePatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
|
||||
: patch(patch,border0,border1,border2,border3) {}
|
||||
|
||||
public:
|
||||
BSplinePatchT<Vertex,Vertex_t> patch;
|
||||
};
|
||||
|
||||
struct BezierPatch
|
||||
{
|
||||
/* creates BezierPatch from a half edge */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
return Ref(BEZIER_PATCH, new (alloc(sizeof(BezierPatch))) BezierPatch(edge,vertices,stride));
|
||||
}
|
||||
|
||||
__forceinline BezierPatch (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
: patch(edge,vertices,stride) {}
|
||||
|
||||
/* creates Bezier from a CatmullClarkPatch */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
|
||||
const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
|
||||
return Ref(BEZIER_PATCH, new (alloc(sizeof(BezierPatch))) BezierPatch(patch,border0,border1,border2,border3));
|
||||
}
|
||||
|
||||
__forceinline BezierPatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
|
||||
: patch(patch,border0,border1,border2,border3) {}
|
||||
|
||||
public:
|
||||
BezierPatchT<Vertex,Vertex_t> patch;
|
||||
};
|
||||
|
||||
struct GregoryPatch
|
||||
{
|
||||
/* creates GregoryPatch from half edge */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
|
||||
return Ref(GREGORY_PATCH, new (alloc(sizeof(GregoryPatch))) GregoryPatch(edge,vertices,stride));
|
||||
}
|
||||
|
||||
__forceinline GregoryPatch (const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
: patch(CatmullClarkPatch(edge,vertices,stride)) {}
|
||||
|
||||
/* creates GregoryPatch from CatmullClarkPatch */
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
|
||||
const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
|
||||
return Ref(GREGORY_PATCH, new (alloc(sizeof(GregoryPatch))) GregoryPatch(patch,border0,border1,border2,border3));
|
||||
}
|
||||
|
||||
__forceinline GregoryPatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
|
||||
: patch(patch,border0,border1,border2,border3) {}
|
||||
|
||||
public:
|
||||
GregoryPatchT<Vertex,Vertex_t> patch;
|
||||
};
|
||||
|
||||
struct SubdividedQuadPatch
|
||||
{
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, Ref children[4]) {
|
||||
return Ref(SUBDIVIDED_QUAD_PATCH, new (alloc(sizeof(SubdividedQuadPatch))) SubdividedQuadPatch(children));
|
||||
}
|
||||
|
||||
__forceinline SubdividedQuadPatch(Ref children[4]) {
|
||||
for (size_t i=0; i<4; i++) child[i] = children[i];
|
||||
}
|
||||
|
||||
public:
|
||||
Ref child[4];
|
||||
};
|
||||
|
||||
struct SubdividedGeneralPatch
|
||||
{
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, Ref* children, const unsigned N) {
|
||||
return Ref(SUBDIVIDED_GENERAL_PATCH, new (alloc(sizeof(SubdividedGeneralPatch))) SubdividedGeneralPatch(children,N));
|
||||
}
|
||||
|
||||
__forceinline SubdividedGeneralPatch(Ref* children, const unsigned N) : N(N) {
|
||||
for (unsigned i=0; i<N; i++) child[i] = children[i];
|
||||
}
|
||||
|
||||
unsigned N;
|
||||
Ref child[MAX_PATCH_VALENCE];
|
||||
};
|
||||
|
||||
/*! Default constructor. */
|
||||
/* Empty body: this constructor performs no initialization of its own. */
__forceinline PatchT () {}
|
||||
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride)
|
||||
{
|
||||
if (PATCH_MAX_CACHE_DEPTH == 0)
|
||||
return nullptr;
|
||||
|
||||
Ref child(0);
|
||||
switch (edge->patch_type) {
|
||||
case HalfEdge::BILINEAR_PATCH: child = BilinearPatch::create(alloc,edge,vertices,stride); break;
|
||||
case HalfEdge::REGULAR_QUAD_PATCH: child = RegularPatch::create(alloc,edge,vertices,stride); break;
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
case HalfEdge::IRREGULAR_QUAD_PATCH: child = GregoryPatch::create(alloc,edge,vertices,stride); break;
|
||||
#endif
|
||||
default: {
|
||||
GeneralCatmullClarkPatch patch(edge,vertices,stride);
|
||||
child = PatchT::create(alloc,patch,edge,vertices,stride,0);
|
||||
}
|
||||
}
|
||||
return child;
|
||||
}
|
||||
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, GeneralCatmullClarkPatch& patch, const HalfEdge* edge, const char* vertices, size_t stride, size_t depth)
|
||||
{
|
||||
/* convert into standard quad patch if possible */
|
||||
if (likely(patch.isQuadPatch()))
|
||||
{
|
||||
CatmullClarkPatch qpatch; patch.init(qpatch);
|
||||
return PatchT::create(alloc,qpatch,edge,vertices,stride,depth);
|
||||
}
|
||||
|
||||
/* do only cache up to some depth */
|
||||
if (depth >= PATCH_MAX_CACHE_DEPTH)
|
||||
return nullptr;
|
||||
|
||||
/* subdivide patch */
|
||||
unsigned N;
|
||||
array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
|
||||
patch.subdivide(patches,N);
|
||||
|
||||
if (N == 4)
|
||||
{
|
||||
Ref child[4];
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[GeneralCatmullClarkPatch::SIZE]; patch.getLimitBorder(borders);
|
||||
BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
|
||||
BezierCurve border1l,border1r; borders[1].subdivide(border1l,border1r);
|
||||
BezierCurve border2l,border2r; borders[2].subdivide(border2l,border2r);
|
||||
BezierCurve border3l,border3r; borders[3].subdivide(border3l,border3r);
|
||||
GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
|
||||
child[0] = PatchT::create(alloc,patches[0],edge,vertices,stride,depth+1,&border0l,nullptr,nullptr,&border3r);
|
||||
child[1] = PatchT::create(alloc,patches[1],edge,vertices,stride,depth+1,&border0r,&border1l,nullptr,nullptr);
|
||||
child[2] = PatchT::create(alloc,patches[2],edge,vertices,stride,depth+1,nullptr,&border1r,&border2l,nullptr);
|
||||
child[3] = PatchT::create(alloc,patches[3],edge,vertices,stride,depth+1,nullptr,nullptr,&border2r,&border3l);
|
||||
#else
|
||||
GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
|
||||
for (size_t i=0; i<4; i++)
|
||||
child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
|
||||
#endif
|
||||
return SubdividedQuadPatch::create(alloc,child);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(N<MAX_PATCH_VALENCE);
|
||||
Ref child[MAX_PATCH_VALENCE];
|
||||
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
BezierCurve borders[GeneralCatmullClarkPatch::SIZE];
|
||||
patch.getLimitBorder(borders);
|
||||
|
||||
for (size_t i0=0; i0<N; i0++) {
|
||||
const size_t i2 = i0==0 ? N-1 : i0-1;
|
||||
BezierCurve border0l,border0r; borders[i0].subdivide(border0l,border0r);
|
||||
BezierCurve border2l,border2r; borders[i2].subdivide(border2l,border2r);
|
||||
child[i0] = PatchT::create(alloc,patches[i0],edge,vertices,stride,depth+1, &border0l, nullptr, nullptr, &border2r);
|
||||
}
|
||||
#else
|
||||
for (size_t i=0; i<N; i++)
|
||||
child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
|
||||
#endif
|
||||
return SubdividedGeneralPatch::create(alloc,child,N);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*! Tells whether subdivision has to stop at the given depth.
 *
 *  The depth limit is PATCH_MAX_EVAL_DEPTH_CREASE when type has any
 *  TYPE_CREASES bit set and PATCH_MAX_EVAL_DEPTH_IRREGULAR otherwise.
 *  The patch argument is currently unused; a resolution-based criterion
 *  (patch.isFinalResolution(PATCH_MIN_RESOLUTION)) used to be OR-ed in under
 *  PATCH_MIN_RESOLUTION but is disabled. */
static __forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
{
  size_t depthLimit = PATCH_MAX_EVAL_DEPTH_IRREGULAR;
  if (type & CatmullClarkRing::TYPE_CREASES)
    depthLimit = PATCH_MAX_EVAL_DEPTH_CREASE;

  return depth >= depthLimit;
}
|
||||
|
||||
template<typename Allocator>
|
||||
__noinline static Ref create(const Allocator& alloc, CatmullClarkPatch& patch, const HalfEdge* edge, const char* vertices, size_t stride, size_t depth,
|
||||
const BezierCurve* border0 = nullptr, const BezierCurve* border1 = nullptr, const BezierCurve* border2 = nullptr, const BezierCurve* border3 = nullptr)
|
||||
{
|
||||
const typename CatmullClarkPatch::Type ty = patch.type();
|
||||
if (unlikely(final(patch,ty,depth))) {
|
||||
if (ty & CatmullClarkRing::TYPE_REGULAR) return RegularPatch::create(alloc,patch,border0,border1,border2,border3);
|
||||
else return IrregularFillPatch::create(alloc,patch,border0,border1,border2,border3);
|
||||
}
|
||||
else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
|
||||
assert(depth > 0); return RegularPatch::create(alloc,patch,border0,border1,border2,border3);
|
||||
}
|
||||
#if PATCH_USE_GREGORY == 2
|
||||
else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
|
||||
assert(depth > 0); return GregoryPatch::create(alloc,patch,border0,border1,border2,border3);
|
||||
}
|
||||
#endif
|
||||
else if (depth >= PATCH_MAX_CACHE_DEPTH) {
|
||||
return EvalPatch::create(alloc,patch);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
Ref child[4];
|
||||
array_t<CatmullClarkPatch,4> patches;
|
||||
patch.subdivide(patches);
|
||||
|
||||
for (size_t i=0; i<4; i++)
|
||||
child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
|
||||
return SubdividedQuadPatch::create(alloc,child);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
typedef PatchT<Vec3fa,Vec3fa_t> Patch3fa;
|
||||
}
|
||||
129
Framework/external/embree/kernels/subdiv/patch_eval.h
vendored
Normal file
129
Framework/external/embree/kernels/subdiv/patch_eval.h
vendored
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
#include "feature_adaptive_eval.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename Vertex, typename Vertex_t = Vertex>
|
||||
struct PatchEval
|
||||
{
|
||||
public:
|
||||
|
||||
typedef PatchT<Vertex,Vertex_t> Patch;
|
||||
typedef typename Patch::Ref Ref;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
|
||||
/*! Evaluates the subdivision patch of edge at (u,v), using the shared lazy
 *  tessellation cache when possible.
 *
 *  The output pointers are stored in the members and passed on to the
 *  evaluators. The cached patch tree is looked up (and built through
 *  Patch::create on a miss). If a tree exists, the cache time is still valid
 *  after the lookup, and the tree evaluation succeeds, the constructor is done.
 *  In every other case it falls back to FeatureAdaptiveEval on the half-edge
 *  data. The cache is unlocked exactly once on both paths, always after the
 *  cached tree has been used. */
PatchEval (SharedLazyTessellationCache::CacheEntry& entry, size_t commitCounter,
           const HalfEdge* edge, const char* vertices, size_t stride, const float u, const float v,
           Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
  : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv)
{
  /* conservative time for the very first allocation */
  auto timeBefore = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);

  /* invoked by the cache on a miss to build the patch tree in cache memory */
  auto buildTree = [&] () {
    auto alloc = [&](size_t bytes) { return SharedLazyTessellationCache::malloc(bytes); };
    return Patch::create(alloc,edge,vertices,stride);
  };
  Ref patch = SharedLazyTessellationCache::lookup(entry,commitCounter,buildTree,true);

  auto timeAfter = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
  const bool allAllocationsValid = SharedLazyTessellationCache::validTime(timeBefore,timeAfter);

  /* evaluate from the cached tree before releasing the cache */
  const bool servedFromCache = patch && allAllocationsValid && eval(patch,u,v,1.0f,0);
  SharedLazyTessellationCache::unlock();
  if (servedFromCache)
    return;

  /* fallback: evaluate directly from the half-edge structure */
  FeatureAdaptiveEval<Vertex,Vertex_t>(edge,vertices,stride,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv);
  PATCH_DEBUG_SUBDIVISION(edge,c,-1,-1);
}
|
||||
|
||||
/*! Descends into the quadrant of a SubdividedQuadPatch that contains (u,v).
 *
 *  Quadrants: child[0] = low u / low v, child[1] = high u / low v,
 *  child[2] = high u / high v, child[3] = low u / high v. The coordinates are
 *  remapped to the child's own parameter range and dscale is doubled.
 *  Boundary routing is asymmetric: for v < 0.5 a sample with u == 0.5 goes to
 *  child[1], for v >= 0.5 it goes to child[3]. */
__forceinline bool eval_quad(const typename Patch::SubdividedQuadPatch* This, const float u, const float v, const float dscale, const size_t depth)
{
  const float  childScale = 2.0f*dscale;
  const size_t childDepth = depth+1;

  if (v < 0.5f)
  {
    return (u < 0.5f)
      ? eval(This->child[0],2.0f*u,     2.0f*v,childScale,childDepth)
      : eval(This->child[1],2.0f*u-1.0f,2.0f*v,childScale,childDepth);
  }

  return (u > 0.5f)
    ? eval(This->child[2],2.0f*u-1.0f,2.0f*v-1.0f,childScale,childDepth)
    : eval(This->child[3],2.0f*u,     2.0f*v-1.0f,childScale,childDepth);
}
|
||||
|
||||
/*! Descends into one child of a SubdividedGeneralPatch.
 *
 *  The child index and the local coordinates are both decoded from (U,V):
 *  with a = 0.5*U and b = 0.5*V the index is 4*floor(b) + floor(a), and the
 *  local coordinates are 2*frac(a)-0.5 and 2*frac(b)-0.5. The child is
 *  evaluated with a derivative scale of 1. */
bool eval_general(const typename Patch::SubdividedGeneralPatch* This, const float U, const float V, const size_t depth)
{
  const float halfU = 0.5f*U;
  const float halfV = 0.5f*V;

  const unsigned col = (unsigned) floor(halfU);
  const unsigned row = (unsigned) floor(halfV);
  const float u = 2.0f*frac(halfU)-0.5f;
  const float v = 2.0f*frac(halfV)-0.5f;

  const unsigned childIndex = 4*row+col;
  assert(childIndex<This->N);
  return eval(This->child[childIndex],u,v,1.0f,depth+1);
}
|
||||
|
||||
/*! Evaluates the patch tree rooted at This at (u,v).
 *
 *  Dispatches on the node's type tag. Leaf patches (bilinear, B-spline,
 *  Bezier, Gregory) are evaluated directly with the stored output pointers
 *  and dscale; subdivided nodes recurse into the matching child; an
 *  EVAL_PATCH node is deserialized into a CatmullClarkPatch and evaluated
 *  with FeatureAdaptiveEval.
 *
 *  Returns false when This is null or carries an unknown type tag; leaf and
 *  EVAL_PATCH nodes return true, subdivided nodes return their child's result. */
bool eval(Ref This, const float& u, const float& v, const float dscale, const size_t depth)
{
  if (!This) return false;

  switch (This.type())
  {
  case Patch::BILINEAR_PATCH: {
    auto* const leaf = (typename Patch::BilinearPatch*)This.object();
    leaf->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
    PATCH_DEBUG_SUBDIVISION(This,-1,c,c);
    return true;
  }
  case Patch::BSPLINE_PATCH: {
    auto* const leaf = (typename Patch::BSplinePatch*)This.object();
    leaf->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
    PATCH_DEBUG_SUBDIVISION(This,-1,c,-1);
    return true;
  }
  case Patch::BEZIER_PATCH: {
    auto* const leaf = (typename Patch::BezierPatch*)This.object();
    leaf->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
    PATCH_DEBUG_SUBDIVISION(This,-1,c,-1);
    return true;
  }
  case Patch::GREGORY_PATCH: {
    auto* const leaf = (typename Patch::GregoryPatch*)This.object();
    leaf->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
    PATCH_DEBUG_SUBDIVISION(This,-1,-1,c);
    return true;
  }
  case Patch::SUBDIVIDED_QUAD_PATCH: {
    auto* const inner = (typename Patch::SubdividedQuadPatch*)This.object();
    return eval_quad(inner,u,v,dscale,depth);
  }
  case Patch::SUBDIVIDED_GENERAL_PATCH: {
    /* the general node ignores dscale, so it must still be 1 when we get here */
    assert(dscale == 1.0f);
    auto* const inner = (typename Patch::SubdividedGeneralPatch*)This.object();
    return eval_general(inner,u,v,depth);
  }
  case Patch::EVAL_PATCH: {
    CatmullClarkPatch patch;
    patch.deserialize(This.object());
    FeatureAdaptiveEval<Vertex,Vertex_t>(patch,u,v,dscale,depth,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv);
    return true;
  }
  default:
    assert(false);
    return false;
  }
}
|
||||
|
||||
private:
|
||||
Vertex* const P;
|
||||
Vertex* const dPdu;
|
||||
Vertex* const dPdv;
|
||||
Vertex* const ddPdudu;
|
||||
Vertex* const ddPdvdv;
|
||||
Vertex* const ddPdudv;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
245
Framework/external/embree/kernels/subdiv/patch_eval_grid.h
vendored
Normal file
245
Framework/external/embree/kernels/subdiv/patch_eval_grid.h
vendored
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
#include "feature_adaptive_eval_grid.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct PatchEvalGrid
|
||||
{
|
||||
typedef Patch3fa Patch;
|
||||
typedef Patch::Ref Ref;
|
||||
typedef GeneralCatmullClarkPatch3fa GeneralCatmullClarkPatch;
|
||||
typedef CatmullClarkPatch3fa CatmullClarkPatch;
|
||||
typedef BSplinePatch3fa BSplinePatch;
|
||||
typedef BezierPatch3fa BezierPatch;
|
||||
typedef GregoryPatch3fa GregoryPatch;
|
||||
typedef BilinearPatch3fa BilinearPatch;
|
||||
|
||||
private:
|
||||
const unsigned x0,x1;
|
||||
const unsigned y0,y1;
|
||||
const unsigned swidth,sheight;
|
||||
const float rcp_swidth, rcp_sheight;
|
||||
float* const Px;
|
||||
float* const Py;
|
||||
float* const Pz;
|
||||
float* const U;
|
||||
float* const V;
|
||||
float* const Nx;
|
||||
float* const Ny;
|
||||
float* const Nz;
|
||||
const unsigned dwidth,dheight;
|
||||
unsigned count;
|
||||
|
||||
public:
|
||||
|
||||
/*! Evaluates the grid samples [x0,x1] x [y0,y1] of a swidth x sheight
 *  sampling of the given patch tree into the output arrays.
 *
 *  All work happens in the constructor: the members are initialized, then the
 *  tree is traversed through eval(). In debug builds it is asserted that the
 *  traversal succeeded and that exactly (x1-x0+1)*(y1-y0+1) samples were
 *  counted. rcp_swidth / rcp_sheight are 1/(swidth-1) and 1/(sheight-1). */
PatchEvalGrid (Ref patch, unsigned subPatch,
               const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
               float* Px, float* Py, float* Pz, float* U, float* V,
               float* Nx, float* Ny, float* Nz,
               const unsigned dwidth, const unsigned dheight)
  : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
    Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), dheight(dheight), count(0)
{
  assert(swidth < (2<<20) && sheight < (2<<20));

  /* index range of the whole sample grid */
  const Vec2f gridLower(0.0f,0.0f);
  const Vec2f gridUpper(float(swidth-1),float(sheight-1));
  const BBox2f srange(gridLower,gridUpper);

  /* index range that was requested */
  const Vec2f evalLower(float(x0),float(y0));
  const Vec2f evalUpper((float)x1,(float)y1);
  const BBox2f erange(evalLower,evalUpper);

  bool done MAYBE_UNUSED = eval(patch,subPatch,srange,erange);
  assert(done);
  assert(count == (x1-x0+1)*(y1-y0+1));
}
|
||||
|
||||
template<typename Patch>
|
||||
__forceinline void evalLocalGrid(const Patch* patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1)
|
||||
{
|
||||
const float scale_x = rcp(srange.upper.x-srange.lower.x);
|
||||
const float scale_y = rcp(srange.upper.y-srange.lower.y);
|
||||
count += (lx1-lx0)*(ly1-ly0);
|
||||
|
||||
#if 0
|
||||
for (unsigned iy=ly0; iy<ly1; iy++) {
|
||||
for (unsigned ix=lx0; ix<lx1; ix++) {
|
||||
const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x);
|
||||
const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y);
|
||||
const Vec3fa p = patch->patch.eval(lu,lv);
|
||||
const float u = float(ix)*rcp_swidth;
|
||||
const float v = float(iy)*rcp_sheight;
|
||||
const int ofs = (iy-y0)*dwidth+(ix-x0);
|
||||
Px[ofs] = p.x;
|
||||
Py[ofs] = p.y;
|
||||
Pz[ofs] = p.z;
|
||||
U[ofs] = u;
|
||||
V[ofs] = v;
|
||||
}
|
||||
}
|
||||
#else
|
||||
foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) {
|
||||
const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x);
|
||||
const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y);
|
||||
const Vec3vfx p = patch->patch.eval(lu,lv);
|
||||
Vec3vfx n = zero;
|
||||
if (unlikely(Nx != nullptr)) n = normalize_safe(patch->patch.normal(lu,lv));
|
||||
const vfloatx u = vfloatx(ix)*rcp_swidth;
|
||||
const vfloatx v = vfloatx(iy)*rcp_sheight;
|
||||
const vintx ofs = (iy-y0)*dwidth+(ix-x0);
|
||||
if (likely(all(valid)) && all(iy==iy[0])) {
|
||||
const unsigned ofs2 = ofs[0];
|
||||
vfloatx::storeu(Px+ofs2,p.x);
|
||||
vfloatx::storeu(Py+ofs2,p.y);
|
||||
vfloatx::storeu(Pz+ofs2,p.z);
|
||||
vfloatx::storeu(U+ofs2,u);
|
||||
vfloatx::storeu(V+ofs2,v);
|
||||
if (unlikely(Nx != nullptr)) {
|
||||
vfloatx::storeu(Nx+ofs2,n.x);
|
||||
vfloatx::storeu(Ny+ofs2,n.y);
|
||||
vfloatx::storeu(Nz+ofs2,n.z);
|
||||
}
|
||||
} else {
|
||||
foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) {
|
||||
const unsigned ofs2 = ofs[j]-j;
|
||||
vfloatx::storeu(valid,Px+ofs2,p.x);
|
||||
vfloatx::storeu(valid,Py+ofs2,p.y);
|
||||
vfloatx::storeu(valid,Pz+ofs2,p.z);
|
||||
vfloatx::storeu(valid,U+ofs2,u);
|
||||
vfloatx::storeu(valid,V+ofs2,v);
|
||||
if (unlikely(Nx != nullptr)) {
|
||||
vfloatx::storeu(valid,Nx+ofs2,n.x);
|
||||
vfloatx::storeu(valid,Ny+ofs2,n.y);
|
||||
vfloatx::storeu(valid,Nz+ofs2,n.z);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Evaluates all grid samples of erange that fall onto the patch tree rooted at This.
 *
 *  srange is the region of the sample grid covered by This, erange the part of
 *  the requested region that overlaps it. The float range is converted to the
 *  half-open integer range [lx0,lx1) x [ly0,ly1); the upper bound is extended
 *  by one when erange.upper equals x1 (resp. y1) and either srange.lower is
 *  smaller than it or it is 0, so that the sample at x1 / y1 itself is included.
 *
 *  Returns true when there is nothing to evaluate (empty range) or when every
 *  visited node was evaluated. Returns false when a node with samples is null
 *  or carries a type tag this function does not handle. For a subdivided quad
 *  node the results of all four children are combined, so a failure in any
 *  subtree is reported to the caller; all four children are always visited. */
bool eval(Ref This, const BBox2f& srange, const BBox2f& erange, const unsigned depth)
{
  if (erange.empty())
    return true;

  const int lx0 = (int) ceilf(erange.lower.x);
  const int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0));
  const int ly0 = (int) ceilf(erange.lower.y);
  const int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0));

  /* no integer sample inside the range */
  if (lx0 >= lx1 || ly0 >= ly1)
    return true;

  /* samples requested, but no patch to take them from */
  if (!This)
    return false;

  switch (This.type())
  {
  case Patch::BILINEAR_PATCH: {
    evalLocalGrid((Patch::BilinearPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
    return true;
  }
  case Patch::BSPLINE_PATCH: {
    evalLocalGrid((Patch::BSplinePatch*)This.object(),srange,lx0,lx1,ly0,ly1);
    return true;
  }
  case Patch::BEZIER_PATCH: {
    evalLocalGrid((Patch::BezierPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
    return true;
  }
  case Patch::GREGORY_PATCH: {
    evalLocalGrid((Patch::GregoryPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
    return true;
  }
  case Patch::SUBDIVIDED_QUAD_PATCH:
  {
    /* split the covered region at its center into the four child regions */
    const Vec2f c = srange.center();
    const BBox2f srange0(srange.lower,c);
    const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
    const BBox2f srange2(c,srange.upper);
    const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));

    Patch::SubdividedQuadPatch* patch = (Patch::SubdividedQuadPatch*)This.object();

    /* visit every child unconditionally (no short-circuit) so that a failing
       child does not prevent its siblings from writing their samples */
    const bool ok0 = eval(patch->child[0],srange0,intersect(srange0,erange),depth+1);
    const bool ok1 = eval(patch->child[1],srange1,intersect(srange1,erange),depth+1);
    const bool ok2 = eval(patch->child[2],srange2,intersect(srange2,erange),depth+1);
    const bool ok3 = eval(patch->child[3],srange3,intersect(srange3,erange),depth+1);
    return ok0 && ok1 && ok2 && ok3;
  }
  case Patch::EVAL_PATCH: {
    CatmullClarkPatch patch; patch.deserialize(This.object());
    FeatureAdaptiveEvalGrid(patch,srange,erange,depth,x0,x1,y0,y1,swidth,sheight,Px,Py,Pz,U,V,Nx,Ny,Nz,dwidth,dheight);
    /* the adaptive evaluator does not update count, so account for its samples here */
    count += (lx1-lx0)*(ly1-ly0);
    return true;
  }
  default:
    assert(false);
    return false;
  }
}
|
||||
|
||||
/*! Entry point of the grid traversal.
 *
 *  For a SUBDIVIDED_GENERAL_PATCH root only the child selected by subPatch is
 *  traversed, starting at depth 1. Any other root is traversed as a whole at
 *  depth 0, in which case subPatch is expected to be 0. Returns false for a
 *  null root. */
bool eval(Ref This, unsigned subPatch, const BBox2f& srange, const BBox2f& erange)
{
  if (!This)
    return false;

  if (This.type() == Patch::SUBDIVIDED_GENERAL_PATCH)
  {
    Patch::SubdividedGeneralPatch* general = (Patch::SubdividedGeneralPatch*)This.object();
    assert(subPatch < general->N);
    return eval(general->child[subPatch],srange,erange,1);
  }

  assert(subPatch == 0);
  return eval(This,srange,erange,0);
}
|
||||
};
|
||||
|
||||
/*! Number of sub-patches the face of h is tessellated as: 1 for a face with
 *  four edges, otherwise one per edge. */
__forceinline unsigned patch_eval_subdivision_count (const HalfEdge* h)
{
  const unsigned numEdges = h->numEdges();
  return (numEdges == 4) ? 1u : numEdges;
}
|
||||
|
||||
template<typename Tessellator>
|
||||
inline void patch_eval_subdivision (const HalfEdge* h, Tessellator tessellator)
|
||||
{
|
||||
const unsigned N = h->numEdges();
|
||||
int neighborSubdiv[GeneralCatmullClarkPatch3fa::SIZE]; // FIXME: use array_t
|
||||
float levels[GeneralCatmullClarkPatch3fa::SIZE];
|
||||
for (unsigned i=0; i<N; i++) {
|
||||
assert(i<GeneralCatmullClarkPatch3fa::SIZE);
|
||||
neighborSubdiv[i] = h->hasOpposite() ? h->opposite()->numEdges() != 4 : 0;
|
||||
levels[i] = h->edge_level;
|
||||
h = h->next();
|
||||
}
|
||||
if (N == 4)
|
||||
{
|
||||
const Vec2f uv[4] = { Vec2f(0.0f,0.0f), Vec2f(1.0f,0.0f), Vec2f(1.0f,1.0f), Vec2f(0.0f,1.0f) };
|
||||
tessellator(uv,neighborSubdiv,levels,0);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned i=0; i<N; i++)
|
||||
{
|
||||
assert(i<MAX_PATCH_VALENCE);
|
||||
static_assert(MAX_PATCH_VALENCE <= 16, "MAX_PATCH_VALENCE > 16");
|
||||
const int h = (i >> 2) & 3, l = i & 3;
|
||||
const Vec2f subPatchID((float)l,(float)h);
|
||||
const Vec2f uv[4] = { 2.0f*subPatchID + (0.5f+Vec2f(0.0f,0.0f)),
|
||||
2.0f*subPatchID + (0.5f+Vec2f(1.0f,0.0f)),
|
||||
2.0f*subPatchID + (0.5f+Vec2f(1.0f,1.0f)),
|
||||
2.0f*subPatchID + (0.5f+Vec2f(0.0f,1.0f)) };
|
||||
const int neighborSubdiv1[4] = { 0,0,0,0 };
|
||||
const float levels1[4] = { 0.5f*levels[(i+0)%N], 0.5f*levels[(i+0)%N], 0.5f*levels[(i+N-1)%N], 0.5f*levels[(i+N-1)%N] };
|
||||
tessellator(uv,neighborSubdiv1,levels1,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
127
Framework/external/embree/kernels/subdiv/patch_eval_simd.h
vendored
Normal file
127
Framework/external/embree/kernels/subdiv/patch_eval_simd.h
vendored
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "patch.h"
|
||||
#include "feature_adaptive_eval_simd.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename vbool, typename vint, typename vfloat, typename Vertex, typename Vertex_t = Vertex>
|
||||
struct PatchEvalSimd
|
||||
{
|
||||
public:
|
||||
|
||||
typedef PatchT<Vertex,Vertex_t> Patch;
|
||||
typedef typename Patch::Ref Ref;
|
||||
typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
|
||||
|
||||
/*! Evaluates the subdivision patch of edge for a SIMD packet of (u,v)
 *  coordinates, using the shared lazy tessellation cache when possible.
 *
 *  The cached patch tree is looked up (and built through Patch::create on a
 *  miss). If the cache time is no longer valid after the lookup, the tree is
 *  dropped. Lanes of valid0 that the tree evaluation handled are done; the
 *  cache is unlocked, and all remaining active lanes are evaluated with
 *  FeatureAdaptiveEvalSimd on the half-edge data. */
PatchEvalSimd (SharedLazyTessellationCache::CacheEntry& entry, size_t commitCounter,
               const HalfEdge* edge, const char* vertices, size_t stride, const vbool& valid0, const vfloat& u, const vfloat& v,
               float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
  : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
{
  /* conservative time for the very first allocation */
  auto timeBefore = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);

  /* invoked by the cache on a miss to build the patch tree in cache memory */
  auto buildTree = [&] () {
    auto alloc = [](size_t bytes) { return SharedLazyTessellationCache::malloc(bytes); };
    return Patch::create(alloc,edge,vertices,stride);
  };
  Ref patch = SharedLazyTessellationCache::lookup(entry,commitCounter,buildTree,true);

  auto timeAfter = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
  const bool allAllocationsValid = SharedLazyTessellationCache::validTime(timeBefore,timeAfter);

  /* do not use the tree when the validity check failed */
  if (!allAllocationsValid)
    patch = nullptr;

  /* use cached data structure for calculations */
  vbool handled = vbool(false);
  if (patch)
    handled = eval(valid0,patch,u,v,1.0f,0);
  SharedLazyTessellationCache::unlock();

  /* lanes the cache could not serve go through the uncached evaluator */
  const vbool remaining = valid0 & !handled;
  if (any(remaining))
    FeatureAdaptiveEvalSimd<vbool,vint,vfloat,Vertex,Vertex_t>(edge,vertices,stride,remaining,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dstride,N);
}
|
||||
|
||||
/*! Distributes the active lanes over the four quadrants of a SubdividedQuadPatch.
 *
 *  A lane belongs to the high-u (high-v) half when its coordinate is >= 0.5.
 *  Every quadrant with at least one lane is evaluated with coordinates
 *  remapped to the child's own range and a doubled dscale. Returns the union
 *  of the masks reported by the children. */
vbool eval_quad(const vbool& valid, const typename Patch::SubdividedQuadPatch* This, const vfloat& u, const vfloat& v, const float dscale, const size_t depth)
{
  const vbool lowU = u < 0.5f, highU = u >= 0.5f;
  const vbool lowV = v < 0.5f, highV = v >= 0.5f;

  /* lane masks per quadrant, named after child index */
  const vbool lanes0 = valid & lowU  & lowV;
  const vbool lanes3 = valid & lowU  & highV;
  const vbool lanes1 = valid & highU & lowV;
  const vbool lanes2 = valid & highU & highV;

  const float  childScale = 2.0f*dscale;
  const size_t childDepth = depth+1;

  vbool done = false;
  if (any(lanes0)) done |= eval(lanes0,This->child[0],2.0f*u,     2.0f*v,     childScale,childDepth);
  if (any(lanes1)) done |= eval(lanes1,This->child[1],2.0f*u-1.0f,2.0f*v,     childScale,childDepth);
  if (any(lanes2)) done |= eval(lanes2,This->child[2],2.0f*u-1.0f,2.0f*v-1.0f,childScale,childDepth);
  if (any(lanes3)) done |= eval(lanes3,This->child[3],2.0f*u,     2.0f*v-1.0f,childScale,childDepth);
  return done;
}
|
||||
|
||||
/*! Distributes the active lanes over the children of a SubdividedGeneralPatch.
 *
 *  Per lane, with a = 0.5*U and b = 0.5*V, the child index is
 *  (floor(b) << 2) + floor(a) and the local coordinates are 2*frac(a)-0.5 and
 *  2*frac(b)-0.5. Each distinct child index among the active lanes is
 *  evaluated once with a derivative scale of 1. Returns the union of the
 *  masks reported by the children. */
vbool eval_general(const vbool& valid, const typename Patch::SubdividedGeneralPatch* patch, const vfloat& U, const vfloat& V, const size_t depth)
{
  const vfloat halfU = 0.5f*U;
  const vfloat halfV = 0.5f*V;

  const vint col = (vint)floor(halfU);
  const vint row = (vint)floor(halfV);
  const vfloat u = 2.0f*frac(halfU)-0.5f;
  const vfloat v = 2.0f*frac(halfV)-0.5f;

  const vint childIndex = (row<<2)+col;
  assert(all(valid,childIndex<patch->N));

  vbool done = false;
  foreach_unique(valid,childIndex,[&](const vbool& lanes, const int slot) {
    done |= eval(lanes,patch->child[slot],u,v,1.0f,depth+1);
  });
  return done;
}
|
||||
|
||||
/*! Evaluates the patch tree rooted at This for the lanes selected by valid.
 *
 *  Dispatches on the node's type tag. Leaf patches (bilinear, B-spline,
 *  Bezier, Gregory) are evaluated directly with the stored output pointers,
 *  dscale, dstride and N; subdivided nodes recurse into their children; an
 *  EVAL_PATCH node is deserialized into a CatmullClarkPatch and evaluated
 *  with FeatureAdaptiveEvalSimd.
 *
 *  Returns an all-false mask when This is null or carries an unknown type
 *  tag; leaf and EVAL_PATCH nodes return valid, subdivided nodes return the
 *  mask produced by their children. */
vbool eval(const vbool& valid, Ref This, const vfloat& u, const vfloat& v, const float dscale, const size_t depth)
{
  if (!This) return false;

  switch (This.type())
  {
  case Patch::BILINEAR_PATCH: {
    auto* const leaf = (typename Patch::BilinearPatch*)This.object();
    leaf->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
    return valid;
  }
  case Patch::BSPLINE_PATCH: {
    auto* const leaf = (typename Patch::BSplinePatch*)This.object();
    leaf->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
    return valid;
  }
  case Patch::BEZIER_PATCH: {
    auto* const leaf = (typename Patch::BezierPatch*)This.object();
    leaf->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
    return valid;
  }
  case Patch::GREGORY_PATCH: {
    auto* const leaf = (typename Patch::GregoryPatch*)This.object();
    leaf->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
    return valid;
  }
  case Patch::SUBDIVIDED_QUAD_PATCH: {
    auto* const inner = (typename Patch::SubdividedQuadPatch*)This.object();
    return eval_quad(valid,inner,u,v,dscale,depth);
  }
  case Patch::SUBDIVIDED_GENERAL_PATCH: {
    /* the general node ignores dscale, so it must still be 1 when we get here */
    assert(dscale == 1.0f);
    auto* const inner = (typename Patch::SubdividedGeneralPatch*)This.object();
    return eval_general(valid,inner,u,v,depth);
  }
  case Patch::EVAL_PATCH: {
    CatmullClarkPatch patch;
    patch.deserialize(This.object());
    FeatureAdaptiveEvalSimd<vbool,vint,vfloat,Vertex,Vertex_t>(patch,valid,u,v,dscale,depth,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dstride,N);
    return valid;
  }
  default:
    assert(false);
    return false;
  }
}
|
||||
|
||||
private:
|
||||
float* const P;
|
||||
float* const dPdu;
|
||||
float* const dPdv;
|
||||
float* const ddPdudu;
|
||||
float* const ddPdvdv;
|
||||
float* const ddPdudv;
|
||||
const size_t dstride;
|
||||
const size_t N;
|
||||
};
|
||||
}
|
||||
}
|
||||
113
Framework/external/embree/kernels/subdiv/subdivpatch1base.cpp
vendored
Normal file
113
Framework/external/embree/kernels/subdiv/subdivpatch1base.cpp
vendored
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "subdivpatch1base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Initializes a patch descriptor for primitive pID of geometry gID.
 *
 *  The patch representation is chosen from the patch_type of the primitive's
 *  first half edge: bilinear and regular quad faces (and, when
 *  PATCH_USE_GREGORY == 2, irregular quad faces) are converted and
 *  placement-constructed into patch_v; every other face is marked EVAL_PATCH
 *  and only stores the half edge and the sub-patch index. The four uv corners
 *  are stored as 16-bit fixed point (value * 0x10000/8, clamped to
 *  [0,0xFFFF]) and the edge levels are set up through updateEdgeLevels. */
SubdivPatch1Base::SubdivPatch1Base (const unsigned int gID,
                                    const unsigned int pID,
                                    const unsigned int subPatch,
                                    const SubdivMesh *const mesh,
                                    const size_t time,
                                    const Vec2f uv[4],
                                    const float edge_level[4],
                                    const int subdiv[4],
                                    const int simd_width)
  : flags(0), type(INVALID_PATCH), geom(gID), prim(pID), time_(unsigned(time))
{
  static_assert(sizeof(SubdivPatch1Base) == 5 * 64, "SubdivPatch1Base has wrong size");

  const HalfEdge* edge = mesh->getHalfEdge(0,pID);

  switch (edge->patch_type)
  {
  case HalfEdge::BILINEAR_PATCH:
  {
    type = BILINEAR_PATCH;
    new (patch_v) BilinearPatch3fa(edge,mesh->getVertexBuffer(time));
    break;
  }
  case HalfEdge::REGULAR_QUAD_PATCH:
  {
#if PATCH_USE_BEZIER_PATCH
    type = BEZIER_PATCH;
    new (patch_v) BezierPatch3fa(BSplinePatch3fa(CatmullClarkPatch3fa(edge,mesh->getVertexBuffer(time))));
#else
    type = BSPLINE_PATCH;
    new (patch_v) BSplinePatch3fa(CatmullClarkPatch3fa(edge,mesh->getVertexBuffer(time))); // FIXME: init BSpline directly from half edge structure
#endif
    break;
  }
#if PATCH_USE_GREGORY == 2
  case HalfEdge::IRREGULAR_QUAD_PATCH:
  {
    type = GREGORY_PATCH;
    new (patch_v) DenseGregoryPatch3fa(GregoryPatch3fa(CatmullClarkPatch3fa(edge,mesh->getVertexBuffer(time))));
    break;
  }
#endif
  default:
  {
    /* no precomputed patch: remember where to evaluate from */
    type = EVAL_PATCH;
    set_edge(mesh->getHalfEdge(0,pID));
    set_subPatch(subPatch);
    break;
  }
  }

  /* quantize the uv corners to 16-bit fixed point */
  const float fixedScale = 0x10000/8.0f;
  const float fixedMax   = float(0xFFFF);
  for (size_t corner=0; corner<4; corner++) {
    u[corner] = (unsigned short)clamp(uv[corner].x * fixedScale, 0.0f, fixedMax);
    v[corner] = (unsigned short)clamp(uv[corner].y * fixedScale, 0.0f, fixedMax);
  }

  updateEdgeLevels(edge_level,subdiv,mesh,simd_width);
}
|
||||
|
||||
void SubdivPatch1Base::computeEdgeLevels(const float edge_level[4], const int subdiv[4], float level[4])
|
||||
{
|
||||
/* init discrete edge tessellation levels and grid resolution */
|
||||
assert( edge_level[0] >= 0.0f );
|
||||
assert( edge_level[1] >= 0.0f );
|
||||
assert( edge_level[2] >= 0.0f );
|
||||
assert( edge_level[3] >= 0.0f );
|
||||
|
||||
level[0] = max(ceilf(adjustTessellationLevel(edge_level[0],subdiv[0])),1.0f);
|
||||
level[1] = max(ceilf(adjustTessellationLevel(edge_level[1],subdiv[1])),1.0f);
|
||||
level[2] = max(ceilf(adjustTessellationLevel(edge_level[2],subdiv[2])),1.0f);
|
||||
level[3] = max(ceilf(adjustTessellationLevel(edge_level[3],subdiv[3])),1.0f);
|
||||
}
|
||||
|
||||
/*! Returns the grid resolution for the given edge levels: one more point than
 *  the larger level of edges 0/2 in x, and of edges 1/3 in y. */
Vec2i SubdivPatch1Base::computeGridSize(const float level[4])
{
  const int points_u = (int)max(level[0],level[2])+1;
  const int points_v = (int)max(level[1],level[3])+1;
  return Vec2i(points_u,points_v);
}
|
||||
|
||||
bool SubdivPatch1Base::updateEdgeLevels(const float edge_level[4], const int subdiv[4], const SubdivMesh *const mesh, const int simd_width)
|
||||
{
|
||||
/* calculate edge levels */
|
||||
float new_level[4];
|
||||
computeEdgeLevels(edge_level,subdiv,new_level);
|
||||
|
||||
/* calculate if tessellation pattern changed */
|
||||
bool grid_changed = false;
|
||||
for (size_t i=0; i<4; i++) {
|
||||
grid_changed |= (int)new_level[i] != (int)level[i];
|
||||
level[i] = new_level[i];
|
||||
}
|
||||
|
||||
/* compute grid resolution */
|
||||
Vec2i res = computeGridSize(level);
|
||||
grid_u_res = res.x; grid_v_res = res.y;
|
||||
grid_size_simd_blocks = ((grid_u_res*grid_v_res+simd_width-1)&(-simd_width)) / simd_width;
|
||||
|
||||
/* need stiching? */
|
||||
flags &= ~TRANSITION_PATCH;
|
||||
const int int_edge_points0 = (int)level[0] + 1;
|
||||
const int int_edge_points1 = (int)level[1] + 1;
|
||||
const int int_edge_points2 = (int)level[2] + 1;
|
||||
const int int_edge_points3 = (int)level[3] + 1;
|
||||
if (int_edge_points0 < (int)grid_u_res ||
|
||||
int_edge_points2 < (int)grid_u_res ||
|
||||
int_edge_points1 < (int)grid_v_res ||
|
||||
int_edge_points3 < (int)grid_v_res) {
|
||||
flags |= TRANSITION_PATCH;
|
||||
}
|
||||
|
||||
return grid_changed;
|
||||
}
|
||||
}
|
||||
156
Framework/external/embree/kernels/subdiv/subdivpatch1base.h
vendored
Normal file
156
Framework/external/embree/kernels/subdiv/subdivpatch1base.h
vendored
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../geometry/primitive.h"
|
||||
#include "bspline_patch.h"
|
||||
#include "bezier_patch.h"
|
||||
#include "gregory_patch.h"
|
||||
#include "gregory_patch_dense.h"
|
||||
#include "tessellation.h"
|
||||
#include "tessellation_cache.h"
|
||||
#include "gridrange.h"
|
||||
#include "patch_eval_grid.h"
|
||||
#include "feature_adaptive_eval_grid.h"
|
||||
#include "../common/scene_subdiv_mesh.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Cached description of one (sub-)patch of a subdivision mesh: the patch
 *  representation, quantized corner UVs, edge levels, grid resolution and the
 *  tessellation cache reference. The constructor asserts a size of 5*64 bytes,
 *  so the order and types of the data members must not be changed. */
struct __aligned(64) SubdivPatch1Base
{
public:

  /*! Representation stored in patch_v. */
  enum Type {
    INVALID_PATCH = 0,
    BSPLINE_PATCH = 1,
    BEZIER_PATCH = 2,
    GREGORY_PATCH = 3,
    EVAL_PATCH = 5,
    BILINEAR_PATCH = 6,
  };

  /*! Bits stored in flags. */
  enum Flags {
    TRANSITION_PATCH = 16,
  };

  /*! Default constructor, leaves all members uninitialized. */
  __forceinline SubdivPatch1Base () {}

  SubdivPatch1Base (const unsigned int gID,
                    const unsigned int pID,
                    const unsigned int subPatch,
                    const SubdivMesh *const mesh,
                    const size_t time,
                    const Vec2f uv[4],
                    const float edge_level[4],
                    const int subdiv[4],
                    const int simd_width);

  /*! True if the TRANSITION_PATCH flag is set. */
  __forceinline bool needsStitching() const {
    return (flags & TRANSITION_PATCH) != 0;
  }

  /*! Returns corner UV i, converted back from its 16 bit representation. */
  __forceinline Vec2f getUV(const size_t i) const {
    const Vec2f quantized((float)u[i],(float)v[i]);
    return quantized * (8.0f/0x10000);
  }

  static void computeEdgeLevels(const float edge_level[4], const int subdiv[4], float level[4]);
  static Vec2i computeGridSize(const float level[4]);
  bool updateEdgeLevels(const float edge_level[4], const int subdiv[4], const SubdivMesh *const mesh, const int simd_width);

public:

  /*! Size derived from the padded grid: four entries per grid slot, rounded up
   *  to groups of 16 entries, 64 bytes per group.
   *  NOTE(review): presumably the byte size of the x,y,z,uv float grid - confirm with callers. */
  __forceinline size_t getGridBytes() const {
    const size_t num_entries = (grid_size_simd_blocks * VSIZEX) * 4;
    const size_t num_groups  = (num_entries+15) / 16;
    return 64*num_groups;
  }

  /* thin wrappers around the spin lock of this patch */
  __forceinline void write_lock() { mtx.lock(); }
  __forceinline void write_unlock() { mtx.unlock(); }
  __forceinline bool try_write_lock() { return mtx.try_lock(); }
  //__forceinline bool try_read_lock() { return mtx.try_read_lock(); }

  /*! Replaces root_ref with a default-constructed tag. */
  __forceinline void resetRootRef() {
    //assert( mtx.hasInitialState() );
    root_ref = SharedLazyTessellationCache::Tag();
  }

  /*! Views root_ref as a tessellation cache entry. */
  __forceinline SharedLazyTessellationCache::CacheEntry& entry() {
    return (SharedLazyTessellationCache::CacheEntry&) root_ref;
  }

public:

  /*! Geometry ID of the mesh this patch belongs to. */
  __forceinline unsigned int geomID() const {
    return geom;
  }

  /*! Primitive ID of this patch. */
  __forceinline unsigned int primID() const {
    return prim;
  }

public:
  /* data members: keep order and types, the size is asserted in the constructor */
  SharedLazyTessellationCache::Tag root_ref;
  SpinLock mtx;

  unsigned short u[4];                        //!< 16bit discretized u,v coordinates
  unsigned short v[4];
  float level[4];                             //!< discrete tessellation level per edge

  unsigned char flags;                        //!< combination of Flags bits
  unsigned char type;                         //!< one of the Type values
  unsigned short grid_u_res;                  //!< grid resolution in u
  unsigned int geom;                          //!< geometry ID of the subdivision mesh this patch belongs to
  unsigned int prim;                          //!< primitive ID of this subdivision patch
  unsigned short grid_v_res;                  //!< grid resolution in v

  unsigned short grid_size_simd_blocks;       //!< grid size in SIMD blocks
  unsigned int time_;                         //!< time step passed to the constructor

  /*! Data kept in patch_v by the set_edge()/set_subPatch() accessors. */
  struct PatchHalfEdge {
    const HalfEdge* edge;
    unsigned subPatch;
  };

  Vec3fa patch_v[4][4];                       //!< storage for the patch object or a PatchHalfEdge

  /* accessors that reinterpret patch_v as a PatchHalfEdge */
  const HalfEdge *edge() const {
    PatchHalfEdge* const he = (PatchHalfEdge*)patch_v;
    return he->edge;
  }
  unsigned time() const { return time_; }
  unsigned subPatch() const {
    PatchHalfEdge* const he = (PatchHalfEdge*)patch_v;
    return he->subPatch;
  }

  void set_edge(const HalfEdge *h) const {
    PatchHalfEdge* const he = (PatchHalfEdge*)patch_v;
    he->edge = h;
  }
  void set_subPatch(const unsigned s) const {
    PatchHalfEdge* const he = (PatchHalfEdge*)patch_v;
    he->subPatch = s;
  }
};
|
||||
|
||||
namespace isa
|
||||
{
|
||||
Vec3fa patchEval(const SubdivPatch1Base& patch, const float uu, const float vv);
|
||||
Vec3fa patchNormal(const SubdivPatch1Base& patch, const float uu, const float vv);
|
||||
|
||||
template<typename simdf>
|
||||
Vec3<simdf> patchEval(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv);
|
||||
|
||||
template<typename simdf>
|
||||
Vec3<simdf> patchNormal(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv);
|
||||
|
||||
|
||||
/* eval grid over patch and stich edges when required */
|
||||
void evalGrid(const SubdivPatch1Base& patch,
|
||||
const unsigned x0, const unsigned x1,
|
||||
const unsigned y0, const unsigned y1,
|
||||
const unsigned swidth, const unsigned sheight,
|
||||
float *__restrict__ const grid_x,
|
||||
float *__restrict__ const grid_y,
|
||||
float *__restrict__ const grid_z,
|
||||
float *__restrict__ const grid_u,
|
||||
float *__restrict__ const grid_v,
|
||||
const SubdivMesh* const geom);
|
||||
|
||||
/* eval grid over patch and stich edges when required */
|
||||
BBox3fa evalGridBounds(const SubdivPatch1Base& patch,
|
||||
const unsigned x0, const unsigned x1,
|
||||
const unsigned y0, const unsigned y1,
|
||||
const unsigned swidth, const unsigned sheight,
|
||||
const SubdivMesh* const geom);
|
||||
}
|
||||
}
|
||||
415
Framework/external/embree/kernels/subdiv/subdivpatch1base_eval.cpp
vendored
Normal file
415
Framework/external/embree/kernels/subdiv/subdivpatch1base_eval.cpp
vendored
Normal file
|
|
@ -0,0 +1,415 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "subdivpatch1base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Evaluates the position of the patch at (uu,vv) by dispatching on the patch
 *  type to the object stored in patch_v. Types without a stored patch object
 *  yield the zero vector. */
Vec3fa patchEval(const SubdivPatch1Base& patch, const float uu, const float vv)
{
  switch (patch.type)
  {
  case SubdivPatch1Base::BEZIER_PATCH:
    return ((BezierPatch3fa*)patch.patch_v)->eval(uu,vv);
  case SubdivPatch1Base::BSPLINE_PATCH:
    return ((BSplinePatch3fa*)patch.patch_v)->eval(uu,vv);
  case SubdivPatch1Base::GREGORY_PATCH:
    return ((DenseGregoryPatch3fa*)patch.patch_v)->eval(uu,vv);
  case SubdivPatch1Base::BILINEAR_PATCH:
    return ((BilinearPatch3fa*)patch.patch_v)->eval(uu,vv);
  default:
    return Vec3fa( zero );
  }
}
|
||||
|
||||
/*! Evaluates the normal of the patch at (uu,vv) by dispatching on the patch
 *  type to the object stored in patch_v. Types without a stored patch object
 *  yield the zero vector. */
Vec3fa patchNormal(const SubdivPatch1Base& patch, const float uu, const float vv)
{
  switch (patch.type)
  {
  case SubdivPatch1Base::BEZIER_PATCH:
    return ((BezierPatch3fa*)patch.patch_v)->normal(uu,vv);
  case SubdivPatch1Base::BSPLINE_PATCH:
    return ((BSplinePatch3fa*)patch.patch_v)->normal(uu,vv);
  case SubdivPatch1Base::GREGORY_PATCH:
    return ((DenseGregoryPatch3fa*)patch.patch_v)->normal(uu,vv);
  case SubdivPatch1Base::BILINEAR_PATCH:
    return ((BilinearPatch3fa*)patch.patch_v)->normal(uu,vv);
  default:
    return Vec3fa( zero );
  }
}
|
||||
|
||||
template<typename simdf>
|
||||
Vec3<simdf> patchEval(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv)
|
||||
{
|
||||
if (likely(patch.type == SubdivPatch1Base::BEZIER_PATCH))
|
||||
return ((BezierPatch3fa*)patch.patch_v)->eval(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::BSPLINE_PATCH))
|
||||
return ((BSplinePatch3fa*)patch.patch_v)->eval(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::GREGORY_PATCH))
|
||||
return ((DenseGregoryPatch3fa*)patch.patch_v)->eval(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::BILINEAR_PATCH))
|
||||
return ((BilinearPatch3fa*)patch.patch_v)->eval(uu,vv);
|
||||
return Vec3<simdf>( zero );
|
||||
}
|
||||
|
||||
template<typename simdf>
|
||||
Vec3<simdf> patchNormal(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv)
|
||||
{
|
||||
if (likely(patch.type == SubdivPatch1Base::BEZIER_PATCH))
|
||||
return ((BezierPatch3fa*)patch.patch_v)->normal(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::BSPLINE_PATCH))
|
||||
return ((BSplinePatch3fa*)patch.patch_v)->normal(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::GREGORY_PATCH))
|
||||
return ((DenseGregoryPatch3fa*)patch.patch_v)->normal(uu,vv);
|
||||
else if (likely(patch.type == SubdivPatch1Base::BILINEAR_PATCH))
|
||||
return ((BilinearPatch3fa*)patch.patch_v)->normal(uu,vv);
|
||||
return Vec3<simdf>( zero );
|
||||
}
|
||||
|
||||
/* eval grid over patch and stich edges when required */
|
||||
void evalGrid(const SubdivPatch1Base& patch,
|
||||
const unsigned x0, const unsigned x1,
|
||||
const unsigned y0, const unsigned y1,
|
||||
const unsigned swidth, const unsigned sheight,
|
||||
float *__restrict__ const grid_x,
|
||||
float *__restrict__ const grid_y,
|
||||
float *__restrict__ const grid_z,
|
||||
float *__restrict__ const grid_u,
|
||||
float *__restrict__ const grid_v,
|
||||
const SubdivMesh* const geom)
|
||||
{
|
||||
const unsigned dwidth = x1-x0+1;
|
||||
const unsigned dheight = y1-y0+1;
|
||||
const unsigned M = dwidth*dheight+VSIZEX;
|
||||
const unsigned grid_size_simd_blocks = (M-1)/VSIZEX;
|
||||
|
||||
if (unlikely(patch.type == SubdivPatch1Base::EVAL_PATCH))
|
||||
{
|
||||
const bool displ = geom->displFunc;
|
||||
const unsigned N = displ ? M : 0;
|
||||
dynamic_large_stack_array(float,grid_Ng_x,N,32*32*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_Ng_y,N,32*32*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_Ng_z,N,32*32*sizeof(float));
|
||||
|
||||
if (geom->patch_eval_trees.size())
|
||||
{
|
||||
feature_adaptive_eval_grid<PatchEvalGrid>
|
||||
(geom->patch_eval_trees[geom->numTimeSteps*patch.primID()+patch.time()], patch.subPatch(), patch.needsStitching() ? patch.level : nullptr,
|
||||
x0,x1,y0,y1,swidth,sheight,
|
||||
grid_x,grid_y,grid_z,grid_u,grid_v,
|
||||
displ ? (float*)grid_Ng_x : nullptr, displ ? (float*)grid_Ng_y : nullptr, displ ? (float*)grid_Ng_z : nullptr,
|
||||
dwidth,dheight);
|
||||
}
|
||||
else
|
||||
{
|
||||
GeneralCatmullClarkPatch3fa ccpatch(patch.edge(),geom->getVertexBuffer(patch.time()));
|
||||
|
||||
feature_adaptive_eval_grid<FeatureAdaptiveEvalGrid,GeneralCatmullClarkPatch3fa>
|
||||
(ccpatch, patch.subPatch(), patch.needsStitching() ? patch.level : nullptr,
|
||||
x0,x1,y0,y1,swidth,sheight,
|
||||
grid_x,grid_y,grid_z,grid_u,grid_v,
|
||||
displ ? (float*)grid_Ng_x : nullptr, displ ? (float*)grid_Ng_y : nullptr, displ ? (float*)grid_Ng_z : nullptr,
|
||||
dwidth,dheight);
|
||||
}
|
||||
|
||||
/* convert sub-patch UVs to patch UVs*/
|
||||
const Vec2f uv0 = patch.getUV(0);
|
||||
const Vec2f uv1 = patch.getUV(1);
|
||||
const Vec2f uv2 = patch.getUV(2);
|
||||
const Vec2f uv3 = patch.getUV(3);
|
||||
for (unsigned i=0; i<grid_size_simd_blocks; i++)
|
||||
{
|
||||
const vfloatx u = vfloatx::load(&grid_u[i*VSIZEX]);
|
||||
const vfloatx v = vfloatx::load(&grid_v[i*VSIZEX]);
|
||||
const vfloatx patch_u = lerp2(uv0.x,uv1.x,uv3.x,uv2.x,u,v);
|
||||
const vfloatx patch_v = lerp2(uv0.y,uv1.y,uv3.y,uv2.y,u,v);
|
||||
vfloatx::store(&grid_u[i*VSIZEX],patch_u);
|
||||
vfloatx::store(&grid_v[i*VSIZEX],patch_v);
|
||||
}
|
||||
|
||||
/* call displacement shader */
|
||||
if (unlikely(geom->displFunc)) {
|
||||
RTCDisplacementFunctionNArguments args;
|
||||
args.geometryUserPtr = geom->userPtr;
|
||||
args.geometry = (RTCGeometry)geom;
|
||||
//args.geomID = patch.geomID();
|
||||
args.primID = patch.primID();
|
||||
args.timeStep = patch.time();
|
||||
args.u = grid_u;
|
||||
args.v = grid_v;
|
||||
args.Ng_x = grid_Ng_x;
|
||||
args.Ng_y = grid_Ng_y;
|
||||
args.Ng_z = grid_Ng_z;
|
||||
args.P_x = grid_x;
|
||||
args.P_y = grid_y;
|
||||
args.P_z = grid_z;
|
||||
args.N = dwidth*dheight;
|
||||
geom->displFunc(&args);
|
||||
}
|
||||
|
||||
/* set last elements in u,v array to 1.0f */
|
||||
const float last_u = grid_u[dwidth*dheight-1];
|
||||
const float last_v = grid_v[dwidth*dheight-1];
|
||||
const float last_x = grid_x[dwidth*dheight-1];
|
||||
const float last_y = grid_y[dwidth*dheight-1];
|
||||
const float last_z = grid_z[dwidth*dheight-1];
|
||||
for (unsigned i=dwidth*dheight;i<grid_size_simd_blocks*VSIZEX;i++)
|
||||
{
|
||||
grid_u[i] = last_u;
|
||||
grid_v[i] = last_v;
|
||||
grid_x[i] = last_x;
|
||||
grid_y[i] = last_y;
|
||||
grid_z[i] = last_z;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* grid_u, grid_v need to be padded as we write with SIMD granularity */
|
||||
gridUVTessellator(patch.level,swidth,sheight,x0,y0,dwidth,dheight,grid_u,grid_v);
|
||||
|
||||
/* set last elements in u,v array to last valid point */
|
||||
const float last_u = grid_u[dwidth*dheight-1];
|
||||
const float last_v = grid_v[dwidth*dheight-1];
|
||||
for (unsigned i=dwidth*dheight;i<grid_size_simd_blocks*VSIZEX;i++) {
|
||||
grid_u[i] = last_u;
|
||||
grid_v[i] = last_v;
|
||||
}
|
||||
|
||||
/* stitch edges if necessary */
|
||||
if (unlikely(patch.needsStitching()))
|
||||
stitchUVGrid(patch.level,swidth,sheight,x0,y0,dwidth,dheight,grid_u,grid_v);
|
||||
|
||||
/* iterates over all grid points */
|
||||
for (unsigned i=0; i<grid_size_simd_blocks; i++)
|
||||
{
|
||||
const vfloatx u = vfloatx::load(&grid_u[i*VSIZEX]);
|
||||
const vfloatx v = vfloatx::load(&grid_v[i*VSIZEX]);
|
||||
Vec3vfx vtx = patchEval(patch,u,v);
|
||||
|
||||
/* evaluate displacement function */
|
||||
if (unlikely(geom->displFunc != nullptr))
|
||||
{
|
||||
const Vec3vfx normal = normalize_safe(patchNormal(patch, u, v));
|
||||
RTCDisplacementFunctionNArguments args;
|
||||
args.geometryUserPtr = geom->userPtr;
|
||||
args.geometry = (RTCGeometry)geom;
|
||||
//args.geomID = patch.geomID();
|
||||
args.primID = patch.primID();
|
||||
args.timeStep = patch.time();
|
||||
args.u = &u[0];
|
||||
args.v = &v[0];
|
||||
args.Ng_x = &normal.x[0];
|
||||
args.Ng_y = &normal.y[0];
|
||||
args.Ng_z = &normal.z[0];
|
||||
args.P_x = &vtx.x[0];
|
||||
args.P_y = &vtx.y[0];
|
||||
args.P_z = &vtx.z[0];
|
||||
args.N = VSIZEX;
|
||||
geom->displFunc(&args);
|
||||
}
|
||||
|
||||
vfloatx::store(&grid_x[i*VSIZEX],vtx.x);
|
||||
vfloatx::store(&grid_y[i*VSIZEX],vtx.y);
|
||||
vfloatx::store(&grid_z[i*VSIZEX],vtx.z);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* eval grid over patch and stich edges when required */
|
||||
BBox3fa evalGridBounds(const SubdivPatch1Base& patch,
|
||||
const unsigned x0, const unsigned x1,
|
||||
const unsigned y0, const unsigned y1,
|
||||
const unsigned swidth, const unsigned sheight,
|
||||
const SubdivMesh* const geom)
|
||||
{
|
||||
BBox3fa b(empty);
|
||||
const unsigned dwidth = x1-x0+1;
|
||||
const unsigned dheight = y1-y0+1;
|
||||
const unsigned M = dwidth*dheight+VSIZEX;
|
||||
const unsigned grid_size_simd_blocks = (M-1)/VSIZEX;
|
||||
dynamic_large_stack_array(float,grid_u,M,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_v,M,64*64*sizeof(float));
|
||||
|
||||
if (unlikely(patch.type == SubdivPatch1Base::EVAL_PATCH))
|
||||
{
|
||||
const bool displ = geom->displFunc;
|
||||
dynamic_large_stack_array(float,grid_x,M,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_y,M,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_z,M,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_Ng_x,displ ? M : 0,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_Ng_y,displ ? M : 0,64*64*sizeof(float));
|
||||
dynamic_large_stack_array(float,grid_Ng_z,displ ? M : 0,64*64*sizeof(float));
|
||||
|
||||
if (geom->patch_eval_trees.size())
|
||||
{
|
||||
feature_adaptive_eval_grid<PatchEvalGrid>
|
||||
(geom->patch_eval_trees[geom->numTimeSteps*patch.primID()+patch.time()], patch.subPatch(), patch.needsStitching() ? patch.level : nullptr,
|
||||
x0,x1,y0,y1,swidth,sheight,
|
||||
grid_x,grid_y,grid_z,grid_u,grid_v,
|
||||
displ ? (float*)grid_Ng_x : nullptr, displ ? (float*)grid_Ng_y : nullptr, displ ? (float*)grid_Ng_z : nullptr,
|
||||
dwidth,dheight);
|
||||
}
|
||||
else
|
||||
{
|
||||
GeneralCatmullClarkPatch3fa ccpatch(patch.edge(),geom->getVertexBuffer(patch.time()));
|
||||
|
||||
feature_adaptive_eval_grid <FeatureAdaptiveEvalGrid,GeneralCatmullClarkPatch3fa>
|
||||
(ccpatch, patch.subPatch(), patch.needsStitching() ? patch.level : nullptr,
|
||||
x0,x1,y0,y1,swidth,sheight,
|
||||
grid_x,grid_y,grid_z,grid_u,grid_v,
|
||||
displ ? (float*)grid_Ng_x : nullptr, displ ? (float*)grid_Ng_y : nullptr, displ ? (float*)grid_Ng_z : nullptr,
|
||||
dwidth,dheight);
|
||||
}
|
||||
|
||||
/* call displacement shader */
|
||||
if (unlikely(geom->displFunc))
|
||||
{
|
||||
RTCDisplacementFunctionNArguments args;
|
||||
args.geometryUserPtr = geom->userPtr;
|
||||
args.geometry = (RTCGeometry)geom;
|
||||
//args.geomID = patch.geomID();
|
||||
args.primID = patch.primID();
|
||||
args.timeStep = patch.time();
|
||||
args.u = grid_u;
|
||||
args.v = grid_v;
|
||||
args.Ng_x = grid_Ng_x;
|
||||
args.Ng_y = grid_Ng_y;
|
||||
args.Ng_z = grid_Ng_z;
|
||||
args.P_x = grid_x;
|
||||
args.P_y = grid_y;
|
||||
args.P_z = grid_z;
|
||||
args.N = dwidth*dheight;
|
||||
geom->displFunc(&args);
|
||||
}
|
||||
|
||||
/* set last elements in u,v array to 1.0f */
|
||||
const float last_u = grid_u[dwidth*dheight-1];
|
||||
const float last_v = grid_v[dwidth*dheight-1];
|
||||
const float last_x = grid_x[dwidth*dheight-1];
|
||||
const float last_y = grid_y[dwidth*dheight-1];
|
||||
const float last_z = grid_z[dwidth*dheight-1];
|
||||
for (unsigned i=dwidth*dheight;i<grid_size_simd_blocks*VSIZEX;i++)
|
||||
{
|
||||
grid_u[i] = last_u;
|
||||
grid_v[i] = last_v;
|
||||
grid_x[i] = last_x;
|
||||
grid_y[i] = last_y;
|
||||
grid_z[i] = last_z;
|
||||
}
|
||||
|
||||
vfloatx bounds_min_x = pos_inf;
|
||||
vfloatx bounds_min_y = pos_inf;
|
||||
vfloatx bounds_min_z = pos_inf;
|
||||
vfloatx bounds_max_x = neg_inf;
|
||||
vfloatx bounds_max_y = neg_inf;
|
||||
vfloatx bounds_max_z = neg_inf;
|
||||
for (unsigned i = 0; i<grid_size_simd_blocks; i++)
|
||||
{
|
||||
vfloatx x = vfloatx::loadu(&grid_x[i * VSIZEX]);
|
||||
vfloatx y = vfloatx::loadu(&grid_y[i * VSIZEX]);
|
||||
vfloatx z = vfloatx::loadu(&grid_z[i * VSIZEX]);
|
||||
|
||||
bounds_min_x = min(bounds_min_x,x);
|
||||
bounds_min_y = min(bounds_min_y,y);
|
||||
bounds_min_z = min(bounds_min_z,z);
|
||||
|
||||
bounds_max_x = max(bounds_max_x,x);
|
||||
bounds_max_y = max(bounds_max_y,y);
|
||||
bounds_max_z = max(bounds_max_z,z);
|
||||
}
|
||||
|
||||
b.lower.x = reduce_min(bounds_min_x);
|
||||
b.lower.y = reduce_min(bounds_min_y);
|
||||
b.lower.z = reduce_min(bounds_min_z);
|
||||
b.upper.x = reduce_max(bounds_max_x);
|
||||
b.upper.y = reduce_max(bounds_max_y);
|
||||
b.upper.z = reduce_max(bounds_max_z);
|
||||
//b.lower.a = 0;
|
||||
//b.upper.a = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* grid_u, grid_v need to be padded as we write with SIMD granularity */
|
||||
gridUVTessellator(patch.level,swidth,sheight,x0,y0,dwidth,dheight,grid_u,grid_v);
|
||||
|
||||
/* set last elements in u,v array to last valid point */
|
||||
const float last_u = grid_u[dwidth*dheight-1];
|
||||
const float last_v = grid_v[dwidth*dheight-1];
|
||||
for (unsigned i=dwidth*dheight;i<grid_size_simd_blocks*VSIZEX;i++) {
|
||||
grid_u[i] = last_u;
|
||||
grid_v[i] = last_v;
|
||||
}
|
||||
|
||||
/* stitch edges if necessary */
|
||||
if (unlikely(patch.needsStitching()))
|
||||
stitchUVGrid(patch.level,swidth,sheight,x0,y0,dwidth,dheight,grid_u,grid_v);
|
||||
|
||||
/* iterates over all grid points */
|
||||
Vec3vfx bounds_min;
|
||||
bounds_min[0] = pos_inf;
|
||||
bounds_min[1] = pos_inf;
|
||||
bounds_min[2] = pos_inf;
|
||||
|
||||
Vec3vfx bounds_max;
|
||||
bounds_max[0] = neg_inf;
|
||||
bounds_max[1] = neg_inf;
|
||||
bounds_max[2] = neg_inf;
|
||||
|
||||
for (unsigned i=0; i<grid_size_simd_blocks; i++)
|
||||
{
|
||||
const vfloatx u = vfloatx::load(&grid_u[i*VSIZEX]);
|
||||
const vfloatx v = vfloatx::load(&grid_v[i*VSIZEX]);
|
||||
Vec3vfx vtx = patchEval(patch,u,v);
|
||||
|
||||
/* evaluate displacement function */
|
||||
if (unlikely(geom->displFunc != nullptr))
|
||||
{
|
||||
const Vec3vfx normal = normalize_safe(patchNormal(patch,u,v));
|
||||
RTCDisplacementFunctionNArguments args;
|
||||
args.geometryUserPtr = geom->userPtr;
|
||||
args.geometry = (RTCGeometry)geom;
|
||||
//args.geomID = patch.geomID();
|
||||
args.primID = patch.primID();
|
||||
args.timeStep = patch.time();
|
||||
args.u = &u[0];
|
||||
args.v = &v[0];
|
||||
args.Ng_x = &normal.x[0];
|
||||
args.Ng_y = &normal.y[0];
|
||||
args.Ng_z = &normal.z[0];
|
||||
args.P_x = &vtx.x[0];
|
||||
args.P_y = &vtx.y[0];
|
||||
args.P_z = &vtx.z[0];
|
||||
args.N = VSIZEX;
|
||||
geom->displFunc(&args);
|
||||
}
|
||||
|
||||
bounds_min[0] = min(bounds_min[0],vtx.x);
|
||||
bounds_max[0] = max(bounds_max[0],vtx.x);
|
||||
bounds_min[1] = min(bounds_min[1],vtx.y);
|
||||
bounds_max[1] = max(bounds_max[1],vtx.y);
|
||||
bounds_min[2] = min(bounds_min[2],vtx.z);
|
||||
bounds_max[2] = max(bounds_max[2],vtx.z);
|
||||
}
|
||||
|
||||
b.lower.x = reduce_min(bounds_min[0]);
|
||||
b.lower.y = reduce_min(bounds_min[1]);
|
||||
b.lower.z = reduce_min(bounds_min[2]);
|
||||
b.upper.x = reduce_max(bounds_max[0]);
|
||||
b.upper.y = reduce_max(bounds_max[1]);
|
||||
b.upper.z = reduce_max(bounds_max[2]);
|
||||
//b.lower.a = 0;
|
||||
//b.upper.a = 0;
|
||||
}
|
||||
|
||||
assert( std::isfinite(b.lower.x) );
|
||||
assert( std::isfinite(b.lower.y) );
|
||||
assert( std::isfinite(b.lower.z) );
|
||||
|
||||
assert( std::isfinite(b.upper.x) );
|
||||
assert( std::isfinite(b.upper.y) );
|
||||
assert( std::isfinite(b.upper.z) );
|
||||
|
||||
|
||||
assert(b.lower.x <= b.upper.x);
|
||||
assert(b.lower.y <= b.upper.y);
|
||||
assert(b.lower.z <= b.upper.z);
|
||||
return b;
|
||||
}
|
||||
}
|
||||
}
|
||||
161
Framework/external/embree/kernels/subdiv/tessellation.h
vendored
Normal file
161
Framework/external/embree/kernels/subdiv/tessellation.h
vendored
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* adjust discrete tessellation level for feature-adaptive pre-subdivision:
   halves the level sublevel times, rounds it up to an integer and doubles
   it sublevel times again */
__forceinline float adjustTessellationLevel(float l, const size_t sublevel)
{
  float scaled = l;
  for (size_t n=sublevel; n>0; n--) scaled *= 0.5f;

  float rounded = ceilf(scaled);
  for (size_t n=sublevel; n>0; n--) rounded *= 2.0f;

  return rounded;
}
|
||||
|
||||
/* maps point index x of an edge with 'fine' segments to a point index of an
   edge with 'coarse' segments, using the integer division (2x+1)*coarse/(2*fine) */
__forceinline int stitch(const int x, const int fine, const int coarse) {
  const int numerator   = (2*x+1)*coarse;
  const int denominator = 2*fine;
  return numerator/denominator;
}
|
||||
|
||||
/* Overwrites the coordinates of the edge points x0..x1 (indices on an edge
   with high_rate points) so that they snap to the positions of an edge with
   only low_rate points. Entry (x-x0)*uv_array_step of uv_array belongs to
   point x. */
__forceinline void stitchGridEdges(const unsigned int low_rate,
                                   const unsigned int high_rate,
                                   const unsigned int x0,
                                   const unsigned int x1,
                                   float * __restrict__ const uv_array,
                                   const unsigned int uv_array_step)
{
#if 1
  const unsigned int fine_segments   = high_rate-1;
  const unsigned int coarse_segments = low_rate-1;
  const float inv_low_rate = rcp((float)coarse_segments);

  for (unsigned x=x0; x<=x1; x++) {
    const unsigned ofs = (x-x0)*uv_array_step;
    uv_array[ofs] = float(stitch(x,fine_segments,coarse_segments))*inv_low_rate;
  }

  /* the last point of the edge is forced to exactly 1 */
  if (unlikely(x1 == fine_segments))
    uv_array[(x1-x0)*uv_array_step] = 1.0f;
#else
  /* disabled alternative implementation */
  assert(low_rate < high_rate);
  assert(high_rate >= 2);

  const float inv_low_rate = rcp((float)(low_rate-1));
  const unsigned int dy = low_rate - 1;
  const unsigned int dx = high_rate - 1;

  int p = 2*dy-dx;

  unsigned int offset = 0;
  unsigned int y = 0;
  float value = 0.0f;
  for(unsigned int x=0;x<high_rate-1; x++) // '<=' would be correct but we will leave the 1.0f at the end
  {
    uv_array[offset] = value;

    offset += uv_array_step;
    if (unlikely(p > 0))
    {
      y++;
      value = (float)y * inv_low_rate;
      p -= 2*dx;
    }
    p += 2*dy;
  }
#endif
}
|
||||
|
||||
/* Stitches the border rows/columns of a (sub-)grid of UVs: for each edge of
   the full swidth x sheight grid that has fewer points than the grid side and
   that this sub-grid touches, the coordinates along that border are snapped
   to the coarser edge.
   NOTE(review): edge 1 is written to the last column but guarded by x0 == 0,
   and edge 3 to the first column but guarded by x1 == swidth-1; both guards
   hold when the whole grid is processed - confirm the intent for sub-grids. */
__forceinline void stitchUVGrid(const float edge_levels[4],
                                const unsigned int swidth,
                                const unsigned int sheight,
                                const unsigned int x0,
                                const unsigned int y0,
                                const unsigned int grid_u_res,
                                const unsigned int grid_v_res,
                                float * __restrict__ const u_array,
                                float * __restrict__ const v_array)
{
  /* last point index covered by this sub-grid */
  const unsigned int x1 = x0+grid_u_res-1;
  const unsigned int y1 = y0+grid_v_res-1;

  /* number of points on each edge */
  const unsigned int points0 = (unsigned int)edge_levels[0] + 1;
  const unsigned int points1 = (unsigned int)edge_levels[1] + 1;
  const unsigned int points2 = (unsigned int)edge_levels[2] + 1;
  const unsigned int points3 = (unsigned int)edge_levels[3] + 1;

  /* edge 0: first row, u coordinates */
  if (unlikely(y0 == 0 && points0 < swidth))
    stitchGridEdges(points0,swidth,x0,x1,u_array,1);

  /* edge 2: last row, u coordinates */
  if (unlikely(y1 == sheight-1 && points2 < swidth)) {
    float* const last_row = &u_array[(grid_v_res-1)*grid_u_res];
    stitchGridEdges(points2,swidth,x0,x1,last_row,1);
  }

  /* edge 1: last column, v coordinates */
  if (unlikely(x0 == 0 && points1 < sheight)) {
    float* const last_column = &v_array[grid_u_res-1];
    stitchGridEdges(points1,sheight,y0,y1,last_column,grid_u_res);
  }

  /* edge 3: first column, v coordinates */
  if (unlikely(x1 == swidth-1 && points3 < sheight))
    stitchGridEdges(points3,sheight,y0,y1,v_array,grid_u_res);
}
|
||||
|
||||
/* Fills u_array/v_array with the UV coordinates of a grid_u_res x grid_v_res
   sub-grid that starts at point (x0,y0) of a swidth x sheight grid. Rows are
   written with full SIMD stores, so each row may write past its last point:
   the arrays have to be padded accordingly. */
__forceinline void gridUVTessellator(const float edge_levels[4],
                                     const unsigned int swidth,
                                     const unsigned int sheight,
                                     const unsigned int x0,
                                     const unsigned int y0,
                                     const unsigned int grid_u_res,
                                     const unsigned int grid_v_res,
                                     float * __restrict__ const u_array,
                                     float * __restrict__ const v_array)
{
  assert( grid_u_res >= 1);
  assert( grid_v_res >= 1);
  assert( edge_levels[0] >= 1.0f );
  assert( edge_levels[1] >= 1.0f );
  assert( edge_levels[2] >= 1.0f );
  assert( edge_levels[3] >= 1.0f );

  /* select the SIMD types once, the loops below are shared by both widths */
#if defined(__AVX__)
  typedef vint8   vintw;
  typedef vfloat8 vfloatw;
  typedef vbool8  vboolw;
  const int W = 8;
#else
  typedef vint4   vintw;
  typedef vfloat4 vfloatw;
  typedef vbool4  vboolw;
  const int W = 4;
#endif

  const vintw grid_u_segments = vintw(swidth)-1;
  const vintw grid_v_segments = vintw(sheight)-1;

  const vfloatw inv_grid_u_segments = rcp(vfloatw(grid_u_segments));
  const vfloatw inv_grid_v_segments = rcp(vfloatw(grid_v_segments));

  unsigned int index = 0;
  vintw v_i( zero );
  for (unsigned int y=0; y<grid_v_res; y++, index+=grid_u_res, v_i += 1)
  {
    vintw u_i ( step );

    /* indices at or beyond the segment count get the value 1 */
    const vboolw m_v = v_i < grid_v_segments;

    for (unsigned int x=0; x<grid_u_res; x+=W, u_i += W)
    {
      const vboolw m_u = u_i < grid_u_segments;
      const vfloatw u = select(m_u, vfloatw(x0+u_i) * inv_grid_u_segments, 1.0f);
      const vfloatw v = select(m_v, vfloatw(y0+v_i) * inv_grid_v_segments, 1.0f);
      vfloatw::storeu(&u_array[index + x],u);
      vfloatw::storeu(&v_array[index + x],v);
    }
  }
}
|
||||
}
|
||||
328
Framework/external/embree/kernels/subdiv/tessellation_cache.cpp
vendored
Normal file
328
Framework/external/embree/kernels/subdiv/tessellation_cache.cpp
vendored
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "tessellation_cache.h"
|
||||
|
||||
#include "../../common/tasking/taskscheduler.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
SharedLazyTessellationCache SharedLazyTessellationCache::sharedLazyTessellationCache;
|
||||
|
||||
__thread ThreadWorkState* SharedLazyTessellationCache::init_t_state = nullptr;
|
||||
ThreadWorkState* SharedLazyTessellationCache::current_t_state = nullptr;
|
||||
|
||||
void resizeTessellationCache(size_t new_size)
|
||||
{
|
||||
if (new_size >= SharedLazyTessellationCache::MAX_TESSELLATION_CACHE_SIZE)
|
||||
new_size = SharedLazyTessellationCache::MAX_TESSELLATION_CACHE_SIZE;
|
||||
if (SharedLazyTessellationCache::sharedLazyTessellationCache.getSize() != new_size)
|
||||
SharedLazyTessellationCache::sharedLazyTessellationCache.realloc(new_size);
|
||||
}
|
||||
|
||||
void resetTessellationCache()
|
||||
{
|
||||
//SharedLazyTessellationCache::sharedLazyTessellationCache.addCurrentIndex(SharedLazyTessellationCache::NUM_CACHE_SEGMENTS);
|
||||
SharedLazyTessellationCache::sharedLazyTessellationCache.reset();
|
||||
}
|
||||
|
||||
/* Creates an empty cache without backing memory and preallocates
   NUM_PREALLOC_THREAD_WORK_STATES thread work states. */
SharedLazyTessellationCache::SharedLazyTessellationCache()
{
  /* no memory attached yet */
  data      = nullptr;
  size      = 0;
  hugepages = false;
  maxBlocks = size/BLOCK_SIZE;

  /* bookkeeping */
  localTime        = NUM_CACHE_SEGMENTS;
  next_block       = 0;
  numRenderThreads = 0;

  /* block count at which allocNextSegment() switches segments */
#if FORCE_SIMPLE_FLUSH == 1
  switch_block_threshold = maxBlocks;
#else
  switch_block_threshold = maxBlocks/NUM_CACHE_SEGMENTS;
#endif

  threadWorkState = new ThreadWorkState[NUM_PREALLOC_THREAD_WORK_STATES];

  //reset_state.reset();
  //linkedlist_mtx.reset();
}
|
||||
|
||||
/* Releases the thread work states. States flagged as 'allocated' were
   created individually with new and are deleted one by one; all others
   live in the preallocated array, which is released as a whole. */
SharedLazyTessellationCache::~SharedLazyTessellationCache()
{
  ThreadWorkState* node = current_t_state;
  while (node != nullptr)
  {
    /* fetch the successor first, the node may be destroyed below */
    ThreadWorkState* const following = node->next;
    if (node->allocated)
      delete node;
    node = following;
  }

  delete[] threadWorkState;
}
|
||||
|
||||
void SharedLazyTessellationCache::getNextRenderThreadWorkState()
|
||||
{
|
||||
const size_t id = numRenderThreads.fetch_add(1);
|
||||
if (id >= NUM_PREALLOC_THREAD_WORK_STATES) init_t_state = new ThreadWorkState(true);
|
||||
else init_t_state = &threadWorkState[id];
|
||||
|
||||
/* critical section for updating link list with new thread state */
|
||||
linkedlist_mtx.lock();
|
||||
init_t_state->next = current_t_state;
|
||||
current_t_state = init_t_state;
|
||||
linkedlist_mtx.unlock();
|
||||
}
|
||||
|
||||
/* Spins until the counter of the given thread state has dropped to at
   most 'users'. Each iteration issues four pause instructions before the
   counter is read again. */
void SharedLazyTessellationCache::waitForUsersLessEqual(ThreadWorkState *const t_state,
                                                        const unsigned int users)
{
  while (t_state->counter > users)
  {
    _mm_pause();
    _mm_pause();
    _mm_pause();
    _mm_pause();
  }
}
|
||||
|
||||
void SharedLazyTessellationCache::allocNextSegment()
|
||||
{
|
||||
if (reset_state.try_lock())
|
||||
{
|
||||
if (next_block >= switch_block_threshold)
|
||||
{
|
||||
/* lock the linked list of thread states */
|
||||
|
||||
linkedlist_mtx.lock();
|
||||
|
||||
/* block all threads */
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
|
||||
waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* switch to the next segment */
|
||||
addCurrentIndex();
|
||||
CACHE_STATS(PRINT("RESET TESS CACHE"));
|
||||
|
||||
#if FORCE_SIMPLE_FLUSH == 1
|
||||
next_block = 0;
|
||||
switch_block_threshold = maxBlocks;
|
||||
#else
|
||||
const size_t region = localTime % NUM_CACHE_SEGMENTS;
|
||||
next_block = region * (maxBlocks/NUM_CACHE_SEGMENTS);
|
||||
switch_block_threshold = next_block + (maxBlocks/NUM_CACHE_SEGMENTS);
|
||||
assert( switch_block_threshold <= maxBlocks );
|
||||
#endif
|
||||
|
||||
CACHE_STATS(SharedTessellationCacheStats::cache_flushes++);
|
||||
|
||||
/* release all blocked threads */
|
||||
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* unlock the linked list of thread states */
|
||||
|
||||
linkedlist_mtx.unlock();
|
||||
|
||||
|
||||
}
|
||||
reset_state.unlock();
|
||||
}
|
||||
else
|
||||
reset_state.wait_until_unlocked();
|
||||
}
|
||||
|
||||
|
||||
void SharedLazyTessellationCache::reset()
|
||||
{
|
||||
/* lock the reset_state */
|
||||
reset_state.lock();
|
||||
|
||||
/* lock the linked list of thread states */
|
||||
linkedlist_mtx.lock();
|
||||
|
||||
/* block all threads */
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
|
||||
waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* reset to the first segment */
|
||||
next_block = 0;
|
||||
#if FORCE_SIMPLE_FLUSH == 1
|
||||
switch_block_threshold = maxBlocks;
|
||||
#else
|
||||
switch_block_threshold = maxBlocks/NUM_CACHE_SEGMENTS;
|
||||
#endif
|
||||
|
||||
/* reset local time */
|
||||
localTime = NUM_CACHE_SEGMENTS;
|
||||
|
||||
/* release all blocked threads */
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* unlock the linked list of thread states */
|
||||
linkedlist_mtx.unlock();
|
||||
|
||||
/* unlock the reset_state */
|
||||
reset_state.unlock();
|
||||
}
|
||||
|
||||
void SharedLazyTessellationCache::realloc(const size_t new_size)
|
||||
{
|
||||
/* lock the reset_state */
|
||||
reset_state.lock();
|
||||
|
||||
/* lock the linked list of thread states */
|
||||
linkedlist_mtx.lock();
|
||||
|
||||
/* block all threads */
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
|
||||
waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* reallocate data */
|
||||
if (data) os_free(data,size,hugepages);
|
||||
size = new_size;
|
||||
data = nullptr;
|
||||
if (size) data = (float*)os_malloc(size,hugepages);
|
||||
maxBlocks = size/BLOCK_SIZE;
|
||||
|
||||
/* invalidate entire cache */
|
||||
localTime += NUM_CACHE_SEGMENTS;
|
||||
|
||||
/* reset to the first segment */
|
||||
#if FORCE_SIMPLE_FLUSH == 1
|
||||
next_block = 0;
|
||||
switch_block_threshold = maxBlocks;
|
||||
#else
|
||||
const size_t region = localTime % NUM_CACHE_SEGMENTS;
|
||||
next_block = region * (maxBlocks/NUM_CACHE_SEGMENTS);
|
||||
switch_block_threshold = next_block + (maxBlocks/NUM_CACHE_SEGMENTS);
|
||||
assert( switch_block_threshold <= maxBlocks );
|
||||
#endif
|
||||
|
||||
/* release all blocked threads */
|
||||
for (ThreadWorkState *t=current_t_state;t!=nullptr;t=t->next)
|
||||
unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);
|
||||
|
||||
/* unlock the linked list of thread states */
|
||||
linkedlist_mtx.unlock();
|
||||
|
||||
/* unlock the reset_state */
|
||||
reset_state.unlock();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* storage for the static statistics counters, all starting at zero */
std::atomic<size_t> SharedTessellationCacheStats::cache_accesses(0);
|
||||
std::atomic<size_t> SharedTessellationCacheStats::cache_hits(0);
|
||||
std::atomic<size_t> SharedTessellationCacheStats::cache_misses(0);
|
||||
std::atomic<size_t> SharedTessellationCacheStats::cache_flushes(0);
|
||||
/* lock declared next to the statistics; no user of it is visible in this file */
SpinLock SharedTessellationCacheStats::mtx;
|
||||
/* plain (non-atomic) patch counter; printed by printStats() */
size_t SharedTessellationCacheStats::cache_num_patches(0);
|
||||
|
||||
/* Prints all statistics counters plus the hit rate in percent, and asserts
   that hits and misses add up to the number of accesses. */
void SharedTessellationCacheStats::printStats()
|
||||
{
|
||||
PRINT(cache_accesses);
|
||||
PRINT(cache_misses);
|
||||
PRINT(cache_hits);
|
||||
PRINT(cache_flushes);
|
||||
/* hit rate in percent, evaluated in floating point.
   NOTE(review): not guarded against cache_accesses == 0 */
PRINT(100.0f * cache_hits / cache_accesses);
|
||||
/* every access is counted either as a hit or as a miss */
assert(cache_hits + cache_misses == cache_accesses);
|
||||
PRINT(cache_num_patches);
|
||||
}
|
||||
|
||||
void SharedTessellationCacheStats::clearStats()
|
||||
{
|
||||
SharedTessellationCacheStats::cache_accesses = 0;
|
||||
SharedTessellationCacheStats::cache_hits = 0;
|
||||
SharedTessellationCacheStats::cache_misses = 0;
|
||||
SharedTessellationCacheStats::cache_flushes = 0;
|
||||
}
|
||||
|
||||
struct cache_regression_test : public RegressionTest
|
||||
{
|
||||
BarrierSys barrier;
|
||||
std::atomic<size_t> numFailed;
|
||||
std::atomic<int> threadIDCounter;
|
||||
static const size_t numEntries = 4*1024;
|
||||
SharedLazyTessellationCache::CacheEntry entry[numEntries];
|
||||
|
||||
cache_regression_test()
|
||||
: RegressionTest("cache_regression_test"), numFailed(0), threadIDCounter(0)
|
||||
{
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
static void thread_alloc(cache_regression_test* This)
|
||||
{
|
||||
int threadID = This->threadIDCounter++;
|
||||
size_t maxN = SharedLazyTessellationCache::sharedLazyTessellationCache.maxAllocSize()/4;
|
||||
This->barrier.wait();
|
||||
|
||||
for (size_t j=0; j<100000; j++)
|
||||
{
|
||||
size_t elt = (threadID+j)%numEntries;
|
||||
size_t N = min(1+10*(elt%1000),maxN);
|
||||
|
||||
volatile int* data = (volatile int*) SharedLazyTessellationCache::lookup(This->entry[elt],0,[&] () {
|
||||
int* data = (int*) SharedLazyTessellationCache::sharedLazyTessellationCache.malloc(4*N);
|
||||
for (size_t k=0; k<N; k++) data[k] = (int)elt;
|
||||
return data;
|
||||
});
|
||||
|
||||
if (data == nullptr) {
|
||||
SharedLazyTessellationCache::sharedLazyTessellationCache.unlock();
|
||||
This->numFailed++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* check memory block */
|
||||
for (size_t k=0; k<N; k++) {
|
||||
if (data[k] != (int)elt) {
|
||||
This->numFailed++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SharedLazyTessellationCache::sharedLazyTessellationCache.unlock();
|
||||
}
|
||||
This->barrier.wait();
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
numFailed.store(0);
|
||||
|
||||
size_t numThreads = getNumberOfLogicalThreads();
|
||||
barrier.init(numThreads+1);
|
||||
|
||||
/* create threads */
|
||||
std::vector<thread_t> threads;
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
threads.push_back(createThread((thread_func)thread_alloc,this,0,i));
|
||||
|
||||
/* run test */
|
||||
barrier.wait();
|
||||
barrier.wait();
|
||||
|
||||
/* destroy threads */
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
join(threads[i]);
|
||||
|
||||
return numFailed == 0;
|
||||
}
|
||||
};
|
||||
|
||||
cache_regression_test cache_regression;
|
||||
};
|
||||
|
||||
/* C-linkage helper: prints a header line followed by the shared
   tessellation cache statistics, then clears the statistics. */
extern "C" void printTessCacheStats()
|
||||
{
|
||||
PRINT("SHARED TESSELLATION CACHE");
|
||||
embree::SharedTessellationCacheStats::printStats();
|
||||
/* start the next measurement from zero */
embree::SharedTessellationCacheStats::clearStats();
|
||||
}
|
||||
325
Framework/external/embree/kernels/subdiv/tessellation_cache.h
vendored
Normal file
325
Framework/external/embree/kernels/subdiv/tessellation_cache.h
vendored
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
/* force a complete cache invalidation when running out of allocation space */
|
||||
#define FORCE_SIMPLE_FLUSH 0
|
||||
|
||||
#define THREAD_BLOCK_ATOMIC_ADD 4
|
||||
|
||||
/* CACHE_STATS(x) wraps statistics-only statements. Both branches below
   currently expand to nothing, so those statements are compiled out
   whether or not DEBUG is defined. */
#if defined(DEBUG)
|
||||
#define CACHE_STATS(x)
|
||||
#else
|
||||
#define CACHE_STATS(x)
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Static statistics counters for the shared tessellation cache. The cache
   code updates them inside CACHE_STATS(...) statements. */
class SharedTessellationCacheStats
|
||||
{
|
||||
public:
|
||||
/* stats */
|
||||
static std::atomic<size_t> cache_accesses;
|
||||
static std::atomic<size_t> cache_hits;
|
||||
static std::atomic<size_t> cache_misses;
|
||||
static std::atomic<size_t> cache_flushes;
|
||||
static size_t cache_num_patches;
|
||||
__aligned(64) static SpinLock mtx;
|
||||
|
||||
/* print stats for debugging */
|
||||
static void printStats();
|
||||
/* zeroes the access, hit, miss and flush counters */
static void clearStats();
|
||||
};
|
||||
|
||||
void resizeTessellationCache(size_t new_size);
|
||||
void resetTessellationCache();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Per-thread bookkeeping of the shared tessellation cache. Instances are
   chained into a singly linked list through 'next'. */
struct __aligned(64) ThreadWorkState
{
  ALIGNED_STRUCT_(64);

  /* lock counter of the thread; raised by THREAD_BLOCK_ATOMIC_ADD while
     the cache blocks the thread */
  std::atomic<size_t> counter;

  /* next state in the linked list of thread states */
  ThreadWorkState* next;

  /* true when this state was created individually with new and therefore
     has to be deleted individually */
  bool allocated;

  __forceinline ThreadWorkState(bool allocated = false)
    : counter(0), next(nullptr), allocated(allocated)
  {
    /* the object has to start on a 64 byte boundary */
    assert( (reinterpret_cast<size_t>(this) & 63) == 0 );
  }
};
|
||||
|
||||
/* Cache for lazily built tessellation data that is shared by all threads.

   The backing memory is divided into blocks of BLOCK_SIZE bytes and, unless
   FORCE_SIMPLE_FLUSH is set, into NUM_CACHE_SEGMENTS segments that are
   filled one after the other. Every cached item carries the time at which
   it was built; advancing the local time invalidates old items.

   Threads register a ThreadWorkState and lock it while they work with
   cached data. Segment switches, reset() and realloc() block all
   registered threads while they modify the cache. */
class __aligned(64) SharedLazyTessellationCache
{
public:

  static const size_t NUM_CACHE_SEGMENTS              = 8;
  static const size_t NUM_PREALLOC_THREAD_WORK_STATES = 512;
  static const size_t COMMIT_INDEX_SHIFT              = 32+8;
#if defined(__64BIT__)
  static const size_t REF_TAG_MASK                    = 0xffffffffff;
#else
  static const size_t REF_TAG_MASK                    = 0x7FFFFFFF;
#endif
  static const size_t MAX_TESSELLATION_CACHE_SIZE     = REF_TAG_MASK+1;
  static const size_t BLOCK_SIZE                      = 64;


  /*! Per thread tessellation ref cache */
  static __thread ThreadWorkState* init_t_state;
  static ThreadWorkState* current_t_state;

  /* Returns the work state of the calling thread, registering the thread
     on first use. */
  static __forceinline ThreadWorkState *threadState()
  {
    if (unlikely(!init_t_state))
      /* sets init_t_state, can't return pointer due to macosx icc bug*/
      SharedLazyTessellationCache::sharedLazyTessellationCache.getNextRenderThreadWorkState();
    return init_t_state;
  }

  /* Reference to a cached item: the offset of the item from the start of
     the cache memory in the low bits, combined with the build time shifted
     left by COMMIT_INDEX_SHIFT. A value of 0 means "no item". */
  struct Tag
  {
    __forceinline Tag() : data(0) {}

    __forceinline Tag(void* ptr, size_t combinedTime) {
      init(ptr,combinedTime);
    }

    __forceinline Tag(size_t ptr, size_t combinedTime) {
      init((void*)ptr,combinedTime);
    }

    /* Encodes ptr (which has to point into the cache memory) together
       with combinedTime; a null ptr yields the empty tag. */
    __forceinline void init(void* ptr, size_t combinedTime)
    {
      if (ptr == nullptr) {
        data = 0;
        return;
      }
      /* store the pointer as offset relative to the cache memory */
      int64_t new_root_ref = (int64_t) ptr;
      new_root_ref -= (int64_t)SharedLazyTessellationCache::sharedLazyTessellationCache.getDataPtr();
      assert( new_root_ref <= (int64_t)REF_TAG_MASK );
      new_root_ref |= (int64_t)combinedTime << COMMIT_INDEX_SHIFT;
      data = new_root_ref;
    }

    __forceinline int64_t get() const { return data.load(); }
    __forceinline void set( int64_t v ) { data.store(v); }
    __forceinline void reset() { data.store(0); }

  private:
    atomic<int64_t> data;
  };

  /* Extracts the time component of an encoded tag value. */
  static __forceinline size_t extractCommitIndex(const int64_t v) { return v >> SharedLazyTessellationCache::COMMIT_INDEX_SHIFT; }

  /* One slot of a user managed table of cached items: the tag of the item
     plus the lock that serializes its construction. */
  struct CacheEntry
  {
    Tag tag;
    SpinLock mutex;
  };

private:

  float *data;                      // backing memory of the cache
  bool hugepages;                   // passed to os_malloc/os_free together with the buffer
  size_t size;                      // size of the backing memory in bytes
  size_t maxBlocks;                 // size/BLOCK_SIZE
  ThreadWorkState *threadWorkState; // preallocated array of thread work states

  __aligned(64) std::atomic<size_t> localTime;              // current time of the cache
  __aligned(64) std::atomic<size_t> next_block;             // index of the next free block
  __aligned(64) SpinLock   reset_state;                     // held during segment switch, reset and realloc
  __aligned(64) SpinLock   linkedlist_mtx;                  // protects the list of thread states
  __aligned(64) std::atomic<size_t> switch_block_threshold; // end (block index) of the current segment
  __aligned(64) std::atomic<size_t> numRenderThreads;       // number of registered threads


public:


  SharedLazyTessellationCache();
  ~SharedLazyTessellationCache();

  void getNextRenderThreadWorkState();

  /* Returns the current segment threshold.
     NOTE(review): this is a block index (the end of the current segment),
     not a byte count - confirm against callers. */
  __forceinline size_t maxAllocSize() const {
    return switch_block_threshold;
  }

  __forceinline size_t getCurrentIndex() { return localTime.load(); }
  __forceinline void   addCurrentIndex(const size_t i=1) { localTime.fetch_add(i); }

  /* Combines the local time of the cache with a caller supplied global time. */
  __forceinline size_t getTime(const size_t globalTime) {
    return localTime.load()+NUM_CACHE_SEGMENTS*globalTime;
  }


  /* Both functions add 'plus' to the counter of the thread state and
     return the previous counter value. */
  __forceinline size_t lockThread  (ThreadWorkState *const t_state, const ssize_t plus=1) { return t_state->counter.fetch_add(plus);  }
  __forceinline size_t unlockThread(ThreadWorkState *const t_state, const ssize_t plus=-1) { assert(isLocked(t_state)); return t_state->counter.fetch_add(plus); }

  __forceinline bool isLocked(ThreadWorkState *const t_state) { return t_state->counter.load() != 0; }

  /* Convenience wrappers that operate on the state of the calling thread. */
  static __forceinline void lock  () { sharedLazyTessellationCache.lockThread(threadState()); }
  static __forceinline void unlock() { sharedLazyTessellationCache.unlockThread(threadState()); }
  static __forceinline bool isLocked() { return sharedLazyTessellationCache.isLocked(threadState()); }
  static __forceinline size_t getState() { return threadState()->counter.load(); }
  static __forceinline void lockThreadLoop() { sharedLazyTessellationCache.lockThreadLoop(threadState()); }

  static __forceinline size_t getTCacheTime(const size_t globalTime) {
    return sharedLazyTessellationCache.getTime(globalTime);
  }

  /* per thread lock: retries until the thread is locked while no blocking
     operation (counter >= THREAD_BLOCK_ATOMIC_ADD) is in progress */
  __forceinline void lockThreadLoop (ThreadWorkState *const t_state)
  {
    while(1)
    {
      size_t lock = SharedLazyTessellationCache::sharedLazyTessellationCache.lockThread(t_state,1);
      if (unlikely(lock >= THREAD_BLOCK_ATOMIC_ADD))
      {
        /* lock failed wait until sync phase is over */
        sharedLazyTessellationCache.unlockThread(t_state,-1);
        sharedLazyTessellationCache.waitForUsersLessEqual(t_state,0);
      }
      else
        break;
    }
  }

  /* Returns the cached item referenced by 'entry' if its tag is non-empty
     and still valid for globalTime, nullptr otherwise. Does not lock. */
  static __forceinline void* lookup(CacheEntry& entry, size_t globalTime)
  {
    const int64_t subdiv_patch_root_ref = entry.tag.get();
    CACHE_STATS(SharedTessellationCacheStats::cache_accesses++);

    if (likely(subdiv_patch_root_ref != 0))
    {
      /* turn the stored offset back into a pointer */
      const size_t subdiv_patch_root = (subdiv_patch_root_ref & REF_TAG_MASK) + (size_t)sharedLazyTessellationCache.getDataPtr();
      const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);

      if (likely( sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime) ))
      {
        CACHE_STATS(SharedTessellationCacheStats::cache_hits++);
        return (void*) subdiv_patch_root;
      }
    }
    CACHE_STATS(SharedTessellationCacheStats::cache_misses++);
    return nullptr;
  }

  /* Returns the cached item of 'entry', building it with 'constructor' when
     the entry is empty or outdated. The calling thread is locked when this
     function returns; the caller has to unlock() once it is done with the
     data. 'before' selects whether the item is tagged with the time taken
     before (true) or after (false) the constructor ran. */
  template<typename Constructor>
    static __forceinline auto lookup (CacheEntry& entry, size_t globalTime, const Constructor constructor, const bool before=false) -> decltype(constructor())
  {
    ThreadWorkState *t_state = SharedLazyTessellationCache::threadState();

    while (true)
    {
      sharedLazyTessellationCache.lockThreadLoop(t_state);
      void* patch = SharedLazyTessellationCache::lookup(entry,globalTime);
      if (patch) return (decltype(constructor())) patch;

      if (entry.mutex.try_lock())
      {
        /* re-check under the entry lock, another thread may have built the item */
        if (!validTag(entry.tag,globalTime))
        {
          auto timeBefore = sharedLazyTessellationCache.getTime(globalTime);
          auto ret = constructor(); // thread is locked here!
          assert(ret);
          /* this should never return nullptr */
          auto timeAfter = sharedLazyTessellationCache.getTime(globalTime);
          auto time = before ? timeBefore : timeAfter;
          __memory_barrier();
          entry.tag = SharedLazyTessellationCache::Tag(ret,time);
          __memory_barrier();
          entry.mutex.unlock();
          return ret;
        }
        entry.mutex.unlock();
      }
      /* give blocking operations a chance before retrying */
      SharedLazyTessellationCache::sharedLazyTessellationCache.unlockThread(t_state);
    }
  }

  /* Tells whether an item built at time i is still valid at the current time. */
  __forceinline bool validCacheIndex(const size_t i, const size_t globalTime)
  {
#if FORCE_SIMPLE_FLUSH == 1
    return i == getTime(globalTime);
#else
    return i+(NUM_CACHE_SEGMENTS-1) >= getTime(globalTime);
#endif
  }

  static __forceinline bool validTime(const size_t oldtime, const size_t newTime)
  {
    return oldtime+(NUM_CACHE_SEGMENTS-1) >= newTime;
  }


  /* Tells whether 'tag' is non-empty and still valid for globalTime. */
  static __forceinline bool validTag(const Tag& tag, size_t globalTime)
  {
    const int64_t subdiv_patch_root_ref = tag.get();
    if (subdiv_patch_root_ref == 0) return false;
    const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
    return sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime);
  }

  void waitForUsersLessEqual(ThreadWorkState *const t_state,
                             const unsigned int users);

  /* Reserves 'blocks' consecutive blocks in the current segment.
     Returns the index of the first block, or (size_t)-1 when the segment
     has no room left and has to be switched first.
     Throws RTC_ERROR_INVALID_OPERATION when the request can never fit
     into a segment. */
  __forceinline size_t alloc(const size_t blocks)
  {
    /* number of blocks of a single segment; switch_block_threshold cannot
       be used for this check, it is the absolute end index of the current
       segment and grows with the segment number */
#if FORCE_SIMPLE_FLUSH == 1
    const size_t segment_blocks = maxBlocks;
#else
    const size_t segment_blocks = maxBlocks/NUM_CACHE_SEGMENTS;
#endif
    if (unlikely(blocks >= segment_blocks))
      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"allocation exceeds size of tessellation cache segment");

    const size_t index = next_block.fetch_add(blocks);
    if (unlikely(index + blocks >= switch_block_threshold)) return (size_t)-1;
    return index;
  }

  /* Allocates 'bytes' bytes (rounded up to whole blocks) from the cache,
     switching to the next segment as often as needed. The code unlocks
     and re-locks the calling thread around each segment switch, i.e. it
     expects the thread to be locked on entry. */
  static __forceinline void* malloc(const size_t bytes)
  {
    size_t block_index = (size_t)-1;
    ThreadWorkState *const t_state = threadState();
    while (true)
    {
      block_index = sharedLazyTessellationCache.alloc((bytes+BLOCK_SIZE-1)/BLOCK_SIZE);
      if (block_index == (size_t)-1)
      {
        /* release our lock so that the segment switch can block all threads */
        sharedLazyTessellationCache.unlockThread(t_state);
        sharedLazyTessellationCache.allocNextSegment();
        sharedLazyTessellationCache.lockThread(t_state);
        continue;
      }
      break;
    }
    return sharedLazyTessellationCache.getBlockPtr(block_index);
  }

  /* Returns the address of the block with the given index. */
  __forceinline void *getBlockPtr(const size_t block_index)
  {
    /* 'data' is a float array, so a block spans BLOCK_SIZE/sizeof(float) elements */
    const size_t floats_per_block = BLOCK_SIZE/sizeof(float);
    assert(block_index < maxBlocks);
    assert(data);
    assert(block_index*BLOCK_SIZE < size);
    return (void*)&data[block_index*floats_per_block];
  }

  __forceinline void*  getDataPtr()      { return data; }
  __forceinline size_t getNumUsedBytes() { return next_block * BLOCK_SIZE; }
  __forceinline size_t getMaxBlocks()    { return maxBlocks; }
  __forceinline size_t getSize()         { return size; }

  void allocNextSegment();
  void realloc(const size_t newSize);

  void reset();

  static SharedLazyTessellationCache sharedLazyTessellationCache;
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue