UfnCod3r
شنبه 21 اردیبهشت 1392, 10:56 صبح
سلام
این کلاس بردار است که کلاً با SSE نوشته‌ام.
یک سری چیزهای دیگر هم نوشته‌ام؛ بعداً همه چیز را در نت می‌گذارم :شیطان:
دیگر از این سریع‌تر نمی‌شود :لبخندساده:
///////////////////////////XVec3.h/////////////////////////
#ifndef __XVEC3_H__
#define __XVEC3_H__
#include "XBasic.h"
#define XALIGN16 __declspec(align(16))
struct XQuat;
//Vector3D
// 3D vector stored as four floats (x, y, z plus one padding lane) in a
// 16-byte-aligned block so it can be moved to/from SSE registers with aligned
// loads and stores.
XALIGN16 struct XEXPORT XVec3
{
    union
    {
        // `unused` is the padding lane; 4-float SSE loads/stores of this object
        // read and write it together with x, y and z.
        struct { flt x, y, z, unused; };
        // Array view of x, y, z.
        flt xyz[3];
    };
    //////////////////////////////////////////////////////////////////////////ctors dtors
    // Leaves every component uninitialized.
    inline XVec3(){};
    // Sets x, y, z. The padding lane is zeroed so that 4-lane SSE arithmetic on
    // this object never starts from an indeterminate value.
    inline XVec3(flt _x, flt _y, flt _z)
    {
        x = _x;
        y = _y;
        z = _z;
        unused = 0;
    }
    ///////////////////////////////////////////////////////////////////////////operators
    // Component-wise assignment / compound assignment with another vector.
    XVec3& operator = (const XVec3& rv);
    XVec3& operator += (const XVec3& rv);
    XVec3& operator -= (const XVec3& rv);
    XVec3& operator *= (const XVec3& rv);
    XVec3& operator /= (const XVec3& rv);
    // Compound assignment with a scalar applied to every component.
    XVec3& operator += (flt f);
    XVec3& operator -= (flt f);
    XVec3& operator *= (flt f);
    XVec3& operator /= (flt f);
    // Component-wise binary operators returning a new vector.
    XVec3 operator + (const XVec3& v) const;
    XVec3 operator - (const XVec3& v) const;
    XVec3 operator * (const XVec3& v) const;
    XVec3 operator / (const XVec3& v) const;
    XVec3 operator + (flt f) const;
    XVec3 operator - (flt f) const;
    XVec3 operator * (flt f) const;
    XVec3 operator / (flt f) const;
    // Negation.
    XVec3 operator -() const;
    // Heap allocation goes through the 16-byte-aligned allocator. The array
    // forms are provided as well, so that `new XVec3[n]` uses the same
    // allocator instead of falling back to the global operator new[].
    inline void* operator new(size_t x) { return _aligned_malloc(x, 16); }
    inline void operator delete(void* x) { _aligned_free(x); }
    inline void* operator new[](size_t x) { return _aligned_malloc(x, 16); }
    inline void operator delete[](void* x) { _aligned_free(x); }
    //////////////////////////////////////////////////////////////////////member func
    void setZero();
    void setOne();
    void setForward();
    void setUp();
    void setRight();
    flt length() const;
    flt lengthSq() const;
    //1.0f / length()
    flt rLength() const;
    //1.0f / lengthSq()
    flt rLengthSq() const;
    // Normalizes this vector in place.
    void normalize();
    // Writes the normalized vector to *dst.
    void getNormalize(XVec3* dst) const;
    // Returns the normalized vector by value. The result starts as a copy of
    // *this so it is well defined even if the out-parameter overload leaves
    // its destination untouched.
    inline XVec3 getNormalize() const
    {
        XVec3 r(*this);
        getNormalize(&r);
        return r;
    }
    flt dot(const XVec3& v) const;
    // Clamps each component of this vector between min and max.
    void clamp(const XVec3& min, const XVec3& max);
    ///////////////////////////////////////////////////////////////////////static var
    //(0,1,0)
    static const XVec3 UP;
    //(0,0,1)
    static const XVec3 FORWARD;
    //(1,0,0)
    static const XVec3 RIGHT;
    //(1,1,1)
    static const XVec3 ONE;
    //(0,0,0)
    static const XVec3 ZERO;
    ////////////////////////////////////////////////////////////////////////static func
    static flt Distance(const XVec3& a, const XVec3& b);
    static flt DistanceSq(const XVec3& a, const XVec3& b);
    static flt Dot(const XVec3& a, const XVec3& b);
    static void Cross(const XVec3& a, const XVec3& b, XVec3* dst);
    static void Lerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst);
    static void SLerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst); //###INCOMPLETE####
    static flt Angle(const XVec3& from, const XVec3& to);
    static void RotateAround(const XVec3& point, const XVec3& pivot, const XQuat& rotation, XVec3* dst);
    static void RotateAroundX(const XVec3& point, flt angle, XVec3* dst);
    static void RotateAroundY(const XVec3& point, flt angle, XVec3* dst);
    static void RotateAroundZ(const XVec3& point, flt angle, XVec3* dst);
    static void Clamp(const XVec3& v, const XVec3& min, const XVec3& max, XVec3* dst);
    static void Smooth(const XVec3& from, const XVec3& to, flt elapsedTime, flt responseTime, XVec3* dst);
    static void Orthonormalize(XVec3 &a, XVec3 &b);
    // By-value convenience wrappers around the out-parameter rotation functions.
    static inline XVec3 RotateAroundX(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundX(point, angle, &v);
        return v;
    }
    static inline XVec3 RotateAroundY(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundY(point, angle, &v);
        return v;
    }
    static inline XVec3 RotateAroundZ(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundZ(point, angle, &v);
        return v;
    }
};
#endif
-
-
-
/////////////////////////////////////XVec3_SSE.cpp////////////////////////////////
#ifdef XUSE_SSE
#include "XVec3.h"
#include "XMath.h"
#include "XQuat.h"
//#include "XSIMD.h"
//SSE4.1
#include <smmintrin.h>
// ---- Lane broadcast helpers -------------------------------------------------
// _MM_SHUFFLE(i,i,i,i) broadcasts lane i of the source register. For an XVec3
// loaded with _mm_load_ps, lane 0 is x, lane 1 is y, lane 2 is z and lane 3 is
// the padding float, so the names below do NOT match the component they
// broadcast (_mm_allw_ps broadcasts lane 0, and so on). The names are kept
// because existing code relies on them.
#define _mm_allw_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(0,0,0,0))
#define _mm_allx_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(1,1,1,1))
#define _mm_ally_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(2,2,2,2))
#define _mm_allz_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(3,3,3,3))
// Lane permutations: 0x1B reverses the four lanes, 0x93 and 0x39 rotate them
// by one position in opposite directions.
#define _mm_swap_ps(m128) _mm_shuffle_ps(m128, m128, 0x1B)
#define _mm_rol_ps(m128) _mm_shuffle_ps(m128, m128, 0x93)
#define _mm_ror_ps(m128) _mm_shuffle_ps(m128, m128, 0x39)
// Approximate scalar sqrt: reciprocal of the approximate reciprocal sqrt.
#define _mm_sqrtfast_ss(m128) _mm_rcp_ss(_mm_rsqrt_ss(m128))
// Absolute value: clears the sign bit of every lane.
// (Fixed: the original had an unbalanced trailing ')'.)
#define _mm_abs_ps(m128) _mm_andnot_ps(_mm_set1_ps(-0.0f), m128)
// Negation: flips the sign bit of every lane.
#define _mm_neg_ps(m128) _mm_xor_ps(_mm_set1_ps(-0.0f), m128)
// Per-lane clamp of Val into [Min, Max].
#define _mm_clamp_ps(m128_Val, m128_Min, m128_Max) _mm_min_ps(_mm_max_ps(m128_Val, m128_Min), m128_Max)
// Cross product of the x/y/z lanes of a and b, written to m128_out.
// Expands to two statements (the trailing ';' is part of the macro).
// (Fixed: the original had an unbalanced ')' on the last line.)
#define _mm_vec3_cross(m128_a, m128_b, m128_out) \
m128_out = _mm_sub_ps(_mm_mul_ps(m128_a, _mm_shuffle_ps(m128_b, m128_b, _MM_SHUFFLE(0,0,2,1))) \
, _mm_mul_ps(_mm_shuffle_ps(m128_a, m128_a, _MM_SHUFFLE(0,0,2,1)), m128_b)); \
m128_out = _mm_shuffle_ps(m128_out, m128_out, _MM_SHUFFLE(0,0,2,1));
// Normalizes the x/y/z lanes without any zero-length check (SSE4.1 _mm_dp_ps).
// m128_TmpLen is a scratch register supplied by the caller.
#define _mm_vec3_normalize_fast(m128_InOut, m128_TmpLen) \
m128_TmpLen = _mm_rsqrt_ss(_mm_dp_ps(m128_InOut, m128_InOut, 0x71)); \
m128_InOut = _mm_mul_ps(m128_InOut, _mm_shuffle_ps(m128_TmpLen, m128_TmpLen, _MM_SHUFFLE(0,0,0,0)))
// Normalizes the x/y/z lanes only when the squared length exceeds 0.000001f.
#define _mm_vec3_normalize(m128_InOut, m128_TmpLen) \
m128_TmpLen = _mm_dp_ps(m128_InOut, m128_InOut, 0x71); /*get square length*/ \
if(_mm_comigt_ss(m128_TmpLen, _mm_set_ss(0.000001f))) /*is greater than 0.000001f*/ \
{ \
m128_TmpLen = _mm_rsqrt_ss(m128_TmpLen); /* 1.0f / sqrt(SquareLength)*/ \
m128_InOut = _mm_mul_ps(m128_InOut, _mm_shuffle_ps(m128_TmpLen, m128_TmpLen, _MM_SHUFFLE(0,0,0,0))); \
}
// Scalar results from the x/y/z lanes. Mask 0x71: multiply lanes 0..2, write
// the sum to lane 0.
#define _mm_vec3_length(m128) _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_rlength(m128) _mm_cvtss_f32(_mm_rsqrt_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_lengthSq(m128) _mm_cvtss_f32(_mm_dp_ps(m128, m128, 0x71))
#define _mm_vec3_rlengthSq(m128) _mm_cvtss_f32(_mm_rcp_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_dot(m128_a, m128_b) _mm_cvtss_f32(_mm_dp_ps(m128_a, m128_b, 0x71))
void XVec3::RotateAroundX( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = point.x;
dst->y = fcos * point.y - fsin * point.z;
dst->z = fsin * point.y + fcos * point.z;
}
void XVec3::RotateAroundY( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = fsin * point.z + fcos * point.x;
dst->y = point.y;
dst->z = fcos * point.z - fsin * point.x;
}
void XVec3::RotateAroundZ( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = fcos * point.x - fsin * point.y;
dst->y = fsin * point.x + fcos * point.y;
dst->z = point.z;
}
/////////////////////////////////////////////////////////////////////////////////////////////operators
// Copies all four lanes (x, y, z and the padding float) from rv using one
// aligned SSE load and one aligned store.
XVec3& XVec3::operator = (const XVec3& rv)
{
    const __m128 source = _mm_load_ps(rv.xyz);
    _mm_store_ps(this->xyz, source);
    return *this;
}
// In-place lane-wise addition of rv (all four lanes are processed).
XVec3& XVec3::operator += (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_add_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise subtraction of rv (all four lanes are processed).
XVec3& XVec3::operator -= (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_sub_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise multiplication by rv (all four lanes are processed).
XVec3& XVec3::operator *= (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_mul_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise division by rv. All four lanes are divided, including the
// padding lane.
XVec3& XVec3::operator /= (const XVec3& rv)
{
    const __m128 numerator = _mm_load_ps(xyz);
    const __m128 denominator = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_div_ps(numerator, denominator));
    return *this;
}
// Adds the scalar f to every lane of this vector in place.
XVec3& XVec3::operator += (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_add_ps(self, scalar));
    return *this;
}
// Subtracts the scalar f from every lane of this vector in place.
XVec3& XVec3::operator -= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_sub_ps(self, scalar));
    return *this;
}
// Multiplies every lane of this vector by the scalar f in place.
XVec3& XVec3::operator *= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_mul_ps(self, scalar));
    return *this;
}
// Divides every lane of this vector by the scalar f in place.
XVec3& XVec3::operator /= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_div_ps(self, scalar));
    return *this;
}
// Returns the lane-wise sum of this vector and v.
XVec3 XVec3::operator + (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_add_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise difference of this vector and v.
XVec3 XVec3::operator - (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_sub_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise product of this vector and v.
XVec3 XVec3::operator * (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_mul_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise quotient of this vector and v. All four lanes are
// divided, including the padding lane.
XVec3 XVec3::operator / (const XVec3& v) const
{
    XVec3 result;
    const __m128 numerator = _mm_load_ps(xyz);
    const __m128 denominator = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_div_ps(numerator, denominator));
    return result;
}
// Returns a copy of this vector with the scalar f added to every lane.
XVec3 XVec3::operator + (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_add_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with the scalar f subtracted from every lane.
XVec3 XVec3::operator - (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_sub_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with every lane multiplied by the scalar f.
XVec3 XVec3::operator * (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_mul_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with every lane divided by the scalar f.
XVec3 XVec3::operator / (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_div_ps(self, scalar));
    return result;
}
// Returns the negated vector: the sign bit of every lane is flipped by XOR-ing
// with -0.0f.
XVec3 XVec3::operator -() const
{
    XVec3 negated;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 signMask = _mm_set1_ps(-0.0f);
    _mm_store_ps(negated.xyz, _mm_xor_ps(signMask, self));
    return negated;
}
/////////////////////////////////////////////////////////////funcs/////////////////////////////////////
// Sets all four lanes (including the padding lane) to zero.
void XVec3::setZero()
{
    const __m128 zeros = _mm_setzero_ps();
    _mm_store_ps(xyz, zeros);
}
// Overwrites this vector with the static constant ONE (all four lanes copied).
void XVec3::setOne()
{
    const __m128 constant = _mm_load_ps(ONE.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant FORWARD (all four lanes copied).
void XVec3::setForward()
{
    const __m128 constant = _mm_load_ps(FORWARD.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant UP (all four lanes copied).
void XVec3::setUp()
{
    const __m128 constant = _mm_load_ps(UP.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant RIGHT (all four lanes copied).
void XVec3::setRight()
{
    const __m128 constant = _mm_load_ps(RIGHT.xyz);
    _mm_store_ps(xyz, constant);
}
// Writes to *dst the lane-wise clamp of v into [min, max], computed as
// min(max(v, min), max). Asserts that dst is non-null and that min does not
// exceed max on x, y or z.
void XVec3::Clamp(const XVec3& v, const XVec3& min, const XVec3& max, XVec3* dst)
{
    XASSERT(dst);
    XASSERT(!(min.x > max.x || min.y > max.y || min.z > max.z));
    const __m128 value = _mm_load_ps(v.xyz);
    const __m128 lower = _mm_load_ps(min.xyz);
    const __m128 upper = _mm_load_ps(max.xyz);
    const __m128 raised = _mm_max_ps(value, lower);
    _mm_store_ps(dst->xyz, _mm_min_ps(raised, upper));
}
// Moves `from` towards `to` by the fraction elapsedTime / (elapsedTime + responseTime)
// and writes the result to *dst:
//   *dst = from + (to - from) * (elapsedTime / (elapsedTime + responseTime))
// When elapsedTime is not positive no movement happens and *dst receives
// `from`. The original left *dst unwritten in that case, so a caller passing a
// fresh destination read back an indeterminate vector.
void XVec3::Smooth(const XVec3& from, const XVec3& to, flt elapsedTime, flt responseTime, XVec3* dst)
{
    XASSERT(dst);
    const __m128 mFrom = _mm_load_ps(from.xyz);
    if (elapsedTime > 0.0f)
    {
        const __m128 mDelta = _mm_sub_ps(_mm_load_ps(to.xyz), mFrom);
        const __m128 mFactor = _mm_set_ps1(elapsedTime / (elapsedTime + responseTime));
        _mm_store_ps(dst->xyz, _mm_add_ps(mFrom, _mm_mul_ps(mDelta, mFactor)));
    }
    else
    {
        _mm_store_ps(dst->xyz, mFrom);
    }
}
// Clamps this vector in place, lane by lane, into [min, max] using
// min(max(this, min), max). XCHECK verifies that min does not exceed max on
// x, y or z.
void XVec3::clamp(const XVec3& min, const XVec3& max)
{
    XCHECK(!(min.x > max.x || min.y > max.y || min.z > max.z));
    const __m128 value = _mm_load_ps(xyz);
    const __m128 lower = _mm_load_ps(min.xyz);
    const __m128 upper = _mm_load_ps(max.xyz);
    const __m128 raised = _mm_max_ps(value, lower);
    _mm_store_ps(this->xyz, _mm_min_ps(raised, upper));
}
// Writes the cross product a x b to *dst.
//
// Scalar equivalent:
//   dst->x = (a.y * b.z) - (a.z * b.y);
//   dst->y = (a.z * b.x) - (a.x * b.z);
//   dst->z = (a.x * b.y) - (a.y * b.x);
//
// _MM_SHUFFLE(0,0,2,1) reorders the lanes to (y, z, x, x). The products
// a * b.yzx - a.yzx * b yield (z, x, y) of the result, and one more yzx
// shuffle puts them back into (x, y, z) order.
void XVec3::Cross(const XVec3& a, const XVec3& b, XVec3* dst)
{
    XCHECK(dst);
    const __m128 va = _mm_load_ps(a.xyz);
    const __m128 vb = _mm_load_ps(b.xyz);
    const __m128 vbYzx = _mm_shuffle_ps(vb, vb, _MM_SHUFFLE(0,0,2,1));
    const __m128 vaYzx = _mm_shuffle_ps(va, va, _MM_SHUFFLE(0,0,2,1));
    const __m128 first = _mm_mul_ps(va, vbYzx);
    const __m128 second = _mm_mul_ps(vaYzx, vb);
    const __m128 rotated = _mm_sub_ps(first, second);
    _mm_store_ps(dst->xyz, _mm_shuffle_ps(rotated, rotated, _MM_SHUFFLE(0,0,2,1)));
}
// Linear interpolation: *dst = from + (to - from) * dt, applied to all four lanes.
void XVec3::Lerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst)
{
    XASSERT(dst);
    const __m128 start = _mm_load_ps(from.xyz);
    const __m128 delta = _mm_sub_ps(_mm_load_ps(to.xyz), start);
    const __m128 weight = _mm_set_ps1(dt);
    _mm_store_ps(dst->xyz, _mm_add_ps(start, _mm_mul_ps(delta, weight)));
}
/*
void XVec3::RotateAround(const XVec3& point, const XVec3& pivot, const XQuat& rotation, XVec3* dst)
{
XASSERT(dst);
// @NormalCode
// *dst = (rotation * (point - pivot)) + pivot;
__m128 mPivot = _mm_load_ps(pivot.xyz);
__m128 mV = _mm_sub_ps(_mm_load_ps(point.xyz), mPivot);
//------------- XQuat * XVec3 ------------
__m128 mQVec = _mm_load_ps(rotation.xyzw);
__m128 mUV = _mm_vec3_cross(mQVec, mV);
__m128 mUUV = _mm_vec3_cross(mQVec, mUV);
mUV = _mm_mul_ps(mUV, _mm_set_ps1(rotation.w * 2.0f));
mUUV = _mm_mul_ps(mUUV, _mm_set_ps1(2.0f));
__m128 mQuatMulVec = _mm_add_ps(_mm_add_ps(mV, mUV), mUUV);
_mm_store_ps(dst->xyz, _mm_add_ps(mQuatMulVec, mPivot));
}
*/
#ifdef XUSE_SSE_4_1
/////////////////////////////////////////////////SSE4.1////////////////////////////////////////////////
// Euclidean length of (x, y, z), SSE4.1 path. The dot-product mask 0x71 sums
// lanes 0..2 and places the result in lane 0.
flt XVec3::length() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_sqrt_ss(lenSq));
}
// Approximate 1 / length of (x, y, z), SSE4.1 path, using the hardware
// reciprocal-square-root estimate.
flt XVec3::rLength() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_rsqrt_ss(lenSq));
}
// Squared length x*x + y*y + z*z, SSE4.1 path.
flt XVec3::lengthSq() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(lenSq);
}
// Approximate 1 / squared length of (x, y, z), SSE4.1 path, using the hardware
// reciprocal estimate.
flt XVec3::rLengthSq() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_rcp_ss(lenSq));
}
// Normalizes this vector in place (SSE4.1 path).
//
// The vector is left unchanged when its squared length is not greater than
// 0.000001f (near-zero vector or NaN) or already lies within 0.000003f of 1.
// The original tested only `lengthSq > 1.000003f`, so every vector shorter
// than unit length was silently left un-normalized.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::normalize()
{
    const __m128 mThis = _mm_load_ps(this->xyz);
    __m128 mLen = _mm_dp_ps(mThis, mThis, 0x71); //0x71 == 0111 0001: sum lanes 0..2 into lane 0
    const float lenSq = _mm_cvtss_f32(mLen);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        mLen = _mm_rsqrt_ss(mLen);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(this->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mLen, mLen, _MM_SHUFFLE(0,0,0,0))));
    }
}
// Writes the normalized copy of this vector to *dst (SSE4.1 path).
//
// When the squared length is not greater than 0.000001f (near-zero vector or
// NaN) or already lies within 0.000003f of 1, *dst receives an unscaled copy.
// The original tested only `lengthSq > 1.000003f` and wrote nothing otherwise,
// so shorter vectors were never normalized and *dst could stay uninitialized.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::getNormalize(XVec3* dst) const
{
    XCHECK(dst);
    const __m128 mThis = _mm_load_ps(this->xyz);
    __m128 mLen = _mm_dp_ps(mThis, mThis, 0x71); //0x71 == 0111 0001: sum lanes 0..2 into lane 0
    const float lenSq = _mm_cvtss_f32(mLen);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        mLen = _mm_rsqrt_ss(mLen);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(dst->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mLen, mLen, _MM_SHUFFLE(0,0,0,0))));
    }
    else
    {
        _mm_store_ps(dst->xyz, mThis);
    }
}
// Dot product of this vector and v over x, y, z (SSE4.1 path).
float XVec3::dot(const XVec3& v) const
{
    const __m128 lhs = _mm_load_ps(this->xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    return _mm_cvtss_f32(_mm_dp_ps(lhs, rhs, 0x71));
}
// Euclidean distance between a and b over x, y, z (SSE4.1 path).
float XVec3::Distance(const XVec3& a, const XVec3& b)
{
    const __m128 diff = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 distSq = _mm_dp_ps(diff, diff, 0x71);
    return _mm_cvtss_f32(_mm_sqrt_ss(distSq));
}
// Squared Euclidean distance between a and b over x, y, z (SSE4.1 path).
float XVec3::DistanceSq(const XVec3& a, const XVec3& b)
{
    const __m128 diff = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 distSq = _mm_dp_ps(diff, diff, 0x71);
    return _mm_cvtss_f32(distSq);
}
// Dot product of a and b over x, y, z (SSE4.1 path).
flt XVec3::Dot(const XVec3& a, const XVec3& b)
{
    const __m128 lhs = _mm_load_ps(a.xyz);
    const __m128 rhs = _mm_load_ps(b.xyz);
    return _mm_cvtss_f32(_mm_dp_ps(lhs, rhs, 0x71));
}
// Angle between v1 and v2, computed as XAtan2(|v1 x v2| + 0.0000001f, v1 . v2)
// (SSE4.1 path).
//
// Scalar equivalent:
//   float dz = v1.x * v2.y - v1.y * v2.x;
//   float dx = v1.y * v2.z - v1.z * v2.y;
//   float dy = v1.z * v2.x - v1.x * v2.z;
//   return atan2f(sqrt(dx * dx + dy * dy + dz * dz) + 0.0000001f, Dot(v1, v2));
//
// The cross product is left in rotated lane order because only its length is
// needed.
flt XVec3::Angle(const XVec3& v1, const XVec3& v2)
{
    const __m128 a = _mm_load_ps(v1.xyz);
    const __m128 b = _mm_load_ps(v2.xyz);
    const __m128 bYzx = _mm_shuffle_ps(b, b, _MM_SHUFFLE(0,0,2,1));
    const __m128 aYzx = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,2,1));
    const __m128 cross = _mm_sub_ps(_mm_mul_ps(a, bYzx), _mm_mul_ps(b, aYzx));
    const float crossLen = _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(cross, cross, 0x71)));
    const float dotProduct = _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71));
    return XAtan2(crossLen + 0.0000001f, dotProduct);
}
#else //#ifdef XUSE_SSE_4_1
// Euclidean length of (x, y, z), path without SSE4.1: squares all lanes, then
// adds the y and z squares onto lane 0 with scalar adds.
// (Fixed: the identifier `mMul` was split by a space and did not compile.)
flt XVec3::length() const
{
    const __m128 mThis = _mm_load_ps(xyz);
    const __m128 mMul = _mm_mul_ps(mThis, mThis);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Approximate 1 / length of (x, y, z), path without SSE4.1, using the hardware
// reciprocal-square-root estimate.
// (Fixed: the identifier `mMul` was split by a space and did not compile.)
flt XVec3::rLength() const
{
    const __m128 mThis = _mm_load_ps(xyz);
    const __m128 mMul = _mm_mul_ps(mThis, mThis);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Squared length (x*x + y*y) + z*z, path without SSE4.1: squares all lanes,
// then adds the y and z squares onto lane 0 with scalar adds.
flt XVec3::lengthSq() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 squares = _mm_mul_ps(v, v);
    const __m128 ySq = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1,1,1,1));
    const __m128 zSq = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2,2,2,2));
    const __m128 xyPart = _mm_add_ss(squares, ySq);
    const __m128 total = _mm_add_ss(xyPart, zSq);
    return _mm_cvtss_f32(total);
}
// Approximate 1 / squared length of (x, y, z), path without SSE4.1, using the
// hardware reciprocal estimate.
// (Fixed: the identifier `mMul` was split by a space and did not compile.)
flt XVec3::rLengthSq() const
{
    const __m128 mThis = _mm_load_ps(xyz);
    const __m128 mMul = _mm_mul_ps(mThis, mThis);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(_mm_rcp_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Normalizes this vector in place (path without SSE4.1).
//
// Fixes relative to the original:
//  * the inner `__m128 mLen = _mm_rsqrt_ss(mLen);` declared a new variable
//    initialized from itself instead of using the squared length;
//  * the test `lengthSq > 1.000003f` skipped every vector shorter than unit
//    length.
// The vector is now left unchanged only when its squared length is not greater
// than 0.000001f (near-zero vector or NaN) or already within 0.000003f of 1.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::normalize()
{
    const __m128 mThis = _mm_load_ps(xyz);
    const __m128 mMul = _mm_mul_ps(mThis, mThis);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    const __m128 mLenSq = _mm_add_ss(_mm_add_ss(mMul, mY), mZ);
    const float lenSq = _mm_cvtss_f32(mLenSq);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        const __m128 mInvLen = _mm_rsqrt_ss(mLenSq);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(this->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mInvLen, mInvLen, _MM_SHUFFLE(0,0,0,0))));
    }
}
// Writes the normalized copy of this vector to *dst (path without SSE4.1).
//
// Fixes relative to the original:
//  * the inner `__m128 mLen = _mm_rsqrt_ss(mLen);` declared a new variable
//    initialized from itself instead of using the squared length;
//  * the test `lengthSq > 1.000003f` skipped every vector shorter than unit
//    length;
//  * *dst was not written at all when the scaling was skipped.
// *dst now receives an unscaled copy when the squared length is not greater
// than 0.000001f (near-zero vector or NaN) or already within 0.000003f of 1.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::getNormalize(XVec3* dst) const
{
    XCHECK(dst);
    const __m128 mThis = _mm_load_ps(xyz);
    const __m128 mMul = _mm_mul_ps(mThis, mThis);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    const __m128 mLenSq = _mm_add_ss(_mm_add_ss(mMul, mY), mZ);
    const float lenSq = _mm_cvtss_f32(mLenSq);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        const __m128 mInvLen = _mm_rsqrt_ss(mLenSq);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(dst->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mInvLen, mInvLen, _MM_SHUFFLE(0,0,0,0))));
    }
    else
    {
        _mm_store_ps(dst->xyz, mThis);
    }
}
// Dot product of this vector and v, (x*x' + y*y') + z*z', path without SSE4.1.
float XVec3::dot(const XVec3& v) const
{
    const __m128 lhs = _mm_load_ps(this->xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    const __m128 products = _mm_mul_ps(lhs, rhs);
    const __m128 yTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(1,1,1,1));
    const __m128 zTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(2,2,2,2));
    const __m128 total = _mm_add_ss(_mm_add_ss(products, yTerm), zTerm);
    return _mm_cvtss_f32(total);
}
// Euclidean distance between a and b over x, y, z (path without SSE4.1).
// (Fixed: the identifier `mMul` was split by a space and did not compile.)
float XVec3::Distance(const XVec3& a, const XVec3& b)
{
    const __m128 mSub = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 mMul = _mm_mul_ps(mSub, mSub);
    const __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    const __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Squared Euclidean distance between a and b over x, y, z (path without SSE4.1).
float XVec3::DistanceSq(const XVec3& a, const XVec3& b)
{
    const __m128 diff = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 squares = _mm_mul_ps(diff, diff);
    const __m128 ySq = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1,1,1,1));
    const __m128 zSq = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2,2,2,2));
    const __m128 total = _mm_add_ss(_mm_add_ss(squares, ySq), zSq);
    return _mm_cvtss_f32(total);
}
// Dot product of a and b, (ax*bx + ay*by) + az*bz, path without SSE4.1.
flt XVec3::Dot(const XVec3& a, const XVec3& b)
{
    const __m128 lhs = _mm_load_ps(a.xyz);
    const __m128 rhs = _mm_load_ps(b.xyz);
    const __m128 products = _mm_mul_ps(lhs, rhs);
    const __m128 yTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(1,1,1,1));
    const __m128 zTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(2,2,2,2));
    const __m128 total = _mm_add_ss(_mm_add_ss(products, yTerm), zTerm);
    return _mm_cvtss_f32(total);
}
// Angle between v1 and v2, computed as XAtan2(|v1 x v2| + 0.0000001f, v1 . v2)
// (path without SSE4.1).
// (Fixed: the identifier `mMul` was split by a space and did not compile.)
//
// Scalar equivalent:
//   float dz = v1.x * v2.y - v1.y * v2.x;
//   float dx = v1.y * v2.z - v1.z * v2.y;
//   float dy = v1.z * v2.x - v1.x * v2.z;
//   return atan2f(sqrt(dx * dx + dy * dy + dz * dz) + 0.0000001f, Dot(v1, v2));
flt XVec3::Angle(const XVec3& v1, const XVec3& v2)
{
    const __m128 mV1 = _mm_load_ps(v1.xyz);
    const __m128 mV2 = _mm_load_ps(v2.xyz);
    // Cross product in rotated lane order; only its length is needed.
    const __m128 mSub = _mm_sub_ps(_mm_mul_ps(mV1, _mm_shuffle_ps(mV2, mV2, _MM_SHUFFLE(0,0,2,1)))
        , _mm_mul_ps(mV2, _mm_shuffle_ps(mV1, mV1, _MM_SHUFFLE(0,0,2,1))));
    // Length of the cross product.
    __m128 mMul = _mm_mul_ps(mSub, mSub);
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    const flt fLen = _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
    // Dot product of the two inputs.
    mMul = _mm_mul_ps(mV1, mV2);
    mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    const flt fDot = _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(mMul, mY), mZ));
    return XAtan2(fLen + 0.0000001f, fDot);
}
#endif //#else //#ifdef XUSE_SSE_4_1
#endif //#ifdef XUSE_SSE
:شیطان::شیطان::شیطان:
این کلاس بردار است که کلاً با SSE نوشته‌ام.
یک سری چیزهای دیگر هم نوشته‌ام؛ بعداً همه چیز را در نت می‌گذارم :شیطان:
دیگر از این سریع‌تر نمی‌شود :لبخندساده:
///////////////////////////XVec3.h/////////////////////////
#ifndef __XVEC3_H__
#define __XVEC3_H__
#include "XBasic.h"
#define XALIGN16 __declspec(align(16))
struct XQuat;
//Vector3D
// 3D vector stored as four floats (x, y, z plus one padding lane) in a
// 16-byte-aligned block so it can be moved to/from SSE registers with aligned
// loads and stores.
XALIGN16 struct XEXPORT XVec3
{
    union
    {
        // `unused` is the padding lane; 4-float SSE loads/stores of this object
        // read and write it together with x, y and z.
        struct { flt x, y, z, unused; };
        // Array view of x, y, z.
        flt xyz[3];
    };
    //////////////////////////////////////////////////////////////////////////ctors dtors
    // Leaves every component uninitialized.
    inline XVec3(){};
    // Sets x, y, z. The padding lane is zeroed so that 4-lane SSE arithmetic on
    // this object never starts from an indeterminate value.
    inline XVec3(flt _x, flt _y, flt _z)
    {
        x = _x;
        y = _y;
        z = _z;
        unused = 0;
    }
    ///////////////////////////////////////////////////////////////////////////operators
    // Component-wise assignment / compound assignment with another vector.
    XVec3& operator = (const XVec3& rv);
    XVec3& operator += (const XVec3& rv);
    XVec3& operator -= (const XVec3& rv);
    XVec3& operator *= (const XVec3& rv);
    XVec3& operator /= (const XVec3& rv);
    // Compound assignment with a scalar applied to every component.
    XVec3& operator += (flt f);
    XVec3& operator -= (flt f);
    XVec3& operator *= (flt f);
    XVec3& operator /= (flt f);
    // Component-wise binary operators returning a new vector.
    XVec3 operator + (const XVec3& v) const;
    XVec3 operator - (const XVec3& v) const;
    XVec3 operator * (const XVec3& v) const;
    XVec3 operator / (const XVec3& v) const;
    XVec3 operator + (flt f) const;
    XVec3 operator - (flt f) const;
    XVec3 operator * (flt f) const;
    XVec3 operator / (flt f) const;
    // Negation.
    XVec3 operator -() const;
    // Heap allocation goes through the 16-byte-aligned allocator. The array
    // forms are provided as well, so that `new XVec3[n]` uses the same
    // allocator instead of falling back to the global operator new[].
    inline void* operator new(size_t x) { return _aligned_malloc(x, 16); }
    inline void operator delete(void* x) { _aligned_free(x); }
    inline void* operator new[](size_t x) { return _aligned_malloc(x, 16); }
    inline void operator delete[](void* x) { _aligned_free(x); }
    //////////////////////////////////////////////////////////////////////member func
    void setZero();
    void setOne();
    void setForward();
    void setUp();
    void setRight();
    flt length() const;
    flt lengthSq() const;
    //1.0f / length()
    flt rLength() const;
    //1.0f / lengthSq()
    flt rLengthSq() const;
    // Normalizes this vector in place.
    void normalize();
    // Writes the normalized vector to *dst.
    void getNormalize(XVec3* dst) const;
    // Returns the normalized vector by value. The result starts as a copy of
    // *this so it is well defined even if the out-parameter overload leaves
    // its destination untouched.
    inline XVec3 getNormalize() const
    {
        XVec3 r(*this);
        getNormalize(&r);
        return r;
    }
    flt dot(const XVec3& v) const;
    // Clamps each component of this vector between min and max.
    void clamp(const XVec3& min, const XVec3& max);
    ///////////////////////////////////////////////////////////////////////static var
    //(0,1,0)
    static const XVec3 UP;
    //(0,0,1)
    static const XVec3 FORWARD;
    //(1,0,0)
    static const XVec3 RIGHT;
    //(1,1,1)
    static const XVec3 ONE;
    //(0,0,0)
    static const XVec3 ZERO;
    ////////////////////////////////////////////////////////////////////////static func
    static flt Distance(const XVec3& a, const XVec3& b);
    static flt DistanceSq(const XVec3& a, const XVec3& b);
    static flt Dot(const XVec3& a, const XVec3& b);
    static void Cross(const XVec3& a, const XVec3& b, XVec3* dst);
    static void Lerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst);
    static void SLerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst); //###INCOMPLETE####
    static flt Angle(const XVec3& from, const XVec3& to);
    static void RotateAround(const XVec3& point, const XVec3& pivot, const XQuat& rotation, XVec3* dst);
    static void RotateAroundX(const XVec3& point, flt angle, XVec3* dst);
    static void RotateAroundY(const XVec3& point, flt angle, XVec3* dst);
    static void RotateAroundZ(const XVec3& point, flt angle, XVec3* dst);
    static void Clamp(const XVec3& v, const XVec3& min, const XVec3& max, XVec3* dst);
    static void Smooth(const XVec3& from, const XVec3& to, flt elapsedTime, flt responseTime, XVec3* dst);
    static void Orthonormalize(XVec3 &a, XVec3 &b);
    // By-value convenience wrappers around the out-parameter rotation functions.
    static inline XVec3 RotateAroundX(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundX(point, angle, &v);
        return v;
    }
    static inline XVec3 RotateAroundY(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundY(point, angle, &v);
        return v;
    }
    static inline XVec3 RotateAroundZ(const XVec3& point, flt angle)
    {
        XVec3 v;
        RotateAroundZ(point, angle, &v);
        return v;
    }
};
#endif
-
-
-
/////////////////////////////////////XVec3_SSE.cpp////////////////////////////////
#ifdef XUSE_SSE
#include "XVec3.h"
#include "XMath.h"
#include "XQuat.h"
//#include "XSIMD.h"
//SSE4.1
#include <smmintrin.h>
// ---- Lane broadcast helpers -------------------------------------------------
// _MM_SHUFFLE(i,i,i,i) broadcasts lane i of the source register. For an XVec3
// loaded with _mm_load_ps, lane 0 is x, lane 1 is y, lane 2 is z and lane 3 is
// the padding float, so the names below do NOT match the component they
// broadcast (_mm_allw_ps broadcasts lane 0, and so on). The names are kept
// because existing code relies on them.
#define _mm_allw_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(0,0,0,0))
#define _mm_allx_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(1,1,1,1))
#define _mm_ally_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(2,2,2,2))
#define _mm_allz_ps(m128) _mm_shuffle_ps(m128,m128,_MM_SHUFFLE(3,3,3,3))
// Lane permutations: 0x1B reverses the four lanes, 0x93 and 0x39 rotate them
// by one position in opposite directions.
#define _mm_swap_ps(m128) _mm_shuffle_ps(m128, m128, 0x1B)
#define _mm_rol_ps(m128) _mm_shuffle_ps(m128, m128, 0x93)
#define _mm_ror_ps(m128) _mm_shuffle_ps(m128, m128, 0x39)
// Approximate scalar sqrt: reciprocal of the approximate reciprocal sqrt.
#define _mm_sqrtfast_ss(m128) _mm_rcp_ss(_mm_rsqrt_ss(m128))
// Absolute value: clears the sign bit of every lane.
// (Fixed: the original had an unbalanced trailing ')'.)
#define _mm_abs_ps(m128) _mm_andnot_ps(_mm_set1_ps(-0.0f), m128)
// Negation: flips the sign bit of every lane.
#define _mm_neg_ps(m128) _mm_xor_ps(_mm_set1_ps(-0.0f), m128)
// Per-lane clamp of Val into [Min, Max].
#define _mm_clamp_ps(m128_Val, m128_Min, m128_Max) _mm_min_ps(_mm_max_ps(m128_Val, m128_Min), m128_Max)
// Cross product of the x/y/z lanes of a and b, written to m128_out.
// Expands to two statements (the trailing ';' is part of the macro).
// (Fixed: the original had an unbalanced ')' on the last line.)
#define _mm_vec3_cross(m128_a, m128_b, m128_out) \
m128_out = _mm_sub_ps(_mm_mul_ps(m128_a, _mm_shuffle_ps(m128_b, m128_b, _MM_SHUFFLE(0,0,2,1))) \
, _mm_mul_ps(_mm_shuffle_ps(m128_a, m128_a, _MM_SHUFFLE(0,0,2,1)), m128_b)); \
m128_out = _mm_shuffle_ps(m128_out, m128_out, _MM_SHUFFLE(0,0,2,1));
// Normalizes the x/y/z lanes without any zero-length check (SSE4.1 _mm_dp_ps).
// m128_TmpLen is a scratch register supplied by the caller.
#define _mm_vec3_normalize_fast(m128_InOut, m128_TmpLen) \
m128_TmpLen = _mm_rsqrt_ss(_mm_dp_ps(m128_InOut, m128_InOut, 0x71)); \
m128_InOut = _mm_mul_ps(m128_InOut, _mm_shuffle_ps(m128_TmpLen, m128_TmpLen, _MM_SHUFFLE(0,0,0,0)))
// Normalizes the x/y/z lanes only when the squared length exceeds 0.000001f.
#define _mm_vec3_normalize(m128_InOut, m128_TmpLen) \
m128_TmpLen = _mm_dp_ps(m128_InOut, m128_InOut, 0x71); /*get square length*/ \
if(_mm_comigt_ss(m128_TmpLen, _mm_set_ss(0.000001f))) /*is greater than 0.000001f*/ \
{ \
m128_TmpLen = _mm_rsqrt_ss(m128_TmpLen); /* 1.0f / sqrt(SquareLength)*/ \
m128_InOut = _mm_mul_ps(m128_InOut, _mm_shuffle_ps(m128_TmpLen, m128_TmpLen, _MM_SHUFFLE(0,0,0,0))); \
}
// Scalar results from the x/y/z lanes. Mask 0x71: multiply lanes 0..2, write
// the sum to lane 0.
#define _mm_vec3_length(m128) _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_rlength(m128) _mm_cvtss_f32(_mm_rsqrt_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_lengthSq(m128) _mm_cvtss_f32(_mm_dp_ps(m128, m128, 0x71))
#define _mm_vec3_rlengthSq(m128) _mm_cvtss_f32(_mm_rcp_ss(_mm_dp_ps(m128, m128, 0x71)))
#define _mm_vec3_dot(m128_a, m128_b) _mm_cvtss_f32(_mm_dp_ps(m128_a, m128_b, 0x71))
void XVec3::RotateAroundX( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = point.x;
dst->y = fcos * point.y - fsin * point.z;
dst->z = fsin * point.y + fcos * point.z;
}
void XVec3::RotateAroundY( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = fsin * point.z + fcos * point.x;
dst->y = point.y;
dst->z = fcos * point.z - fsin * point.x;
}
void XVec3::RotateAroundZ( const XVec3& point, flt angle, XVec3* dst )
{
flt fsin,fcos;
XSinCosDegSSE(angle, &fsin, &fcos);
dst->x = fcos * point.x - fsin * point.y;
dst->y = fsin * point.x + fcos * point.y;
dst->z = point.z;
}
/////////////////////////////////////////////////////////////////////////////////////////////operators
// Copies all four lanes (x, y, z and the padding float) from rv using one
// aligned SSE load and one aligned store.
XVec3& XVec3::operator = (const XVec3& rv)
{
    const __m128 source = _mm_load_ps(rv.xyz);
    _mm_store_ps(this->xyz, source);
    return *this;
}
// In-place lane-wise addition of rv (all four lanes are processed).
XVec3& XVec3::operator += (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_add_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise subtraction of rv (all four lanes are processed).
XVec3& XVec3::operator -= (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_sub_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise multiplication by rv (all four lanes are processed).
XVec3& XVec3::operator *= (const XVec3& rv)
{
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_mul_ps(lhs, rhs));
    return *this;
}
// In-place lane-wise division by rv. All four lanes are divided, including the
// padding lane.
XVec3& XVec3::operator /= (const XVec3& rv)
{
    const __m128 numerator = _mm_load_ps(xyz);
    const __m128 denominator = _mm_load_ps(rv.xyz);
    _mm_store_ps(xyz, _mm_div_ps(numerator, denominator));
    return *this;
}
// Adds the scalar f to every lane of this vector in place.
XVec3& XVec3::operator += (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_add_ps(self, scalar));
    return *this;
}
// Subtracts the scalar f from every lane of this vector in place.
XVec3& XVec3::operator -= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_sub_ps(self, scalar));
    return *this;
}
// Multiplies every lane of this vector by the scalar f in place.
XVec3& XVec3::operator *= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_mul_ps(self, scalar));
    return *this;
}
// Divides every lane of this vector by the scalar f in place.
XVec3& XVec3::operator /= (flt f)
{
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(xyz, _mm_div_ps(self, scalar));
    return *this;
}
// Returns the lane-wise sum of this vector and v.
XVec3 XVec3::operator + (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_add_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise difference of this vector and v.
XVec3 XVec3::operator - (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_sub_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise product of this vector and v.
XVec3 XVec3::operator * (const XVec3& v) const
{
    XVec3 result;
    const __m128 lhs = _mm_load_ps(xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_mul_ps(lhs, rhs));
    return result;
}
// Returns the lane-wise quotient of this vector and v. All four lanes are
// divided, including the padding lane.
XVec3 XVec3::operator / (const XVec3& v) const
{
    XVec3 result;
    const __m128 numerator = _mm_load_ps(xyz);
    const __m128 denominator = _mm_load_ps(v.xyz);
    _mm_store_ps(result.xyz, _mm_div_ps(numerator, denominator));
    return result;
}
// Returns a copy of this vector with the scalar f added to every lane.
XVec3 XVec3::operator + (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_add_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with the scalar f subtracted from every lane.
XVec3 XVec3::operator - (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_sub_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with every lane multiplied by the scalar f.
XVec3 XVec3::operator * (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_mul_ps(self, scalar));
    return result;
}
// Returns a copy of this vector with every lane divided by the scalar f.
XVec3 XVec3::operator / (float f) const
{
    XVec3 result;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 scalar = _mm_set_ps1(f);
    _mm_store_ps(result.xyz, _mm_div_ps(self, scalar));
    return result;
}
// Returns the negated vector: the sign bit of every lane is flipped by XOR-ing
// with -0.0f.
XVec3 XVec3::operator -() const
{
    XVec3 negated;
    const __m128 self = _mm_load_ps(xyz);
    const __m128 signMask = _mm_set1_ps(-0.0f);
    _mm_store_ps(negated.xyz, _mm_xor_ps(signMask, self));
    return negated;
}
/////////////////////////////////////////////////////////////funcs/////////////////////////////////////
// Sets all four lanes (including the padding lane) to zero.
void XVec3::setZero()
{
    const __m128 zeros = _mm_setzero_ps();
    _mm_store_ps(xyz, zeros);
}
// Overwrites this vector with the static constant ONE (all four lanes copied).
void XVec3::setOne()
{
    const __m128 constant = _mm_load_ps(ONE.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant FORWARD (all four lanes copied).
void XVec3::setForward()
{
    const __m128 constant = _mm_load_ps(FORWARD.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant UP (all four lanes copied).
void XVec3::setUp()
{
    const __m128 constant = _mm_load_ps(UP.xyz);
    _mm_store_ps(xyz, constant);
}
// Overwrites this vector with the static constant RIGHT (all four lanes copied).
void XVec3::setRight()
{
    const __m128 constant = _mm_load_ps(RIGHT.xyz);
    _mm_store_ps(xyz, constant);
}
// Writes to *dst the lane-wise clamp of v into [min, max], computed as
// min(max(v, min), max). Asserts that dst is non-null and that min does not
// exceed max on x, y or z.
void XVec3::Clamp(const XVec3& v, const XVec3& min, const XVec3& max, XVec3* dst)
{
    XASSERT(dst);
    XASSERT(!(min.x > max.x || min.y > max.y || min.z > max.z));
    const __m128 value = _mm_load_ps(v.xyz);
    const __m128 lower = _mm_load_ps(min.xyz);
    const __m128 upper = _mm_load_ps(max.xyz);
    const __m128 raised = _mm_max_ps(value, lower);
    _mm_store_ps(dst->xyz, _mm_min_ps(raised, upper));
}
// Moves `from` towards `to` by the fraction elapsedTime / (elapsedTime + responseTime)
// and writes the result to *dst:
//   *dst = from + (to - from) * (elapsedTime / (elapsedTime + responseTime))
// When elapsedTime is not positive no movement happens and *dst receives
// `from`. The original left *dst unwritten in that case, so a caller passing a
// fresh destination read back an indeterminate vector.
void XVec3::Smooth(const XVec3& from, const XVec3& to, flt elapsedTime, flt responseTime, XVec3* dst)
{
    XASSERT(dst);
    const __m128 mFrom = _mm_load_ps(from.xyz);
    if (elapsedTime > 0.0f)
    {
        const __m128 mDelta = _mm_sub_ps(_mm_load_ps(to.xyz), mFrom);
        const __m128 mFactor = _mm_set_ps1(elapsedTime / (elapsedTime + responseTime));
        _mm_store_ps(dst->xyz, _mm_add_ps(mFrom, _mm_mul_ps(mDelta, mFactor)));
    }
    else
    {
        _mm_store_ps(dst->xyz, mFrom);
    }
}
// Clamps this vector in place, lane by lane, into [min, max] using
// min(max(this, min), max). XCHECK verifies that min does not exceed max on
// x, y or z.
void XVec3::clamp(const XVec3& min, const XVec3& max)
{
    XCHECK(!(min.x > max.x || min.y > max.y || min.z > max.z));
    const __m128 value = _mm_load_ps(xyz);
    const __m128 lower = _mm_load_ps(min.xyz);
    const __m128 upper = _mm_load_ps(max.xyz);
    const __m128 raised = _mm_max_ps(value, lower);
    _mm_store_ps(this->xyz, _mm_min_ps(raised, upper));
}
// Writes the cross product a x b to *dst.
//
// Scalar equivalent:
//   dst->x = (a.y * b.z) - (a.z * b.y);
//   dst->y = (a.z * b.x) - (a.x * b.z);
//   dst->z = (a.x * b.y) - (a.y * b.x);
//
// _MM_SHUFFLE(0,0,2,1) reorders the lanes to (y, z, x, x). The products
// a * b.yzx - a.yzx * b yield (z, x, y) of the result, and one more yzx
// shuffle puts them back into (x, y, z) order.
void XVec3::Cross(const XVec3& a, const XVec3& b, XVec3* dst)
{
    XCHECK(dst);
    const __m128 va = _mm_load_ps(a.xyz);
    const __m128 vb = _mm_load_ps(b.xyz);
    const __m128 vbYzx = _mm_shuffle_ps(vb, vb, _MM_SHUFFLE(0,0,2,1));
    const __m128 vaYzx = _mm_shuffle_ps(va, va, _MM_SHUFFLE(0,0,2,1));
    const __m128 first = _mm_mul_ps(va, vbYzx);
    const __m128 second = _mm_mul_ps(vaYzx, vb);
    const __m128 rotated = _mm_sub_ps(first, second);
    _mm_store_ps(dst->xyz, _mm_shuffle_ps(rotated, rotated, _MM_SHUFFLE(0,0,2,1)));
}
// Linear interpolation: *dst = from + (to - from) * dt, applied to all four lanes.
void XVec3::Lerp(const XVec3& from, const XVec3& to, flt dt, XVec3* dst)
{
    XASSERT(dst);
    const __m128 start = _mm_load_ps(from.xyz);
    const __m128 delta = _mm_sub_ps(_mm_load_ps(to.xyz), start);
    const __m128 weight = _mm_set_ps1(dt);
    _mm_store_ps(dst->xyz, _mm_add_ps(start, _mm_mul_ps(delta, weight)));
}
/*
void XVec3::RotateAround(const XVec3& point, const XVec3& pivot, const XQuat& rotation, XVec3* dst)
{
XASSERT(dst);
// @NormalCode
// *dst = (rotation * (point - pivot)) + pivot;
__m128 mPivot = _mm_load_ps(pivot.xyz);
__m128 mV = _mm_sub_ps(_mm_load_ps(point.xyz), mPivot);
//------------- XQuat * XVec3 ------------
__m128 mQVec = _mm_load_ps(rotation.xyzw);
__m128 mUV = _mm_vec3_cross(mQVec, mV);
__m128 mUUV = _mm_vec3_cross(mQVec, mUV);
mUV = _mm_mul_ps(mUV, _mm_set_ps1(rotation.w * 2.0f));
mUUV = _mm_mul_ps(mUUV, _mm_set_ps1(2.0f));
__m128 mQuatMulVec = _mm_add_ps(_mm_add_ps(mV, mUV), mUUV);
_mm_store_ps(dst->xyz, _mm_add_ps(mQuatMulVec, mPivot));
}
*/
#ifdef XUSE_SSE_4_1
/////////////////////////////////////////////////SSE4.1////////////////////////////////////////////////
// Euclidean length of (x, y, z), SSE4.1 path. The dot-product mask 0x71 sums
// lanes 0..2 and places the result in lane 0.
flt XVec3::length() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_sqrt_ss(lenSq));
}
// Approximate 1 / length of (x, y, z), SSE4.1 path, using the hardware
// reciprocal-square-root estimate.
flt XVec3::rLength() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_rsqrt_ss(lenSq));
}
// Squared length x*x + y*y + z*z, SSE4.1 path.
flt XVec3::lengthSq() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(lenSq);
}
// Approximate 1 / squared length of (x, y, z), SSE4.1 path, using the hardware
// reciprocal estimate.
flt XVec3::rLengthSq() const
{
    const __m128 v = _mm_load_ps(xyz);
    const __m128 lenSq = _mm_dp_ps(v, v, 0x71);
    return _mm_cvtss_f32(_mm_rcp_ss(lenSq));
}
// Normalizes this vector in place (SSE4.1 path).
//
// The vector is left unchanged when its squared length is not greater than
// 0.000001f (near-zero vector or NaN) or already lies within 0.000003f of 1.
// The original tested only `lengthSq > 1.000003f`, so every vector shorter
// than unit length was silently left un-normalized.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::normalize()
{
    const __m128 mThis = _mm_load_ps(this->xyz);
    __m128 mLen = _mm_dp_ps(mThis, mThis, 0x71); //0x71 == 0111 0001: sum lanes 0..2 into lane 0
    const float lenSq = _mm_cvtss_f32(mLen);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        mLen = _mm_rsqrt_ss(mLen);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(this->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mLen, mLen, _MM_SHUFFLE(0,0,0,0))));
    }
}
// Writes the normalized copy of this vector to *dst (SSE4.1 path).
//
// When the squared length is not greater than 0.000001f (near-zero vector or
// NaN) or already lies within 0.000003f of 1, *dst receives an unscaled copy.
// The original tested only `lengthSq > 1.000003f` and wrote nothing otherwise,
// so shorter vectors were never normalized and *dst could stay uninitialized.
//
// Scaling uses _mm_rsqrt_ss, a hardware estimate, so the result is only
// approximately unit length.
void XVec3::getNormalize(XVec3* dst) const
{
    XCHECK(dst);
    const __m128 mThis = _mm_load_ps(this->xyz);
    __m128 mLen = _mm_dp_ps(mThis, mThis, 0x71); //0x71 == 0111 0001: sum lanes 0..2 into lane 0
    const float lenSq = _mm_cvtss_f32(mLen);
    const bool hasLength = lenSq > 0.000001f;
    const bool alreadyUnit = lenSq >= 0.999997f && lenSq <= 1.000003f;
    if (hasLength && !alreadyUnit)
    {
        mLen = _mm_rsqrt_ss(mLen);
        // Broadcast lane 0 (the reciprocal length) to all lanes before scaling.
        _mm_store_ps(dst->xyz, _mm_mul_ps(mThis, _mm_shuffle_ps(mLen, mLen, _MM_SHUFFLE(0,0,0,0))));
    }
    else
    {
        _mm_store_ps(dst->xyz, mThis);
    }
}
// Dot product of this vector and v over x, y, z (SSE4.1 path).
float XVec3::dot(const XVec3& v) const
{
    const __m128 lhs = _mm_load_ps(this->xyz);
    const __m128 rhs = _mm_load_ps(v.xyz);
    return _mm_cvtss_f32(_mm_dp_ps(lhs, rhs, 0x71));
}
// Euclidean distance between a and b over x, y, z (SSE4.1 path).
float XVec3::Distance(const XVec3& a, const XVec3& b)
{
    const __m128 diff = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 distSq = _mm_dp_ps(diff, diff, 0x71);
    return _mm_cvtss_f32(_mm_sqrt_ss(distSq));
}
// Squared Euclidean distance between a and b over x, y, z (SSE4.1 path).
float XVec3::DistanceSq(const XVec3& a, const XVec3& b)
{
    const __m128 diff = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
    const __m128 distSq = _mm_dp_ps(diff, diff, 0x71);
    return _mm_cvtss_f32(distSq);
}
// Dot product of a and b over x, y, z (SSE4.1 path).
flt XVec3::Dot(const XVec3& a, const XVec3& b)
{
    const __m128 lhs = _mm_load_ps(a.xyz);
    const __m128 rhs = _mm_load_ps(b.xyz);
    return _mm_cvtss_f32(_mm_dp_ps(lhs, rhs, 0x71));
}
// Angle between v1 and v2, computed as XAtan2(|v1 x v2| + 0.0000001f, v1 . v2)
// (SSE4.1 path).
//
// Scalar equivalent:
//   float dz = v1.x * v2.y - v1.y * v2.x;
//   float dx = v1.y * v2.z - v1.z * v2.y;
//   float dy = v1.z * v2.x - v1.x * v2.z;
//   return atan2f(sqrt(dx * dx + dy * dy + dz * dz) + 0.0000001f, Dot(v1, v2));
//
// The cross product is left in rotated lane order because only its length is
// needed.
flt XVec3::Angle(const XVec3& v1, const XVec3& v2)
{
    const __m128 a = _mm_load_ps(v1.xyz);
    const __m128 b = _mm_load_ps(v2.xyz);
    const __m128 bYzx = _mm_shuffle_ps(b, b, _MM_SHUFFLE(0,0,2,1));
    const __m128 aYzx = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,2,1));
    const __m128 cross = _mm_sub_ps(_mm_mul_ps(a, bYzx), _mm_mul_ps(b, aYzx));
    const float crossLen = _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(cross, cross, 0x71)));
    const float dotProduct = _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71));
    return XAtan2(crossLen + 0.0000001f, dotProduct);
}
#else //#ifdef XUSE_SSE_4_1
// Length of this vector: sqrt(x*x + y*y + z*z) (plain SSE build, no SSE4.1 dot-product instruction).
flt XVec3::length() const
{
    __m128 mThis = _mm_load_ps(xyz);
    __m128 mMul = _mm_mul_ps(mThis, mThis);                        // (x*x, y*y, z*z, -)
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));  // y*y moved to lane 0
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));  // z*z moved to lane 0
    // Scalar adds in lane 0 give the sum of squares; the 4th lane never takes part.
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Reciprocal length of this vector, i.e. approximately 1.0f / length() (plain SSE build).
// Uses _mm_rsqrt_ss, which is a fast approximation rather than an exact result.
flt XVec3::rLength() const
{
    __m128 mThis = _mm_load_ps(xyz);
    __m128 mMul = _mm_mul_ps(mThis, mThis);                        // (x*x, y*y, z*z, -)
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));  // y*y moved to lane 0
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));  // z*z moved to lane 0
    // Scalar adds in lane 0 give the sum of squares; the 4th lane never takes part.
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
// Squared length of this vector: x*x + y*y + z*z (plain SSE build).
flt XVec3::lengthSq() const
{
    __m128 vec = _mm_load_ps(this->xyz);
    __m128 squares = _mm_mul_ps(vec, vec); // (x*x, y*y, z*z, -)

    // Accumulate in lane 0: first x*x + y*y, then + z*z. The 4th lane never takes part.
    __m128 sum = _mm_add_ss(squares, _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1,1,1,1)));
    sum = _mm_add_ss(sum, _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2,2,2,2)));

    return _mm_cvtss_f32(sum);
}
// Reciprocal of the squared length, i.e. approximately 1.0f / lengthSq() (plain SSE build).
// Uses _mm_rcp_ss, which is a fast approximation rather than an exact division.
flt XVec3::rLengthSq() const
{
    __m128 mThis = _mm_load_ps(xyz);
    __m128 mMul = _mm_mul_ps(mThis, mThis);                        // (x*x, y*y, z*z, -)
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));  // y*y moved to lane 0
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));  // z*z moved to lane 0
    // Scalar adds in lane 0 give the sum of squares; the 4th lane never takes part.
    return _mm_cvtss_f32(_mm_rcp_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
}
void XVec3::normalize()
{
__m128 mThis = _mm_load_ps(xyz);
__m128 mMul = _mm_mul_ps(mThis, mThis);
__m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
__m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
__m128 mLen = _mm_add_ss(_mm_add_ss(mMul, mY), mZ);
if(_mm_comigt_ss(mLen, _mm_set_ss(1.000003f)))
{
__m128 mLen = _mm_rsqrt_ss(mLen);
_mm_store_ps(this->xyz, _mm_mul_ps(mThis, _mm_allw_ps(mLen)));
}
}
// Writes the normalized copy of this vector into *dst (plain SSE build).
// dst must be non-null (checked with XCHECK) and 16-byte aligned (aligned store).
// When the squared length is not above 1.000003f the vector is copied to *dst unchanged,
// so the destination is always fully written (previously it was left untouched, which made
// the by-value getNormalize() overload return an uninitialized vector).
// NOTE(review): vectors shorter than unit length are therefore copied, not scaled - confirm
// the threshold is intended.
void XVec3::getNormalize(XVec3* dst) const
{
    XCHECK(dst);
    __m128 mThis = _mm_load_ps(xyz);
    __m128 mMul = _mm_mul_ps(mThis, mThis);                        // (x*x, y*y, z*z, -)
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));  // y*y moved to lane 0
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));  // z*z moved to lane 0
    __m128 mLenSq = _mm_add_ss(_mm_add_ss(mMul, mY), mZ);          // lane 0 = squared length
    if(_mm_comigt_ss(mLenSq, _mm_set_ss(1.000003f)))
    {
        // Take the reciprocal square root of the squared length computed above. The original
        // re-declared the variable here and initialized it from itself, reading garbage.
        __m128 mInvLen = _mm_rsqrt_ss(mLenSq);
        // _mm_allw_ps is defined elsewhere; presumably it broadcasts the scalar to all lanes.
        _mm_store_ps(dst->xyz, _mm_mul_ps(mThis, _mm_allw_ps(mInvLen)));
    }
    else
    {
        // No scaling performed: still hand the caller a defined value.
        _mm_store_ps(dst->xyz, mThis);
    }
}
float XVec3::dot(const XVec3& v) const
{
__m128 mMul = _mm_mul_ps(_mm_load_ps(this->xyz), _mm_load_ps(v.xyz));
__m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
__m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
return _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(mMul, mY), mZ));
}
float XVec3::Distance(const XVec3& a, const XVec3& b)
{
__m128 mSub = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
__m128 mMul = _mm_mul_ps(mSub, mSub);
__m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
__m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mM ul, mY), mZ)));
}
float XVec3::DistanceSq(const XVec3& a, const XVec3& b)
{
__m128 mSub = _mm_sub_ps(_mm_load_ps(a.xyz), _mm_load_ps(b.xyz));
__m128 mMul = _mm_mul_ps(mSub, mSub);
__m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
__m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
return _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(mMul, mY), mZ));
}
// Dot product of a and b: a.x*b.x + a.y*b.y + a.z*b.z (plain SSE build).
flt XVec3::Dot(const XVec3& a, const XVec3& b)
{
    __m128 lhs = _mm_load_ps(a.xyz);
    __m128 rhs = _mm_load_ps(b.xyz);
    __m128 products = _mm_mul_ps(lhs, rhs);

    // Accumulate in lane 0: first the x and y products, then the z product. The 4th lane never takes part.
    __m128 sum = _mm_add_ss(products, _mm_shuffle_ps(products, products, _MM_SHUFFLE(1,1,1,1)));
    sum = _mm_add_ss(sum, _mm_shuffle_ps(products, products, _MM_SHUFFLE(2,2,2,2)));

    return _mm_cvtss_f32(sum);
}
// Angle between v1 and v2 (plain SSE build), computed as atan2(|v1 x v2| + 1e-7, v1 . v2).
//
// Scalar reference:
//   float dz = v1.x * v2.y - v1.y * v2.x;
//   float dx = v1.y * v2.z - v1.z * v2.y;
//   float dy = v1.z * v2.x - v1.x * v2.z;
//   return atan2f(sqrt(dx * dx + dy * dy + dz * dz) + 0.0000001f, Dot(v1, v2));
flt XVec3::Angle(const XVec3& v1, const XVec3& v2)
{
    __m128 mV1 = _mm_load_ps(v1.xyz);
    __m128 mV2 = _mm_load_ps(v2.xyz);
    // Shuffling to (y, z, x, x) pairs each lane with its neighbour, so lanes 0..2 of mSub hold
    // (dz, dx, dy): the cross product with its components permuted, which keeps its length.
    __m128 mSub = _mm_sub_ps(_mm_mul_ps(mV1, _mm_shuffle_ps(mV2, mV2, _MM_SHUFFLE(0,0,2,1)))
        , _mm_mul_ps(mV2, _mm_shuffle_ps(mV1, mV1, _MM_SHUFFLE(0,0,2,1))));
    // Length of the cross product: sum the three squared lanes in lane 0, then take the root.
    __m128 mMul = _mm_mul_ps(mSub, mSub);
    __m128 mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    __m128 mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    flt fLen = _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(_mm_add_ss(mMul, mY), mZ)));
    // Dot product of the two inputs, reduced the same way.
    mMul = _mm_mul_ps(mV1, mV2);
    mY = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(1,1,1,1));
    mZ = _mm_shuffle_ps(mMul, mMul, _MM_SHUFFLE(2,2,2,2));
    flt fDot = _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(mMul, mY), mZ));
    // The small bias is added to the first atan2 argument, exactly as in the scalar reference.
    return XAtan2(fLen + 0.0000001f, fDot);
}
#endif //#else //#ifdef XUSE_SSE_4_1
#endif //#ifdef XUSE_SSE
:شیطان::شیطان::شیطان: