Merge pull request #248 from nillerusr/mathlib-optimize
Mathlib optimize
This commit is contained in:
commit
697a9f34f9
2
.gitignore
vendored
2
.gitignore
vendored
@ -37,5 +37,3 @@ waf3*/
|
||||
.vscode/
|
||||
.depproj/
|
||||
source-engine.sln
|
||||
hl2/
|
||||
|
||||
|
@ -33,6 +33,7 @@ CAI_PolicingBehavior::CAI_PolicingBehavior( void )
|
||||
m_bEnabled = false;
|
||||
m_nNumWarnings = 0;
|
||||
m_bTargetIsHostile = false;
|
||||
m_hPoliceGoal = NULL;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@ -31,9 +31,9 @@ public:
|
||||
{
|
||||
MEM_ALLOC_CREDIT_( "CMatCallQueue.m_Allocator" );
|
||||
#ifdef SWDS
|
||||
m_Allocator.Init( 2*1024, 0, 0, 4 );
|
||||
m_Allocator.Init( 2*1024, 0, 0, 16 );
|
||||
#else
|
||||
m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 4 );
|
||||
m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 16 );
|
||||
#endif
|
||||
m_FunctorFactory.SetAllocator( &m_Allocator );
|
||||
m_pHead = m_pTail = NULL;
|
||||
|
@ -420,13 +420,6 @@ void MatrixGetColumn( const matrix3x4_t& in, int column, Vector &out )
|
||||
out.z = in[2][column];
|
||||
}
|
||||
|
||||
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
|
||||
{
|
||||
out[0][column] = in.x;
|
||||
out[1][column] = in.y;
|
||||
out[2][column] = in.z;
|
||||
}
|
||||
|
||||
void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
|
||||
{
|
||||
out[0][0] *= flScale;
|
||||
@ -1092,57 +1085,6 @@ void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst )
|
||||
dst[2][0] = 0.0f; dst[2][1] = 0.0f; dst[2][2] = z; dst[2][3] = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: Builds the matrix for a counterclockwise rotation about an arbitrary axis.
|
||||
//
|
||||
// | ax2 + (1 - ax2)cosQ axay(1 - cosQ) - azsinQ azax(1 - cosQ) + aysinQ |
|
||||
// Ra(Q) = | axay(1 - cosQ) + azsinQ ay2 + (1 - ay2)cosQ ayaz(1 - cosQ) - axsinQ |
|
||||
// | azax(1 - cosQ) - aysinQ ayaz(1 - cosQ) + axsinQ az2 + (1 - az2)cosQ |
|
||||
//
|
||||
// Input : mat -
|
||||
// vAxisOrRot -
|
||||
// angle -
|
||||
//-----------------------------------------------------------------------------
|
||||
void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst )
|
||||
{
|
||||
float radians;
|
||||
float axisXSquared;
|
||||
float axisYSquared;
|
||||
float axisZSquared;
|
||||
float fSin;
|
||||
float fCos;
|
||||
|
||||
radians = angleDegrees * ( M_PI / 180.0 );
|
||||
fSin = sin( radians );
|
||||
fCos = cos( radians );
|
||||
|
||||
axisXSquared = vAxisOfRot[0] * vAxisOfRot[0];
|
||||
axisYSquared = vAxisOfRot[1] * vAxisOfRot[1];
|
||||
axisZSquared = vAxisOfRot[2] * vAxisOfRot[2];
|
||||
|
||||
// Column 0:
|
||||
dst[0][0] = axisXSquared + (1 - axisXSquared) * fCos;
|
||||
dst[1][0] = vAxisOfRot[0] * vAxisOfRot[1] * (1 - fCos) + vAxisOfRot[2] * fSin;
|
||||
dst[2][0] = vAxisOfRot[2] * vAxisOfRot[0] * (1 - fCos) - vAxisOfRot[1] * fSin;
|
||||
|
||||
// Column 1:
|
||||
dst[0][1] = vAxisOfRot[0] * vAxisOfRot[1] * (1 - fCos) - vAxisOfRot[2] * fSin;
|
||||
dst[1][1] = axisYSquared + (1 - axisYSquared) * fCos;
|
||||
dst[2][1] = vAxisOfRot[1] * vAxisOfRot[2] * (1 - fCos) + vAxisOfRot[0] * fSin;
|
||||
|
||||
// Column 2:
|
||||
dst[0][2] = vAxisOfRot[2] * vAxisOfRot[0] * (1 - fCos) + vAxisOfRot[1] * fSin;
|
||||
dst[1][2] = vAxisOfRot[1] * vAxisOfRot[2] * (1 - fCos) - vAxisOfRot[0] * fSin;
|
||||
dst[2][2] = axisZSquared + (1 - axisZSquared) * fCos;
|
||||
|
||||
// Column 3:
|
||||
dst[0][3] = 0;
|
||||
dst[1][3] = 0;
|
||||
dst[2][3] = 0;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Computes the transpose
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -1450,33 +1392,6 @@ void VectorYawRotate( const Vector &in, float flYaw, Vector &out)
|
||||
out.z = in.z;
|
||||
}
|
||||
|
||||
|
||||
|
||||
float Bias( float x, float biasAmt )
|
||||
{
|
||||
// WARNING: not thread safe
|
||||
static float lastAmt = -1;
|
||||
static float lastExponent = 0;
|
||||
if( lastAmt != biasAmt )
|
||||
{
|
||||
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
|
||||
}
|
||||
float fRet = pow( x, lastExponent );
|
||||
Assert ( !IS_NAN( fRet ) );
|
||||
return fRet;
|
||||
}
|
||||
|
||||
|
||||
float Gain( float x, float biasAmt )
|
||||
{
|
||||
// WARNING: not thread safe
|
||||
if( x < 0.5 )
|
||||
return 0.5f * Bias( 2*x, 1-biasAmt );
|
||||
else
|
||||
return 1 - 0.5f * Bias( 2 - 2*x, 1-biasAmt );
|
||||
}
|
||||
|
||||
|
||||
float SmoothCurve( float x )
|
||||
{
|
||||
// Actual smooth curve. Visualization:
|
||||
|
1264
mathlib/vmatrix.cpp
1264
mathlib/vmatrix.cpp
File diff suppressed because it is too large
Load Diff
@ -22,10 +22,16 @@ extern float (*pfFastCos)(float x);
|
||||
|
||||
// The following are declared as macros rather than inline functions because they are often used in limiting situations,
|
||||
// and sometimes the compiler simply refuses to inline them for some reason
|
||||
#define FastSqrt(x) (*pfSqrt)(x)
|
||||
#define FastRSqrt(x) (*pfRSqrt)(x)
|
||||
#define FastRSqrtFast(x) (*pfRSqrtFast)(x)
|
||||
#define FastSqrt(x) sqrtf(x)
|
||||
#define FastRSqrt(x) (1.f/sqrtf(x))
|
||||
#define FastRSqrtFast(x) (1.f/sqrtf(x))
|
||||
|
||||
#ifdef _WIN32
|
||||
#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
|
||||
#else
|
||||
#define FastSinCos(x,s,c) sincosf(x,s,c)
|
||||
#endif
|
||||
|
||||
#define FastCos(x) (*pfFastCos)(x)
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
|
@ -30,7 +30,6 @@
|
||||
// FP exception clean so this is not a turnkey operation.
|
||||
//#define FP_EXCEPTIONS_ENABLED
|
||||
|
||||
|
||||
#ifdef FP_EXCEPTIONS_ENABLED
|
||||
#include <float.h> // For _clearfp and _controlfp_s
|
||||
#endif
|
||||
@ -93,37 +92,11 @@ private:
|
||||
FPExceptionEnabler& operator=(const FPExceptionEnabler&);
|
||||
};
|
||||
|
||||
|
||||
|
||||
#ifdef DEBUG // stop crashing edit-and-continue
|
||||
FORCEINLINE float clamp( float val, float minVal, float maxVal )
|
||||
inline float clamp( const float val, const float minVal, const float maxVal )
|
||||
{
|
||||
if ( maxVal < minVal )
|
||||
return maxVal;
|
||||
else if( val < minVal )
|
||||
return minVal;
|
||||
else if( val > maxVal )
|
||||
return maxVal;
|
||||
else
|
||||
return val;
|
||||
const float t = val < minVal ? minVal : val;
|
||||
return t > maxVal ? maxVal : t;
|
||||
}
|
||||
#else // DEBUG
|
||||
// Clamps val to [minVal, maxVal]: the lower bound is applied first, then the
// upper bound. On 32-bit x86 this is done with scalar SSE min/max so no branch
// is emitted; elsewhere it goes through fpmax/fpmin.
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{
#if defined(__i386__) || defined(_M_IX86)
	const __m128 raisedToMin = _mm_max_ss( _mm_load_ss(&val), _mm_load_ss(&minVal) );
	const __m128 loweredToMax = _mm_min_ss( raisedToMin, _mm_load_ss(&maxVal) );
	_mm_store_ss( &val, loweredToMax );
#else
	val = fpmin(maxVal, fpmax(minVal, val));
#endif
	return val;
}
|
||||
#endif // DEBUG
|
||||
|
||||
//
|
||||
// Returns a clamped value in the range [min, max].
|
||||
@ -131,17 +104,10 @@ FORCEINLINE float clamp( float val, float minVal, float maxVal )
|
||||
template< class T >
|
||||
inline T clamp( T const &val, T const &minVal, T const &maxVal )
|
||||
{
|
||||
if ( maxVal < minVal )
|
||||
return maxVal;
|
||||
else if( val < minVal )
|
||||
return minVal;
|
||||
else if( val > maxVal )
|
||||
return maxVal;
|
||||
else
|
||||
return val;
|
||||
const T t = val< minVal ? minVal : val;
|
||||
return t > maxVal ? maxVal : t;
|
||||
}
|
||||
|
||||
|
||||
// plane_t structure
|
||||
// !!! if this is changed, it must be changed in asm code too !!!
|
||||
// FIXME: does the asm code even exist anymore?
|
||||
@ -237,8 +203,8 @@ bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t
|
||||
|
||||
struct matrix3x4_t
|
||||
{
|
||||
matrix3x4_t() = default;
|
||||
matrix3x4_t(
|
||||
inline matrix3x4_t() = default;
|
||||
inline matrix3x4_t(
|
||||
float m00, float m01, float m02, float m03,
|
||||
float m10, float m11, float m12, float m13,
|
||||
float m20, float m21, float m22, float m23 )
|
||||
@ -252,7 +218,7 @@ struct matrix3x4_t
|
||||
// Creates a matrix where the X axis = forward
|
||||
// the Y axis = left, and the Z axis = up
|
||||
//-----------------------------------------------------------------------------
|
||||
void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
|
||||
inline void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
|
||||
{
|
||||
m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
|
||||
m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
|
||||
@ -263,26 +229,23 @@ struct matrix3x4_t
|
||||
// Creates a matrix where the X axis = forward
|
||||
// the Y axis = left, and the Z axis = up
|
||||
//-----------------------------------------------------------------------------
|
||||
matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
|
||||
inline matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
|
||||
{
|
||||
Init( xAxis, yAxis, zAxis, vecOrigin );
|
||||
}
|
||||
|
||||
inline void Invalidate( void )
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
for( int i=0; i < 12; i++ )
|
||||
{
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
m_flMatVal[i][j] = VEC_T_NAN;
|
||||
}
|
||||
((float*)m_flMatVal)[i] = VEC_T_NAN;
|
||||
}
|
||||
}
|
||||
|
||||
float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
|
||||
const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
|
||||
float *Base() { return &m_flMatVal[0][0]; }
|
||||
const float *Base() const { return &m_flMatVal[0][0]; }
|
||||
inline float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
|
||||
inline const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
|
||||
inline float *Base() { return &m_flMatVal[0][0]; }
|
||||
inline const float *Base() const { return &m_flMatVal[0][0]; }
|
||||
|
||||
float m_flMatVal[3][4];
|
||||
};
|
||||
@ -565,7 +528,13 @@ void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
|
||||
bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
|
||||
|
||||
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
|
||||
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
|
||||
|
||||
inline void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
|
||||
{
|
||||
out[0][column] = in.x;
|
||||
out[1][column] = in.y;
|
||||
out[2][column] = in.z;
|
||||
}
|
||||
|
||||
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
|
||||
{
|
||||
@ -1079,7 +1048,19 @@ void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
|
||||
// 0 1
|
||||
//
|
||||
// With a biasAmt of 0.5, Bias returns X.
|
||||
float Bias( float x, float biasAmt );
|
||||
inline float Bias( float x, float biasAmt )
|
||||
{
|
||||
// WARNING: not thread safe
|
||||
static float lastAmt = -1;
|
||||
static float lastExponent = 0;
|
||||
if( lastAmt != biasAmt )
|
||||
{
|
||||
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
|
||||
}
|
||||
float fRet = pow( x, lastExponent );
|
||||
Assert ( !IS_NAN( fRet ) );
|
||||
return fRet;
|
||||
}
|
||||
|
||||
|
||||
// Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
|
||||
@ -1111,9 +1092,14 @@ float Bias( float x, float biasAmt );
|
||||
// |*****
|
||||
// |___________________
|
||||
// 0 1
|
||||
float Gain( float x, float biasAmt );
|
||||
|
||||
|
||||
inline float Gain( float x, float biasAmt )
|
||||
{
|
||||
// WARNING: not thread safe
|
||||
if( x < 0.5 )
|
||||
return 0.5f * Bias( 2*x, 1-biasAmt );
|
||||
else
|
||||
return 1 - 0.5f * Bias( 2 - 2*x, 1-biasAmt );
|
||||
}
|
||||
// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
|
||||
// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
|
||||
// any fadein/fadeout effect where it should start and end smoothly.
|
||||
|
@ -35,7 +35,7 @@ class Vector2D;
|
||||
// 4D Vector4D
|
||||
//=========================================================
|
||||
|
||||
class Vector4D
|
||||
class alignas(16) Vector4D
|
||||
{
|
||||
public:
|
||||
// Members
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
|
||||
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
|
||||
} D3DSHADER_PARAM_REGISTER_TYPE;
|
||||
|
||||
struct D3DMATRIX
|
||||
struct alignas(16) D3DMATRIX
|
||||
{
|
||||
union
|
||||
{
|
||||
|
@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
|
||||
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
|
||||
} D3DSHADER_PARAM_REGISTER_TYPE;
|
||||
|
||||
struct D3DMATRIX
|
||||
struct alignas(16) D3DMATRIX
|
||||
{
|
||||
union
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user