1
0
mirror of https://github.com/alliedmodders/hl2sdk.git synced 2024-12-23 01:59:43 +08:00

Port GetCPUInformation and mathlib from sdk2013

This commit is contained in:
Nick Hastings 2024-04-20 13:34:13 -04:00
parent b099570391
commit 0d247b9566
35 changed files with 3188 additions and 693 deletions

View File

@ -11,6 +11,7 @@ builder.SetBuildFolder('/')
project = builder.StaticLibraryProject('mathlib')
project.sources = [
'almostequal.cpp',
'anorms.cpp',
'bumpvects.cpp',
'color_conversion.cpp',
@ -25,6 +26,7 @@ project.sources = [
'randsse.cpp',
'simdvectormatrix.cpp',
'sparse_convolution_noise.cpp',
'spherical.cpp',
'sse.cpp',
'sseconst.cpp',
'ssenoise.cpp',

View File

@ -7,6 +7,8 @@
#include "mathlib/IceKey.H"
#include <cstdint>
#include "tier0/memdbgon.h"
#ifdef _MSC_VER
#pragma warning(disable: 4244)
#endif

97
mathlib/almostequal.cpp Normal file
View File

@ -0,0 +1,97 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Fast ways to compare equality of two floats. Assumes
// sizeof(float) == sizeof(int) and we are using IEEE format.
//
// Source: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
//=====================================================================================//
#include <float.h>
#include <math.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include "mathlib/mathlib.h"
// Bit-exact reinterpretation of a float as a 32-bit signed integer.
// memcpy (instead of the previous pointer cast) avoids strict-aliasing
// undefined behavior; compilers reduce it to a single register move.
static inline int32_t AE_FloatBits(float f)
{
    int32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}

// True if a is +infinity or -infinity.
static inline bool AE_IsInfinite(float a)
{
    // An infinity has an exponent of 255 (shifted left 23 positions) and
    // a zero mantissa. Masking off the sign bit matches both infinities.
    const int32_t kInfAsInt = 0x7F800000;
    return (AE_FloatBits(a) & 0x7FFFFFFF) == kInfAsInt;
}

// True if a is any NaN (quiet or signalling).
static inline bool AE_IsNan(float a)
{
    // A NAN has an exponent of 255 (shifted left 23 positions) and
    // a non-zero mantissa.
    const int32_t bits = AE_FloatBits(a);
    const int32_t exp = bits & 0x7F800000;
    const int32_t mantissa = bits & 0x007FFFFF;
    return exp == 0x7F800000 && mantissa != 0;
}

// Returns nonzero if a is negative (sign bit set), 0 otherwise.
static inline int AE_Sign(float a)
{
    // The sign bit of a number is the high bit.
    return AE_FloatBits(a) & 0x80000000;
}

// This is the 'final' version of the AlmostEqualUlps function.
// The optional checks are included for completeness, but in many
// cases they are not necessary, or even not desirable.
//
// Returns true if a and b are within maxUlps representable floats of
// each other. Assumes sizeof(float) == sizeof(int32_t) and IEEE format.
bool AlmostEqual(float a, float b, int maxUlps)
{
    // If a or b are infinity (positive or negative) then
    // only return true if they are exactly equal to each other -
    // that is, if they are both infinities of the same sign.
    // This check is only needed if you will be generating
    // infinities and you don't want them 'close' to numbers
    // near FLT_MAX.
    if (AE_IsInfinite(a) || AE_IsInfinite(b))
        return a == b;

    // If a or b are a NAN, return false. NANs are equal to nothing,
    // not even themselves.
    if (AE_IsNan(a) || AE_IsNan(b))
        return false;

    // After adjusting floats so their representations are lexicographically
    // ordered as twos-complement integers a very small positive number
    // will compare as 'close' to a very small negative number. If this is
    // not desirable, and if you are on a platform that supports
    // subnormals (which is the only place the problem can show up) then
    // you need this check.
    // The check for a == b is because zero and negative zero have different
    // signs but are equal to each other.
    if (AE_Sign(a) != AE_Sign(b))
        return a == b;

    // Make aInt/bInt lexicographically ordered as twos-complement ints.
    // The subtraction is done in unsigned arithmetic (the original relied
    // on the implicit unsigned promotion of the 0x80000000 literal); the
    // wrap-around behavior is identical but now explicit.
    int32_t aInt = AE_FloatBits(a);
    if (aInt < 0)
        aInt = (int32_t)(0x80000000u - (uint32_t)aInt);

    int32_t bInt = AE_FloatBits(b);
    if (bInt < 0)
        bInt = (int32_t)(0x80000000u - (uint32_t)bInt);

    // Now we can compare aInt and bInt to find out how far apart a and b
    // are. Signs are known equal here, so the difference cannot overflow.
    int intDiff = abs(aInt - bInt);
    return intDiff <= maxUlps;
}

View File

@ -106,27 +106,23 @@ ALIGN128 float power2_n[256] = // 2**(index - 128) / 255
// You can use this to double check the exponent table and assert that
// the precomputation is correct.
#ifdef DBGFLAG_ASSERT
#ifdef _MSC_VER
#ifdef _WIN32
#pragma warning(push)
#pragma warning( disable : 4189 ) // disable unused local variable warning
#endif
#ifdef __GNUC__
__attribute__((unused)) static void CheckExponentTable()
#else
static void CheckExponentTable()
#endif
{
for( int i = 0; i < 256; i++ )
{
float testAgainst = pow( 2.0f, i - 128 ) / 255.0f;
float diff = testAgainst - power2_n[i] ;
float relativeDiff = diff / testAgainst;
Assert( sizeof(relativeDiff) > 0 && testAgainst == 0 ?
power2_n[i] < 1.16E-041 :
power2_n[i] == testAgainst );
Assert( testAgainst == 0 ?
power2_n[i] < 1.16E-041 :
power2_n[i] == testAgainst );
}
}
#ifdef _MSC_VER
#ifdef _WIN32
#pragma warning(pop)
#endif
#endif
@ -617,10 +613,10 @@ void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
scalar = *reinterpret_cast<float *>(&fbits);
}
// we should never need to clamp:
Assert(vin.x * scalar <= 255.0f &&
vin.y * scalar <= 255.0f &&
vin.z * scalar <= 255.0f);
// We can totally wind up above 255 and that's okay--but above 256 would be right out.
Assert(vin.x * scalar < 256.0f &&
vin.y * scalar < 256.0f &&
vin.z * scalar < 256.0f);
// This awful construction is necessary to prevent VC2005 from using the
// fldcw/fnstcw control words around every float-to-unsigned-char operation.

View File

@ -6,7 +6,7 @@
//
//=============================================================================//
#include <quantize.h>
#include <tier0/basetypes.h>
#include <minmax.h>
#define N_EXTRAVALUES 1
#define N_DIMENSIONS (3+N_EXTRAVALUES)
@ -46,7 +46,7 @@ void ColorQuantize(uint8 const *Image,
val1+=PIXEL(x,y,c)*ExtraValueXForms[i*3+c];
val1>>=8;
NthSample(s,y*Width+x,N_DIMENSIONS)->Value[c]=(uint8)
(MIN(255,MAX(0,val1)));
(V_min(255,V_max(0,val1)));
}
}
struct QuantizedValue *q=Quantize(s,Width*Height,N_DIMENSIONS,
@ -76,7 +76,7 @@ void ColorQuantize(uint8 const *Image,
tryc+=Error[x][c][ErrorUse];
Error[x][c][ErrorUse]=0;
}
samp[c]=(uint8) MIN(255,MAX(0,tryc));
samp[c]=(uint8) V_min(255,V_max(0,tryc));
}
struct QuantizedValue *f=FindMatch(samp,3,Weights,q);
out_pixels[Width*y+x]=(uint8) (f->value);

View File

@ -10,7 +10,7 @@
void LightDesc_t::RecalculateDerivedValues(void)
{
m_Flags=0;
m_Flags = LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED;
if (m_Attenuation0)
m_Flags|=LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0;
if (m_Attenuation1)

View File

@ -1,4 +1,4 @@
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose: Math primitives.
//
@ -17,7 +17,7 @@
#include "tier0/vprof.h"
//#define _VPROF_MATHLIB
#ifdef _MSC_VER
#ifdef _WIN32
#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
#endif
@ -25,6 +25,7 @@
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#if !defined( _X360 )
#include "mathlib/amd3dx.h"
#include "sse.h"
#endif
@ -426,6 +427,33 @@ void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
out[2][column] = in.z;
}
// Uniformly scales the 3x3 rotation/scale portion of a matrix in place.
// The translation column (index 3) is intentionally left untouched.
void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
{
	for ( int nRow = 0; nRow < 3; ++nRow )
	{
		for ( int nCol = 0; nCol < 3; ++nCol )
		{
			out[nRow][nCol] *= flScale;
		}
	}
}
// Zeroes the 3x3 rotation/scale portion of a matrix in place.
// The translation column (index 3) is intentionally left untouched.
void MatrixScaleByZero ( matrix3x4_t &out )
{
	for ( int nRow = 0; nRow < 3; ++nRow )
	{
		for ( int nCol = 0; nCol < 3; ++nCol )
		{
			out[nRow][nCol] = 0.0f;
		}
	}
}
int VectorCompare (const float *v1, const float *v2)
{
@ -565,53 +593,128 @@ void ConcatRotations (const float in1[3][3], const float in2[3][3], float out[3]
in1[2][2] * in2[2][2];
}
// Concatenates two 3x4 transforms (out = m0 * m1) using SIMD.
// All three matrices must be 16-byte aligned (asserted below).
// NOTE(review): fltx4 / LoadAlignedSIMD / Splat*SIMD etc. come from the
// SIMD math headers not visible here; g_SIMD_ComponentMask[3] presumably
// selects only the w (translation) lane of a row -- verify against ssemath.
void ConcatTransforms_Aligned( const matrix3x4_t &m0, const matrix3x4_t &m1, matrix3x4_t &out )
{
Assert( (((size_t)&m0) % 16) == 0 );
Assert( (((size_t)&m1) % 16) == 0 );
Assert( (((size_t)&out) % 16) == 0 );
// mask used to pick out only the 4th component of an m0 row
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
fltx4 rowA0 = LoadAlignedSIMD( m0.m_flMatVal[0] );
fltx4 rowA1 = LoadAlignedSIMD( m0.m_flMatVal[1] );
fltx4 rowA2 = LoadAlignedSIMD( m0.m_flMatVal[2] );
fltx4 rowB0 = LoadAlignedSIMD( m1.m_flMatVal[0] );
fltx4 rowB1 = LoadAlignedSIMD( m1.m_flMatVal[1] );
fltx4 rowB2 = LoadAlignedSIMD( m1.m_flMatVal[2] );
// now we have the rows of m0 and the columns of m1
// first output row: splat each component of m0 row 0 and accumulate
// its product with the corresponding m1 row
fltx4 A0 = SplatXSIMD(rowA0);
fltx4 A1 = SplatYSIMD(rowA0);
fltx4 A2 = SplatZSIMD(rowA0);
fltx4 mul00 = MulSIMD( A0, rowB0 );
fltx4 mul01 = MulSIMD( A1, rowB1 );
fltx4 mul02 = MulSIMD( A2, rowB2 );
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
// second output row
A0 = SplatXSIMD(rowA1);
A1 = SplatYSIMD(rowA1);
A2 = SplatZSIMD(rowA1);
fltx4 mul10 = MulSIMD( A0, rowB0 );
fltx4 mul11 = MulSIMD( A1, rowB1 );
fltx4 mul12 = MulSIMD( A2, rowB2 );
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
// third output row
A0 = SplatXSIMD(rowA2);
A1 = SplatYSIMD(rowA2);
A2 = SplatZSIMD(rowA2);
fltx4 mul20 = MulSIMD( A0, rowB0 );
fltx4 mul21 = MulSIMD( A1, rowB1 );
fltx4 mul22 = MulSIMD( A2, rowB2 );
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
// add in translation vector: keep only the masked (w) component of each
// m0 row and add it to the accumulated row
A0 = AndSIMD(rowA0,lastMask);
A1 = AndSIMD(rowA1,lastMask);
A2 = AndSIMD(rowA2,lastMask);
out0 = AddSIMD(out0, A0);
out1 = AddSIMD(out1, A1);
out2 = AddSIMD(out2, A2);
StoreAlignedSIMD( out.m_flMatVal[0], out0 );
StoreAlignedSIMD( out.m_flMatVal[1], out1 );
StoreAlignedSIMD( out.m_flMatVal[2], out2 );
}
/*
================
R_ConcatTransforms
================
*/
void ConcatTransforms (const matrix3x4_t& in1, const matrix3x4_t& in2, matrix3x4_t& out)
{
Assert( s_bMathlibInitialized );
if ( &in1 == &out )
#if 0
// test for ones that'll be 2x faster
if ( (((size_t)&in1) % 16) == 0 && (((size_t)&in2) % 16) == 0 && (((size_t)&out) % 16) == 0 )
{
matrix3x4_t in1b;
MatrixCopy( in1, in1b );
ConcatTransforms( in1b, in2, out );
ConcatTransforms_Aligned( in1, in2, out );
return;
}
if ( &in2 == &out )
{
matrix3x4_t in2b;
MatrixCopy( in2, in2b );
ConcatTransforms( in1, in2b, out );
return;
}
out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
in1[0][2] * in2[2][0];
out[0][1] = in1[0][0] * in2[0][1] + in1[0][1] * in2[1][1] +
in1[0][2] * in2[2][1];
out[0][2] = in1[0][0] * in2[0][2] + in1[0][1] * in2[1][2] +
in1[0][2] * in2[2][2];
out[0][3] = in1[0][0] * in2[0][3] + in1[0][1] * in2[1][3] +
in1[0][2] * in2[2][3] + in1[0][3];
out[1][0] = in1[1][0] * in2[0][0] + in1[1][1] * in2[1][0] +
in1[1][2] * in2[2][0];
out[1][1] = in1[1][0] * in2[0][1] + in1[1][1] * in2[1][1] +
in1[1][2] * in2[2][1];
out[1][2] = in1[1][0] * in2[0][2] + in1[1][1] * in2[1][2] +
in1[1][2] * in2[2][2];
out[1][3] = in1[1][0] * in2[0][3] + in1[1][1] * in2[1][3] +
in1[1][2] * in2[2][3] + in1[1][3];
out[2][0] = in1[2][0] * in2[0][0] + in1[2][1] * in2[1][0] +
in1[2][2] * in2[2][0];
out[2][1] = in1[2][0] * in2[0][1] + in1[2][1] * in2[1][1] +
in1[2][2] * in2[2][1];
out[2][2] = in1[2][0] * in2[0][2] + in1[2][1] * in2[1][2] +
in1[2][2] * in2[2][2];
out[2][3] = in1[2][0] * in2[0][3] + in1[2][1] * in2[1][3] +
in1[2][2] * in2[2][3] + in1[2][3];
#endif
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
fltx4 rowA0 = LoadUnalignedSIMD( in1.m_flMatVal[0] );
fltx4 rowA1 = LoadUnalignedSIMD( in1.m_flMatVal[1] );
fltx4 rowA2 = LoadUnalignedSIMD( in1.m_flMatVal[2] );
fltx4 rowB0 = LoadUnalignedSIMD( in2.m_flMatVal[0] );
fltx4 rowB1 = LoadUnalignedSIMD( in2.m_flMatVal[1] );
fltx4 rowB2 = LoadUnalignedSIMD( in2.m_flMatVal[2] );
// now we have the rows of m0 and the columns of m1
// first output row
fltx4 A0 = SplatXSIMD(rowA0);
fltx4 A1 = SplatYSIMD(rowA0);
fltx4 A2 = SplatZSIMD(rowA0);
fltx4 mul00 = MulSIMD( A0, rowB0 );
fltx4 mul01 = MulSIMD( A1, rowB1 );
fltx4 mul02 = MulSIMD( A2, rowB2 );
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
// second output row
A0 = SplatXSIMD(rowA1);
A1 = SplatYSIMD(rowA1);
A2 = SplatZSIMD(rowA1);
fltx4 mul10 = MulSIMD( A0, rowB0 );
fltx4 mul11 = MulSIMD( A1, rowB1 );
fltx4 mul12 = MulSIMD( A2, rowB2 );
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
// third output row
A0 = SplatXSIMD(rowA2);
A1 = SplatYSIMD(rowA2);
A2 = SplatZSIMD(rowA2);
fltx4 mul20 = MulSIMD( A0, rowB0 );
fltx4 mul21 = MulSIMD( A1, rowB1 );
fltx4 mul22 = MulSIMD( A2, rowB2 );
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
// add in translation vector
A0 = AndSIMD(rowA0,lastMask);
A1 = AndSIMD(rowA1,lastMask);
A2 = AndSIMD(rowA2,lastMask);
out0 = AddSIMD(out0, A0);
out1 = AddSIMD(out1, A1);
out2 = AddSIMD(out2, A2);
// write to output
StoreUnalignedSIMD( out.m_flMatVal[0], out0 );
StoreUnalignedSIMD( out.m_flMatVal[1], out1 );
StoreUnalignedSIMD( out.m_flMatVal[2], out2 );
}
@ -1358,7 +1461,9 @@ float Bias( float x, float biasAmt )
{
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
}
return pow( x, lastExponent );
float fRet = pow( x, lastExponent );
Assert ( !IS_NAN( fRet ) );
return fRet;
}
@ -1374,7 +1479,9 @@ float Gain( float x, float biasAmt )
float SmoothCurve( float x )
{
return (1 - cos( x * M_PI )) * 0.5f;
// Actual smooth curve. Visualization:
// http://www.wolframalpha.com/input/?i=plot%5B+0.5+*+%281+-+cos%5B2+*+pi+*+x%5D%29+for+x+%3D+%280%2C+1%29+%5D
return 0.5f * (1 - cos( 2.0f * M_PI * x ) );
}
@ -1566,7 +1673,9 @@ float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q )
QuaternionConjugate( q, qInv );
QuaternionMult( p, qInv, diff );
float sinang = sqrt( diff.x * diff.x + diff.y * diff.y + diff.z * diff.z );
// Note if the quaternion is slightly non-normalized the square root below may be more than 1,
// the value is clamped to one otherwise it may result in asin() returning an undefined result.
float sinang = MIN( 1.0f, sqrt( diff.x * diff.x + diff.y * diff.y + diff.z * diff.z ) );
float angle = RAD2DEG( 2 * asin( sinang ) );
return angle;
#else
@ -1666,7 +1775,7 @@ void QuaternionScale( const Quaternion &p, float t, Quaternion &q )
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
float sinom = sqrt( DotProduct( &p.x, &p.x ) );
sinom = MIN( sinom, 1.f );
sinom = V_min( sinom, 1.f );
float sinsom = sin( asin( sinom ) * t );
@ -1751,7 +1860,13 @@ void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt )
void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t& matrix )
{
Assert( pos.IsValid() );
#ifdef DBGFLAG_ASSERT
static bool s_bHushAsserts = !!CommandLine()->FindParm("-hushasserts");
if (!s_bHushAsserts)
{
Assert( pos.IsValid() );
}
#endif
QuaternionMatrix( q, matrix );
@ -1763,7 +1878,13 @@ void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t& matr
void QuaternionMatrix( const Quaternion &q, matrix3x4_t& matrix )
{
Assert( s_bMathlibInitialized );
Assert( q.IsValid() );
#ifdef DBGFLAG_ASSERT
static bool s_bHushAsserts = !!CommandLine()->FindParm("-hushasserts");
if ( !s_bHushAsserts )
{
Assert( q.IsValid() );
}
#endif
#ifdef _VPROF_MATHLIB
VPROF_BUDGET( "QuaternionMatrix", "Mathlib" );
@ -3211,7 +3332,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
#if !defined( _X360 )
// Grab the processor information:
const CPUInformation& pi = GetCPUInformation();
const CPUInformation& pi = *GetCPUInformation();
// Select the default generic routines.
pfSqrt = _sqrtf;
@ -3240,6 +3361,8 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
{
s_bSSEEnabled = true;
#ifndef PLATFORM_WINDOWS_PC64
// These are not yet available.
// Select the SSE specific routines if available
pfVectorNormalize = _VectorNormalize;
pfVectorNormalizeFast = _SSE_VectorNormalizeFast;
@ -3247,7 +3370,8 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
pfSqrt = _SSE_Sqrt;
pfRSqrt = _SSE_RSqrtAccurate;
pfRSqrtFast = _SSE_RSqrtFast;
#ifdef _WIN32
#endif
#ifdef PLATFORM_WINDOWS_PC32
pfFastSinCos = _SSE_SinCos;
pfFastCos = _SSE_cos;
#endif
@ -3260,7 +3384,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
if ( bAllowSSE2 && pi.m_bSSE2 )
{
s_bSSE2Enabled = true;
#ifdef _WIN32
#ifdef PLATFORM_WINDOWS_PC32
pfFastSinCos = _SSE2_SinCos;
pfFastCos = _SSE2_cos;
#endif
@ -3269,7 +3393,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
{
s_bSSE2Enabled = false;
}
#endif
#endif // !_X360
s_bMathlibInitialized = true;
@ -3920,10 +4044,10 @@ void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector
//-----------------------------------------------------------------------------
void RGBtoHSV( const Vector &rgb, Vector &hsv )
{
float flMax = MAX( rgb.x, rgb.y );
flMax = MAX( flMax, rgb.z );
float flMin = MIN( rgb.x, rgb.y );
flMin = MIN( flMin, rgb.z );
float flMax = V_max( rgb.x, rgb.y );
flMax = V_max( flMax, rgb.z );
float flMin = V_min( rgb.x, rgb.y );
flMin = V_min( flMin, rgb.z );
// hsv.z is the value
hsv.z = flMax;
@ -4070,3 +4194,44 @@ void GetInterpolationData( float const *pKnotPositions,
*pInterpolationValue = FLerp( 0, 1, 0, flSizeOfGap, flOffsetFromStartOfGap );
return;
}
// Picks a uniformly distributed random point inside the unit sphere
// ("Nonuniform random point sets via warping", Graphics Gems III) and
// returns its distance from the origin.
float RandomVectorInUnitSphere( Vector *pVector )
{
	// Three independent uniform variates in [0,1] drive polar angle,
	// azimuth and radius respectively.  Drawn up front, in this exact
	// order, so the rand() stream consumption is unchanged.
	const float flU = ((float)rand() / VALVE_RAND_MAX);
	const float flV = ((float)rand() / VALVE_RAND_MAX);
	const float flW = ((float)rand() / VALVE_RAND_MAX);

	const float flPolar = acos( 1 - 2 * flU );
	const float flAzimuth = 2 * M_PI * flV;
	// Cube root makes the density uniform in volume rather than in radius.
	const float flRadius = powf( flW, 1.0f / 3.0f );

	float flSinPolar, flCosPolar;
	SinCos( flPolar, &flSinPolar, &flCosPolar );
	float flSinAzimuth, flCosAzimuth;
	SinCos( flAzimuth, &flSinAzimuth, &flCosAzimuth );

	pVector->x = flRadius * flSinPolar * flCosAzimuth;
	pVector->y = flRadius * flSinPolar * flSinAzimuth;
	pVector->z = flRadius * flCosPolar;
	return flRadius;
}
// Picks a uniformly distributed random point inside the unit circle
// (same Graphics Gems III warping technique as the sphere version) and
// returns its distance from the origin.
float RandomVectorInUnitCircle( Vector2D *pVector )
{
	// Two independent uniform variates; drawn in the same order as before
	// so the rand() stream consumption is unchanged: first drives the
	// radius, second the angle.
	const float flU = ((float)rand() / VALVE_RAND_MAX);
	const float flV = ((float)rand() / VALVE_RAND_MAX);

	const float flAzimuth = 2 * M_PI * flV;
	// Square root makes the density uniform in area rather than in radius.
	const float flRadius = powf( flU, 1.0f / 2.0f );

	float flSinAzimuth, flCosAzimuth;
	SinCos( flAzimuth, &flSinAzimuth, &flCosAzimuth );

	pVector->x = flRadius * flCosAzimuth;
	pVector->y = flRadius * flSinAzimuth;
	return flRadius;
}

View File

@ -34,7 +34,6 @@ CPolyhedron *ConvertLinkedGeometryToPolyhedron( GeneratePolyhedronFromPlanes_Uno
//#define DEBUG_DUMP_POLYHEDRONS_TO_NUMBERED_GLVIEWS //dumps successfully generated polyhedrons
#ifdef _DEBUG
#include "filesystem.h"
void DumpPolyhedronToGLView( const CPolyhedron *pPolyhedron, const char *pFilename, const VMatrix *pTransform );
void DumpPlaneToGlView( const float *pPlane, float fGrayScale, const char *pszFileName, const VMatrix *pTransform );
void DumpLineToGLView( const Vector &vPoint1, const Vector &vColor1, const Vector &vPoint2, const Vector &vColor2, float fThickness, FILE *pFile );
@ -103,19 +102,19 @@ CPolyhedron_AllocByNew *CPolyhedron_AllocByNew::Allocate( unsigned short iVertic
class CPolyhedron_TempMemory : public CPolyhedron
{
public:
#ifdef _DEBUG
#ifdef DBGFLAG_ASSERT
int iReferenceCount;
#endif
virtual void Release( void )
{
#ifdef _DEBUG
#ifdef DBGFLAG_ASSERT
--iReferenceCount;
#endif
}
CPolyhedron_TempMemory( void )
#ifdef _DEBUG
#ifdef DBGFLAG_ASSERT
: iReferenceCount( 0 )
#endif
{ };
@ -128,7 +127,7 @@ static CPolyhedron_TempMemory s_TempMemoryPolyhedron;
CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ) //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time
{
AssertMsg( s_TempMemoryPolyhedron.iReferenceCount == 0, "Temporary polyhedron memory being rewritten before released" );
#ifdef _DEBUG
#ifdef DBGFLAG_ASSERT
++s_TempMemoryPolyhedron.iReferenceCount;
#endif
s_TempMemoryPolyhedron_Buffer.SetCount( (sizeof( Vector ) * iVertices) +
@ -857,8 +856,8 @@ const char * DumpPolyhedronCutHistory( const CUtlVector<CPolyhedron *> &DumpedHi
#else
#define AssertMsg_DumpPolyhedron(condition, message)
#define Assert_DumpPolyhedron(condition)
#define AssertMsg_DumpPolyhedron(condition, message) NULL;
#define Assert_DumpPolyhedron(condition) NULL;
#endif

View File

@ -6,6 +6,10 @@
#include "mathlib/ssemath.h"
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent)
{
fltx4 rslt=Four_Ones; // x^0=1.0
@ -32,8 +36,61 @@ fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent)
break;
}
if (exponent<0)
return ReciprocalEstSIMD(rslt); // pow(x,-b)=1/pow(x,b)
return ReciprocalEstSaturateSIMD(rslt); // pow(x,-b)=1/pow(x,b)
else
return rslt;
}
/*
* (c) Ian Stephenson
*
* ian@dctsystems.co.uk
*
* Fast pow() reference implementation
*/
// 2^23 and its reciprocal: scale factors between a float's raw bit
// pattern and its exponent-domain value.
static float shift23=(1<<23);
static float OOshift23=1.0/(1<<23);

// Approximates log2(i) by reinterpreting the float's bits (which encode
// exponent + mantissa) and applying a quadratic "bodge" correction to
// the fractional part.
float FastLog2(float i)
{
	const float LogBodge=0.346607f;
	// Reinterpret the bit pattern as an integer, then rescale so the
	// exponent lands in the integer part of the result.
	float flLog;
	flLog = *(int *)&i;
	flLog *= OOshift23; //1/pow(2,23);
	flLog = flLog - 127;
	// Quadratic correction of the fractional (mantissa-derived) part.
	float flCorr = flLog - floorf(flLog);
	flCorr = (flCorr - flCorr*flCorr) * LogBodge;
	return flLog + flCorr;
}

// Approximates 2^i: the inverse of FastLog2, including the inverse of
// its quadratic correction.
float FastPow2(float i)
{
	const float PowBodge=0.33971f;
	// Inverse quadratic correction of the fractional part.
	float flCorr = i - floorf(i);
	flCorr = (flCorr - flCorr*flCorr) * PowBodge;
	// Rebias the exponent and scale back up into bit-pattern range.
	float flBits = i + 127 - flCorr;
	flBits *= shift23; //pow(2,23);
	// Write the integer value straight into the float's bit pattern.
	*(int*)&flBits = (int)flBits;
	return flBits;
}

// Approximates a^b as 2^(b * log2(a)).
float FastPow(float a, float b)
{
	// Bases at or below 2^-23 underflow the log approximation; treat the
	// result as zero.
	if (a <= OOshift23)
		return 0.0f;
	return FastPow2(b * FastLog2(a));
}

// Approximates 10^i as 2^(i * log2(10)); 3.321928 ~= log2(10).
float FastPow10( float i )
{
	return FastPow2( i * 3.321928f );
}

View File

@ -18,11 +18,10 @@
#endif
#include <stdlib.h>
#include <minmax.h>
#include <math.h>
#include <tier0/basetypes.h>
static int current_ndims;
static struct QuantizedValue *current_root;
static int current_ssize;
@ -412,8 +411,8 @@ static void Label(struct QuantizedValue *q, int updatecolor)
else
for(int i=0;i<current_ndims;i++)
{
q->Mins[i]=MIN(q->Children[0]->Mins[i],q->Children[1]->Mins[i]);
q->Maxs[i]=MAX(q->Children[0]->Maxs[i],q->Children[1]->Maxs[i]);
q->Mins[i]=V_min(q->Children[0]->Mins[i],q->Children[1]->Mins[i]);
q->Maxs[i]=V_max(q->Children[0]->Maxs[i],q->Children[1]->Maxs[i]);
}
}
}

View File

@ -48,7 +48,7 @@ void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData(int srcwidth, int srcheigh
{
for(int cp=0;cp<4; cp++)
{
int real_cp=MIN( cp, ntrailing_pixels_per_source_line-1 );
int real_cp=V_min( cp, ntrailing_pixels_per_source_line-1 );
data_out[4*c+cp]= data_in[c+4*real_cp];
}
}

124
mathlib/spherical.cpp Normal file
View File

@ -0,0 +1,124 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: spherical math routines
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/spherical_geometry.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// Lookup table of factorials: s_flFactorials[n] == n! for n = 0..33.
// NOTE(review): stored as float, so entries beyond roughly 13! are
// rounded (float carries only ~7 significant digits); the caller in
// this file (SHNormalizationFactor) uses them inside a double-precision
// ratio, where the relative error stays small.
float s_flFactorials[]={
1.,
1.,
2.,
6.,
24.,
120.,
720.,
5040.,
40320.,
362880.,
3628800.,
39916800.,
479001600.,
6227020800.,
87178291200.,
1307674368000.,
20922789888000.,
355687428096000.,
6402373705728000.,
121645100408832000.,
2432902008176640000.,
51090942171709440000.,
1124000727777607680000.,
25852016738884976640000.,
620448401733239439360000.,
15511210043330985984000000.,
403291461126605635584000000.,
10888869450418352160768000000.,
304888344611713860501504000000.,
8841761993739701954543616000000.,
265252859812191058636308480000000.,
8222838654177922817725562880000000.,
263130836933693530167218012160000000.,
8683317618811886495518194401280000000.
};
// Evaluates the associated Legendre polynomial P(nL, nM) at flX using
// the standard three-term recurrence, starting from the closed form for
// P(nM, nM) and marching the degree up to nL.
float AssociatedLegendrePolynomial( int nL, int nM, float flX )
{
	// Seed: P(nM, nM) = (-1)^m (2m-1)!! (1-x^2)^(m/2)
	float flPrev = 1.;
	if ( nM > 0 )
	{
		// sqrt(1 - x^2), factored to reduce cancellation near |x| == 1
		float flSinTerm = sqrt( ( 1 - flX ) * ( 1 + flX ) );
		float flOddFactor = 1.;
		for( int i = 0 ; i < nM; i++ )
		{
			flPrev *= -flOddFactor * flSinTerm;
			flOddFactor += 2.0;
		}
	}
	if ( nL == nM )
		return flPrev;

	// One recurrence step gives P(nM+1, nM).
	float flCur = flX * ( 2.0 * nM + 1.0 ) * flPrev;
	if ( nL == nM + 1 )
		return flCur;

	// March the degree from nM+2 up to nL.
	float flNext = 0.;
	for( int nDeg = nM + 2 ; nDeg <= nL; nDeg++ )
	{
		flNext = ( ( 2.0 * nDeg - 1.0 ) * flX * flCur - ( nDeg + nM - 1.0 ) * flPrev ) * ( 1.0 / ( nDeg - nM ) );
		flPrev = flCur;
		flCur = flNext;
	}
	return flNext;
}
// Normalization constant K(l, m) for real spherical harmonics:
// sqrt( (2l+1)(l-m)! / (4*pi*(l+m)!) ), computed in double for accuracy.
static float SHNormalizationFactor( int nL, int nM )
{
	double flRatio = ( ( 2. * nL + 1.0 ) * s_flFactorials[ nL - nM ] )/ ( 4. * M_PI * s_flFactorials[ nL + nM ] );
	return sqrt( flRatio );
}

#define SQRT_2 1.414213562373095

// Real spherical harmonic Y(nL, nM), with the cosine of the first angle
// supplied by the caller so it is not recomputed per term.
FORCEINLINE float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi, float flCosTheta )
{
	if ( nM == 0 )
	{
		// Zonal harmonic: no azimuthal dependence.
		return SHNormalizationFactor( nL, 0 ) * AssociatedLegendrePolynomial( nL, nM, flCosTheta );
	}
	if ( nM > 0 )
	{
		// Positive orders use the cosine azimuthal basis.
		return SQRT_2 * SHNormalizationFactor( nL, nM ) * cos ( nM * flPhi ) *
			AssociatedLegendrePolynomial( nL, nM, flCosTheta );
	}
	// Negative orders use the sine azimuthal basis with |nM|.
	return SQRT_2 * SHNormalizationFactor( nL, -nM ) * sin( -nM * flPhi ) * AssociatedLegendrePolynomial( nL, -nM, flCosTheta );
}

// Convenience overload: derives cos of the first angle itself.
float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi )
{
	return SphericalHarmonic( nL, nM, flTheta, flPhi, cos( flTheta ) );
}
// Real spherical harmonic Y(nL, nM) evaluated for a unit direction vector.
float SphericalHarmonic( int nL, int nM, Vector const &vecDirection )
{
	Assert( fabs( VectorLength( vecDirection ) - 1.0 ) < 0.0001 );
	// Polar angle measured from +z; note vecDirection.z == cos(polar).
	float flPolar = acos( vecDirection.z );
	// Azimuth in the xy plane; undefined on the z axis, default to 0 there.
	float flAzimuth = 0;
	float S = Square( vecDirection.x ) + Square( vecDirection.y );
	if ( S > 0 )
	{
		flAzimuth = atan2( vecDirection.y, vecDirection.x );
	}
	// NOTE(fix): the 5-arg overload expects (polar, azimuth, cos(polar)) --
	// the 4-arg overload shows the convention by passing cos() of its FIRST
	// angle into the Legendre term.  The previous code passed the angles
	// swapped and cos(azimuth), evaluating the harmonic at the wrong point.
	// Passing vecDirection.z directly also avoids a redundant cos(acos(z)).
	return SphericalHarmonic( nL, nM, flPolar, flAzimuth, vecDirection.z );
}

View File

@ -1,4 +1,4 @@
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose: SSE Math primitives.
//
@ -16,7 +16,10 @@
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
#if defined ( _WIN32 ) && !defined ( _WIN64 )
#ifndef COMPILER_MSVC64
// Implement for 64-bit Windows if needed.
#ifdef _WIN32
static const uint32 _sincos_masks[] = { (uint32)0x0, (uint32)~0x0 };
static const uint32 _sincos_inv_masks[] = { (uint32)~0x0, (uint32)0x0 };
#endif
@ -37,21 +40,21 @@ static const uint32 _sincos_inv_masks[] = { (uint32)~0x0, (uint32)0x0 };
#define _PS_CONST(Name, Val) \
static const __declspec(align(16)) float _ps_##Name[4] = { Val, Val, Val, Val }
#elif defined _LINUX || defined __APPLE__
#elif POSIX
#define _PS_EXTERN_CONST(Name, Val) \
const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
const float _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
#define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
const __attribute__((aligned(16))) Type _ps_##Name[4] = { Val, Val, Val, Val }; \
const Type _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }; \
#define _EPI32_CONST(Name, Val) \
static const __attribute__((aligned(16))) int32 _epi32_##Name[4] = { Val, Val, Val, Val }
static const int32 _epi32_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
#define _PS_CONST(Name, Val) \
static const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
static const float _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
#endif
#if defined ( _WIN32 ) && !defined ( _WIN64 )
#ifdef _WIN32
_PS_EXTERN_CONST(am_0, 0.0f);
_PS_EXTERN_CONST(am_1, 1.0f);
_PS_EXTERN_CONST(am_m1, -1.0f);
@ -62,8 +65,8 @@ _PS_EXTERN_CONST(am_pi_o_2, (float)(M_PI / 2.0));
_PS_EXTERN_CONST(am_2_o_pi, (float)(2.0 / M_PI));
_PS_EXTERN_CONST(am_pi_o_4, (float)(M_PI / 4.0));
_PS_EXTERN_CONST(am_4_o_pi, (float)(4.0 / M_PI));
_PS_EXTERN_CONST_TYPE(am_sign_mask, int32, (int32)0x80000000);
_PS_EXTERN_CONST_TYPE(am_inv_sign_mask, int32, ~0x80000000);
_PS_EXTERN_CONST_TYPE(am_sign_mask, int32, static_cast<int32>(0x80000000));
_PS_EXTERN_CONST_TYPE(am_inv_sign_mask, int32, static_cast<int32>(~0x80000000));
_PS_EXTERN_CONST_TYPE(am_min_norm_pos,int32, 0x00800000);
_PS_EXTERN_CONST_TYPE(am_mant_mask, int32, 0x7f800000);
_PS_EXTERN_CONST_TYPE(am_inv_mant_mask, int32, ~0x7f800000);
@ -86,9 +89,6 @@ void __cdecl _SSE_VectorMA( const float *start, float scale, const float *direc
//-----------------------------------------------------------------------------
float _SSE_Sqrt(float x)
{
#if defined( _WIN64 )
return std::sqrt(x);
#else
Assert( s_bMathlibInitialized );
float root = 0.f;
#ifdef _WIN32
@ -97,17 +97,10 @@ float _SSE_Sqrt(float x)
sqrtss xmm0, x
movss root, xmm0
}
#elif defined _LINUX || defined __APPLE__
__asm__ __volatile__(
"movss %1,%%xmm2\n"
"sqrtss %%xmm2,%%xmm1\n"
"movss %%xmm1,%0"
: "=m" (root)
: "m" (x)
);
#elif POSIX
_mm_store_ss( &root, _mm_sqrt_ss( _mm_load_ss( &x ) ) );
#endif
return root;
#endif // _WIN64
}
// Single iteration NewtonRaphson reciprocal square root:
@ -128,17 +121,21 @@ float _SSE_RSqrtAccurate(float x)
return (0.5f * rroot) * (3.f - (x * rroot) * rroot);
}
#else
#ifdef POSIX
const __m128 f3 = _mm_set_ss(3.0f); // 3 as SSE value
const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value
#endif
// Intel / Kipps SSE RSqrt. Significantly faster than above.
float _SSE_RSqrtAccurate(float a)
{
#if defined( _WIN64 )
return std::sqrt(a);
#else
#ifdef _WIN32
float x;
float half = 0.5f;
float three = 3.f;
#ifdef _WIN32
__asm
{
movss xmm3, a;
@ -154,27 +151,25 @@ float _SSE_RSqrtAccurate(float a)
movss x, xmm1;
}
#elif defined _LINUX || defined __APPLE__
__asm__ __volatile__(
"movss %1, %%xmm3 \n\t"
"movss %2, %%xmm1 \n\t"
"movss %3, %%xmm2 \n\t"
"rsqrtss %%xmm3, %%xmm0 \n\t"
"mulss %%xmm0, %%xmm3 \n\t"
"mulss %%xmm0, %%xmm1 \n\t"
"mulss %%xmm0, %%xmm3 \n\t"
"subss %%xmm3, %%xmm2 \n\t"
"mulss %%xmm2, %%xmm1 \n\t"
"movss %%xmm1, %0 \n\t"
: "=m" (x)
: "m" (a), "m" (half), "m" (three)
);
return x;
#elif POSIX
__m128 xx = _mm_load_ss( &a );
__m128 xr = _mm_rsqrt_ss( xx );
__m128 xt;
xt = _mm_mul_ss( xr, xr );
xt = _mm_mul_ss( xt, xx );
xt = _mm_sub_ss( f3, xt );
xt = _mm_mul_ss( xt, f05 );
xr = _mm_mul_ss( xr, xt );
_mm_store_ss( &a, xr );
return a;
#else
#error "Not Implemented"
#endif
return x;
#endif // _WIN64
}
#endif
@ -182,54 +177,40 @@ float _SSE_RSqrtAccurate(float a)
// or so, so ok for closed transforms. (ie, computing lighting normals)
float _SSE_RSqrtFast(float x)
{
#if defined( _WIN64 )
return std::sqrt(x);
#else
Assert( s_bMathlibInitialized );
float rroot = 0.0f;
float rroot;
#ifdef _WIN32
_asm
{
rsqrtss xmm0, x
movss rroot, xmm0
}
#elif defined _LINUX || defined __APPLE__
__asm__ __volatile__(
"rsqrtss %1, %%xmm0 \n\t"
"movss %%xmm0, %0 \n\t"
: "=m" (x)
: "m" (rroot)
: "%xmm0"
);
#elif POSIX
__asm__ __volatile__( "rsqrtss %0, %1" : "=x" (rroot) : "x" (x) );
#else
#error
#endif
return rroot;
#endif // _WIN64
}
float FASTCALL _SSE_VectorNormalize (Vector& vec)
{
#if defined( _WIN64 )
float l = std::sqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
vec.x /= l;
vec.y /= l;
vec.z /= l;
return l;
#else
Assert( s_bMathlibInitialized );
// NOTE: This is necessary to prevent an memory overwrite...
// sice vec only has 3 floats, we can't "movaps" directly into it.
#ifdef _WIN32
__declspec(align(16)) float result[4];
#elif defined _LINUX || defined __APPLE__
__attribute__((aligned(16))) float result[4];
#elif POSIX
float result[4] __attribute__((aligned(16)));
#endif
float *v = &vec[0];
#ifdef _WIN32
float *r = &result[0];
#endif
float radius = 0.f;
// Blah, get rid of these comparisons ... in reality, if you have all 3 as zero, it shouldn't
@ -237,7 +218,6 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
if ( v[0] || v[1] || v[2] )
{
#ifdef _WIN32
float *r = &result[0];
_asm
{
mov eax, v
@ -262,7 +242,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
mulps xmm4, xmm1 // r4 = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
movaps [edx], xmm4 // v = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
}
#elif defined _LINUX || defined __APPLE__
#elif POSIX
__asm__ __volatile__(
#ifdef ALIGNED_VECTOR
"movaps %2, %%xmm4 \n\t"
@ -285,6 +265,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
"movaps %%xmm4, %1 \n\t"
: "=m" (radius), "=m" (result)
: "m" (*v)
: "xmm1", "xmm2", "xmm3", "xmm4"
);
#else
#error "Not Implemented"
@ -296,7 +277,6 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
}
return radius;
#endif // _WIN64
}
void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
@ -310,10 +290,6 @@ void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
float _SSE_InvRSquared(const float* v)
{
#if defined( _WIN64 )
float r2 = DotProduct(v, v);
return r2 < 1.f ? 1.f : 1/r2;
#else
float inv_r2 = 1.f;
#ifdef _WIN32
_asm { // Intel SSE only routine
@ -331,12 +307,13 @@ float _SSE_InvRSquared(const float* v)
shufps xmm2, xmm2, 1 // x2 = vy * vy, X, X, X
addss xmm1, xmm2 // x1 = (vx * vx) + (vy * vy), X, X, X
addss xmm1, xmm3 // x1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
maxss xmm1, xmm5 // x1 = MAX( 1.0, x1 )
rcpss xmm0, xmm1 // x0 = 1 / MAX( 1.0, x1 )
maxss xmm1, xmm5 // x1 = max( 1.0, x1 )
rcpss xmm0, xmm1 // x0 = 1 / max( 1.0, x1 )
movss inv_r2, xmm0 // inv_r2 = x0
}
#elif defined _LINUX || defined __APPLE__
#elif POSIX
__asm__ __volatile__(
"movss %0, %%xmm5 \n\t"
#ifdef ALIGNED_VECTOR
"movaps %1, %%xmm4 \n\t"
#else
@ -352,23 +329,64 @@ float _SSE_InvRSquared(const float* v)
"maxss %%xmm5, %%xmm1 \n\t"
"rcpss %%xmm1, %%xmm0 \n\t"
"movss %%xmm0, %0 \n\t"
: "=m" (inv_r2)
: "m" (*v), "m" (inv_r2)
: "+m" (inv_r2)
: "m" (*v)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
#else
#error "Not Implemented"
#endif
return inv_r2;
#endif // _WIN64
}
#ifdef POSIX
// #define _PS_CONST(Name, Val) static const ALIGN16 float _ps_##Name[4] ALIGN16_POST = { Val, Val, Val, Val }
#define _PS_CONST_TYPE(Name, Type, Val) static const ALIGN16 Type _ps_##Name[4] ALIGN16_POST = { static_cast<Type>(Val), static_cast<Type>(Val), static_cast<Type>(Val), static_cast<Type>(Val) }
_PS_CONST_TYPE(sign_mask, int, 0x80000000);
_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
#define _PI32_CONST(Name, Val) static const ALIGN16 int _pi32_##Name[4] ALIGN16_POST = { Val, Val, Val, Val }
_PI32_CONST(1, 1);
_PI32_CONST(inv1, ~1);
_PI32_CONST(2, 2);
_PI32_CONST(4, 4);
#ifdef _WIN32
_PI32_CONST(0x7f, 0x7f);
#endif
_PS_CONST(1 , 1.0f);
_PS_CONST(0p5, 0.5f);
_PS_CONST(minus_cephes_DP1, -0.78515625);
_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
_PS_CONST(sincof_p0, -1.9515295891E-4);
_PS_CONST(sincof_p1, 8.3321608736E-3);
_PS_CONST(sincof_p2, -1.6666654611E-1);
_PS_CONST(coscof_p0, 2.443315711809948E-005);
_PS_CONST(coscof_p1, -1.388731625493765E-003);
_PS_CONST(coscof_p2, 4.166664568298827E-002);
_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
typedef union xmm_mm_union {
__m128 xmm;
__m64 mm[2];
} xmm_mm_union;
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; }
typedef __m128 v4sf; // vector of 4 float (sse1)
typedef __m64 v2si; // vector of 2 int (mmx)
#endif
void _SSE_SinCos(float x, float* s, float* c)
{
#if defined( _WIN64 )
*s = std::sin(x);
*c = std::cos(x);
#elif defined( _WIN32 )
#ifdef _WIN32
float t4, t8, t12;
__asm
@ -453,8 +471,121 @@ void _SSE_SinCos(float x, float* s, float* c)
movss [eax], xmm0
movss [edx], xmm4
}
#elif defined _LINUX || defined __APPLE__
// #warning "_SSE_sincos NOT implemented!"
#elif POSIX
Assert( "Needs testing, verify impl!\n" );
v4sf xx = _mm_load_ss( &x );
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
v2si mm0, mm1, mm2, mm3, mm4, mm5;
sign_bit_sin = xx;
/* take the absolute value */
xx = _mm_and_ps(xx, *(v4sf*)_ps_inv_sign_mask);
/* extract the sign bit (upper one) */
sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(xx, *(v4sf*)_ps_cephes_FOPI);
/* store the integer part of y in mm2:mm3 */
xmm3 = _mm_movehl_ps(xmm3, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm3);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm4 = mm2;
mm5 = mm3;
/* get the swap sign flag for the sine */
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
v4sf swap_sign_bit_sin;
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
/* get the polynom selection mask for the sine */
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
v4sf poly_mask;
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
xx = _mm_add_ps(xx, xmm1);
xx = _mm_add_ps(xx, xmm2);
xx = _mm_add_ps(xx, xmm3);
/* get the sign flag for the cosine */
mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
mm4 = _mm_slli_pi32(mm4, 29);
mm5 = _mm_slli_pi32(mm5, 29);
v4sf sign_bit_cos;
COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
_mm_empty(); /* good-bye mmx */
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
v4sf z = _mm_mul_ps(xx,xx);
y = *(v4sf*)_ps_coscof_p0;
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(v4sf*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
v4sf y2 = *(v4sf*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, xx);
y2 = _mm_add_ps(y2, xx);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
v4sf ysin2 = _mm_and_ps(xmm3, y2);
v4sf ysin1 = _mm_andnot_ps(xmm3, y);
y2 = _mm_sub_ps(y2,ysin2);
y = _mm_sub_ps(y, ysin1);
xmm1 = _mm_add_ps(ysin1,ysin2);
xmm2 = _mm_add_ps(y,y2);
/* update the sign */
_mm_store_ss( s, _mm_xor_ps(xmm1, sign_bit_sin) );
_mm_store_ss( c, _mm_xor_ps(xmm2, sign_bit_cos) );
#else
#error "Not Implemented"
#endif
@ -462,9 +593,7 @@ void _SSE_SinCos(float x, float* s, float* c)
float _SSE_cos( float x )
{
#if defined ( _WIN64 )
return std::cos(x);
#elif defined( _WIN32 )
#ifdef _WIN32
float temp;
__asm
{
@ -513,8 +642,102 @@ float _SSE_cos( float x )
movss x, xmm0
}
#elif defined _LINUX || defined __APPLE__
// #warning "_SSE_cos NOT implemented!"
#elif POSIX
Assert( "Needs testing, verify impl!\n" );
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
v2si mm0, mm1, mm2, mm3;
/* take the absolute value */
v4sf xx = _mm_load_ss( &x );
xx = _mm_and_ps(xx, *(v4sf*)_ps_inv_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(xx, *(v4sf*)_ps_cephes_FOPI);
/* store the integer part of y in mm0:mm1 */
xmm2 = _mm_movehl_ps(xmm2, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm2);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
/* get the swap sign flag in mm0:mm1 and the
polynom selection mask in mm2:mm3 */
mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
v4sf sign_bit, poly_mask;
COPY_MM_TO_XMM(mm0, mm1, sign_bit);
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
_mm_empty(); /* good-bye mmx */
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
xx = _mm_add_ps(xx, xmm1);
xx = _mm_add_ps(xx, xmm2);
xx = _mm_add_ps(xx, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = *(v4sf*)_ps_coscof_p0;
v4sf z = _mm_mul_ps(xx,xx);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(v4sf*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
v4sf y2 = *(v4sf*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, xx);
y2 = _mm_add_ps(y2, xx);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
y = _mm_andnot_ps(xmm3, y);
y = _mm_add_ps(y,y2);
/* update the sign */
_mm_store_ss( &x, _mm_xor_ps(y, sign_bit) );
#else
#error "Not Implemented"
#endif
@ -525,12 +748,10 @@ float _SSE_cos( float x )
//-----------------------------------------------------------------------------
// SSE2 implementations of optimized routines:
//-----------------------------------------------------------------------------
#ifdef PLATFORM_WINDOWS_PC32
void _SSE2_SinCos(float x, float* s, float* c) // any x
{
#if defined( _WIN64 )
*s = std::sin(x);
*c = std::cos(x);
#elif defined( _WIN32 )
#ifdef _WIN32
__asm
{
movss xmm0, x
@ -606,18 +827,19 @@ void _SSE2_SinCos(float x, float* s, float* c) // any x
movss [eax], xmm0
movss [edx], xmm6
}
#elif defined _LINUX || defined __APPLE__
// #warning "_SSE2_SinCos NOT implemented!"
#elif POSIX
#warning "_SSE2_SinCos NOT implemented!"
Assert( 0 );
#else
#error "Not Implemented"
#endif
}
#endif // PLATFORM_WINDOWS_PC32
#ifdef PLATFORM_WINDOWS_PC32
float _SSE2_cos(float x)
{
#if defined ( _WIN64 )
return std::cos(x);
#elif defined( _WIN32 )
#ifdef _WIN32
__asm
{
movss xmm0, x
@ -663,25 +885,25 @@ float _SSE2_cos(float x)
mulss xmm0, xmm1
movss x, xmm0
}
#elif defined _LINUX || defined __APPLE__
// #warning "_SSE2_cos NOT implemented!"
#elif POSIX
#warning "_SSE2_cos NOT implemented!"
Assert( 0 );
#else
#error "Not Implemented"
#endif
return x;
}
#endif // PLATFORM_WINDOWS_PC32
#if 0
// SSE Version of VectorTransform
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
{
Assert( s_bMathlibInitialized );
Assert( in1 != out1 );
#if defined ( _WIN64 )
out1[0] = DotProduct(in1, in2[0]) + in2[0][3];
out1[1] = DotProduct(in1, in2[1]) + in2[1][3];
out1[2] = DotProduct(in1, in2[2]) + in2[2][3];
#elif defined( _WIN32 )
#ifdef _WIN32
__asm
{
mov eax, in1;
@ -723,8 +945,8 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
addss xmm0, [ecx+12]
movss [edx+8], xmm0;
}
#elif defined _LINUX || defined __APPLE__
// #warning "VectorTransformSSE C implementation only"
#elif POSIX
#warning "VectorTransformSSE C implementation only"
out1[0] = DotProduct(in1, in2[0]) + in2[0][3];
out1[1] = DotProduct(in1, in2[1]) + in2[1][3];
out1[2] = DotProduct(in1, in2[2]) + in2[2][3];
@ -732,16 +954,15 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
#error "Not Implemented"
#endif
}
#endif
#if 0
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
{
Assert( s_bMathlibInitialized );
Assert( in1 != out1 );
#if defined ( _WIN64 )
out1[0] = DotProduct( in1, in2[0] );
out1[1] = DotProduct( in1, in2[1] );
out1[2] = DotProduct( in1, in2[2] );
#elif defined( _WIN32 )
#ifdef _WIN32
__asm
{
mov eax, in1;
@ -780,8 +1001,8 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
addss xmm0, xmm2;
movss [edx+8], xmm0;
}
#elif defined _LINUX || defined __APPLE__
// #warning "VectorRotateSSE C implementation only"
#elif POSIX
#warning "VectorRotateSSE C implementation only"
out1[0] = DotProduct( in1, in2[0] );
out1[1] = DotProduct( in1, in2[1] );
out1[2] = DotProduct( in1, in2[2] );
@ -789,8 +1010,9 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
#error "Not Implemented"
#endif
}
#endif
#if defined( _WIN32 ) && !defined( _WIN64 )
#ifdef _WIN32
void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest )
{
// FIXME: This don't work!! It will overwrite memory in the write to dest
@ -821,7 +1043,7 @@ void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const floa
}
#endif
#if defined( _WIN32 ) && !defined( _WIN64 )
#ifdef _WIN32
#ifdef PFN_VECTORMA
void _declspec(naked) __cdecl _SSE_VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
{
@ -886,4 +1108,6 @@ vec_t DotProduct (const vec_t *a, const vec_t *c)
ret
}
}
*/
*/
#endif // COMPILER_MSVC64

View File

@ -15,9 +15,13 @@ void FASTCALL _SSE_VectorNormalizeFast(Vector& vec);
float _SSE_InvRSquared(const float* v);
void _SSE_SinCos(float x, float* s, float* c);
float _SSE_cos( float x);
#ifdef PLATFORM_WINDOWS_PC32
void _SSE2_SinCos(float x, float* s, float* c);
float _SSE2_cos(float x);
#endif
#if 0
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1);
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 );
#endif
#endif // _SSE_H

View File

@ -1,4 +1,4 @@
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//===== Copyright <EFBFBD> 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
@ -30,24 +30,33 @@ const fltx4 Four_FLT_MAX={FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
const fltx4 Four_Negative_FLT_MAX={-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX};
const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. };
const int32 ALIGN16 g_SIMD_clear_signmask[4]= {(int32)0x7fffffff,(int32)0x7fffffff,(int32)0x7fffffff,(int32)0x7fffffff};
const int32 ALIGN16 g_SIMD_signmask[4]= { (int32)0x80000000, (int32)0x80000000, (int32)0x80000000, (int32)0x80000000 };
const int32 ALIGN16 g_SIMD_lsbmask[4]= { (int32)0xfffffffe, (int32)0xfffffffe, (int32)0xfffffffe, (int32)0xfffffffe };
const int32 ALIGN16 g_SIMD_clear_wmask[4]= { (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, 0 };
const int32 ALIGN16 g_SIMD_AllOnesMask[4]= { (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff }; // ~0,~0,~0,~0
const int32 ALIGN16 g_SIMD_Low16BitsMask[4]= { (int32)0xffff, (int32)0xffff,(int32) 0xffff, (int32)0xffff }; // 0xffff x 4
const int32 ALIGN16 g_SIMD_ComponentMask[4][4] =
extern const fltx4 g_QuatMultRowSign[4];
const fltx4 g_QuatMultRowSign[4] =
{
{ (int32)0xFFFFFFFF, 0, 0, 0 }, { 0, (int32)0xFFFFFFFF, 0, 0 }, { 0, 0, (int32)0xFFFFFFFF, 0 }, { 0, 0, 0, (int32)0xFFFFFFFF }
{ 1.0f, 1.0f, -1.0f, 1.0f },
{ -1.0f, 1.0f, 1.0f, 1.0f },
{ 1.0f, -1.0f, 1.0f, 1.0f },
{ -1.0f, -1.0f, -1.0f, 1.0f }
};
const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] =
const int32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = {static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff)};
const int32 ALIGN16 g_SIMD_signmask[4] ALIGN16_POST = { static_cast<int32>(0x80000000), static_cast<int32>(0x80000000), static_cast<int32>(0x80000000), static_cast<int32>(0x80000000) };
const int32 ALIGN16 g_SIMD_lsbmask[4] ALIGN16_POST = { static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe) };
const int32 ALIGN16 g_SIMD_clear_wmask[4] ALIGN16_POST = { static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), 0 };
const int32 ALIGN16 g_SIMD_AllOnesMask[4] ALIGN16_POST = { static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff) }; // ~0,~0,~0,~0
const int32 ALIGN16 g_SIMD_Low16BitsMask[4] ALIGN16_POST = { 0xffff, 0xffff, 0xffff, 0xffff }; // 0xffff x 4
const int32 ALIGN16 g_SIMD_ComponentMask[4][4] ALIGN16_POST =
{
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff },
{ (int32)0xffffffff, (int32)0x00000000, (int32)0x00000000, (int32)0x00000000 },
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0x00000000, (int32)0x00000000 },
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0x00000000 },
{ static_cast<int32>(0xFFFFFFFF), 0, 0, 0 }, { 0, static_cast<int32>(0xFFFFFFFF), 0, 0 }, { 0, 0, static_cast<int32>(0xFFFFFFFF), 0 }, { 0, 0, 0, static_cast<int32>(0xFFFFFFFF) }
};
const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
{
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff) },
{ static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000) },
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000) },
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000) },
};

View File

@ -30,6 +30,10 @@ static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
// returns 0..1
static inline float GetLatticePointValue( int idx_x, int idx_y, int idx_z )
{
NOTE_UNUSED(perm_d);
NOTE_UNUSED(impulse_ycoords);
NOTE_UNUSED(impulse_zcoords);
int ret_idx = perm_a[idx_x & 0xff];
ret_idx = perm_b[( idx_y + ret_idx ) & 0xff];
ret_idx = perm_c[( idx_z + ret_idx ) & 0xff];

View File

@ -306,7 +306,7 @@ bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst)
for(iRow=0; iRow < 4; iRow++)
{
// Find the row with the largest element in this column.
fLargest = 0.001f;
fLargest = 0.00001f;
iLargest = -1;
for(iTest=iRow; iTest < 4; iTest++)
{
@ -509,7 +509,7 @@ bool VMatrix::IsRotationMatrix() const
FloatMakePositive( v2.Dot(v3) ) < 0.01f;
}
void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles )
static void SetupMatrixAnglesInternal( vec_t m[4][4], const QAngle & vAngles )
{
float sr, sp, sy, cr, cp, cy;
@ -530,6 +530,11 @@ void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles
m[0][3] = 0.f;
m[1][3] = 0.f;
m[2][3] = 0.f;
}
void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles )
{
SetupMatrixAnglesInternal( m, vAngles );
// Add translation
m[0][3] = origin.x;
@ -542,6 +547,21 @@ void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles
}
void VMatrix::SetupMatrixAngles( const QAngle &vAngles )
{
SetupMatrixAnglesInternal( m, vAngles );
// Zero everything else
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
m[3][0] = 0.0f;
m[3][1] = 0.0f;
m[3][2] = 0.0f;
m[3][3] = 1.0f;
}
//-----------------------------------------------------------------------------
// Sets matrix to identity
//-----------------------------------------------------------------------------
@ -728,7 +748,7 @@ void Vector4DMultiplyPosition( const VMatrix& src1, Vector const& src2, Vector4D
{
// Make sure it works if src2 == dst
Vector tmp;
Vector const&v = ( &src2 == &dst.AsVector3D() ) ? tmp : src2;
Vector const&v = ( &src2 == &dst.AsVector3D() ) ? static_cast<const Vector&>(tmp) : src2;
if (&src2 == &dst.AsVector3D())
{
@ -751,7 +771,7 @@ void Vector3DMultiply( const VMatrix &src1, const Vector &src2, Vector &dst )
{
// Make sure it works if src2 == dst
Vector tmp;
const Vector &v = (&src2 == &dst) ? tmp : src2;
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp) : src2;
if( &src2 == &dst )
{
@ -772,7 +792,7 @@ void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2
{
// Make sure it works if src2 == dst
Vector tmp;
const Vector &v = (&src2 == &dst) ? tmp: src2;
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp): src2;
if( &src2 == &dst )
{
VectorCopy( src2, tmp );
@ -799,7 +819,7 @@ void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector
{
// Make sure it works if src2 == dst
Vector tmp;
const Vector &v = (&src2 == &dst) ? tmp : src2;
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp) : src2;
if( &src2 == &dst )
{
VectorCopy( src2, tmp );
@ -852,7 +872,7 @@ void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector&
bool srcEqualsDst = (&src2 == &dst);
Vector tmp;
const Vector&v = srcEqualsDst ? tmp : src2;
const Vector&v = srcEqualsDst ? static_cast<const Vector&>(tmp) : src2;
if (srcEqualsDst)
{
@ -937,7 +957,7 @@ void MatrixBuildTranslation( VMatrix& dst, const Vector &translation )
//-----------------------------------------------------------------------------
void MatrixBuildRotationAboutAxis( VMatrix &dst, const Vector &vAxisOfRot, float angleDegrees )
{
MatrixBuildRotationAboutAxis( vAxisOfRot, angleDegrees, dst.As3x4() );
MatrixBuildRotationAboutAxis( vAxisOfRot, angleDegrees, const_cast< matrix3x4_t &> ( dst.As3x4() ) );
dst[3][0] = 0;
dst[3][1] = 0;
dst[3][2] = 0;
@ -1233,19 +1253,29 @@ void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, doub
0.0f, 0.0f, 0.0f, 1.0f );
}
void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar )
{
dst.m[2][0] = 0.0f;
dst.m[2][1] = 0.0f;
dst.m[2][2] = flZFar / ( flZNear - flZFar );
dst.m[2][3] = flZNear * flZFar / ( flZNear - flZFar );
}
void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
{
float flWidth = 2.0f * flZNear * tanf( flFovX * M_PI / 360.0f );
float flHeight = flWidth / flAspect;
dst.Init( 2.0f * flZNear / flWidth, 0.0f, 0.0f, 0.0f,
0.0f, 2.0f * flZNear/ flHeight, 0.0f, 0.0f,
0.0f, 0.0f, flZFar / ( flZNear - flZFar ), flZNear * flZFar / ( flZNear - flZFar ),
float flWidthScale = 1.0f / tanf( flFovX * M_PI / 360.0f );
float flHeightScale = flAspect * flWidthScale;
dst.Init( flWidthScale, 0.0f, 0.0f, 0.0f,
0.0f, flHeightScale, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, -1.0f, 0.0f );
MatrixBuildPerspectiveZRange ( dst, flZNear, flZFar );
}
void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
{
float flWidth = 2.0f * flZNear * tanf( flFovX * M_PI / 360.0f );
float flWidth = tanf( flFovX * M_PI / 360.0f );
float flHeight = flWidth / flAspect;
// bottom, top, left, right are 0..1 so convert to -<val>/2..<val>/2
@ -1254,10 +1284,12 @@ void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAsp
float flBottom = -(flHeight/2.0f) * (1.0f - bottom) + bottom * (flHeight/2.0f);
float flTop = -(flHeight/2.0f) * (1.0f - top) + top * (flHeight/2.0f);
dst.Init( (2.0f * flZNear) / (flRight-flLeft), 0.0f, (flLeft+flRight)/(flRight-flLeft), 0.0f,
0.0f, 2.0f*flZNear/(flTop-flBottom), (flTop+flBottom)/(flTop-flBottom), 0.0f,
0.0f, 0.0f, flZFar/(flZNear-flZFar), flZNear*flZFar/(flZNear-flZFar),
0.0f, 0.0f, -1.0f, 0.0f );
dst.Init( 1.0f / (flRight-flLeft), 0.0f, (flLeft+flRight)/(flRight-flLeft), 0.0f,
0.0f, 1.0f /(flTop-flBottom), (flTop+flBottom)/(flTop-flBottom), 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, -1.0f, 0.0f );
MatrixBuildPerspectiveZRange ( dst, flZNear, flZFar );
}
#endif // !_STATIC_LINKED || _SHARED_LIB

1190
public/mathlib/amd3dx.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -58,8 +58,8 @@ inline Vector32& Vector32::operator=(const Vector &vOther)
static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
float fmax = MAX( fabs( vOther.x ), fabs( vOther.y ) );
fmax = fpmax( fmax, fabs( vOther.z ) );
float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
fmax = Max( fmax, (float)fabs( vOther.z ) );
for (exp = 0; exp < 3; exp++)
{
@ -70,9 +70,9 @@ inline Vector32& Vector32::operator=(const Vector &vOther)
float fexp = 512.0f / expScale[exp];
x = clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
y = clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
z = clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
return *this;
}
@ -118,8 +118,8 @@ inline Normal32& Normal32::operator=(const Vector &vOther)
{
CHECK_VALID(vOther);
x = clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
y = clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
zneg = (vOther.z < 0);
//x = vOther.x;
//y = vOther.y;
@ -182,9 +182,9 @@ inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
{
CHECK_VALID(vOther);
x = clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
y = clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
z = clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
wneg = (vOther.w < 0);
return *this;
}
@ -229,9 +229,9 @@ inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
{
CHECK_VALID(vOther);
x = clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
y = clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
z = clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
wneg = (vOther.w < 0);
return *this;
}
@ -276,9 +276,9 @@ inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
{
CHECK_VALID(vOther);
x = clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
y = clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
z = clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
wneg = (vOther.w < 0);
return *this;
}

View File

@ -28,6 +28,7 @@ enum LightType_OptimizationFlags_t
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1,
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2,
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4,
LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8,
};
struct LightDesc_t
@ -102,6 +103,11 @@ public:
{
return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot));
}
float OneOverThetaDotMinusPhiDot() const
{
return OneOver_ThetaDot_Minus_PhiDot;
}
};

View File

@ -28,6 +28,14 @@ extern float (*pfFastCos)(float x);
#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
#define FastCos(x) (*pfFastCos)(x)
#if defined(__i386__) || defined(_M_IX86)
// On x86, the inline FPU or SSE sqrt instruction is faster than
// the overhead of setting up a function call and saving/restoring
// the FPU or SSE register state and can be scheduled better, too.
#undef FastSqrt
#define FastSqrt(x) ::sqrtf(x)
#endif
#endif // !_X360
#if defined( _X360 )

View File

@ -16,6 +16,61 @@
#include "mathlib/math_pfns.h"
#if defined(__i386__) || defined(_M_IX86)
// For MMX intrinsics
#include <xmmintrin.h>
#endif
// XXX remove me
#undef clamp
#ifdef DEBUG // stop crashing edit-and-continue
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{
if ( maxVal < minVal )
return maxVal;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
}
#else // DEBUG
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{
#if defined(__i386__) || defined(_M_IX86)
_mm_store_ss( &val,
_mm_min_ss(
_mm_max_ss(
_mm_load_ss(&val),
_mm_load_ss(&minVal) ),
_mm_load_ss(&maxVal) ) );
#else
val = fpmax(minVal, val);
val = fpmin(maxVal, val);
#endif
return val;
}
#endif // DEBUG
//
// Returns a clamped value in the range [min, max].
//
template< class T >
inline T clamp( T const &val, T const &minVal, T const &maxVal )
{
if ( maxVal < minVal )
return maxVal;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
}
// plane_t structure
// !!! if this is changed, it must be changed in asm code too !!!
// FIXME: does the asm code even exist anymore?
@ -225,12 +280,12 @@ FORCEINLINE void VectorClear(vec_t *a)
FORCEINLINE float VectorMaximum(const vec_t *v)
{
return MAX( v[0], MAX( v[1], v[2] ) );
return V_max( v[0], V_max( v[1], v[2] ) );
}
FORCEINLINE float VectorMaximum(const Vector& v)
{
return MAX( v.x, MAX( v.y, v.z ) );
return V_max( v.x, V_max( v.y, v.z ) );
}
FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out)
@ -255,7 +310,7 @@ inline void VectorNegate(vec_t *a)
}
//#define VectorMaximum(a) ( MAX( (a)[0], MAX( (a)[1], (a)[2] ) ) )
//#define VectorMaximum(a) ( V_max( (a)[0], V_max( (a)[1], (a)[2] ) ) )
#define Vector2Clear(x) {(x)[0]=(x)[1]=0;}
#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);}
#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];}
@ -282,10 +337,10 @@ FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector&
dest.z=start.z+direction.z*scale;
}
//FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
//{
// VectorMAInline(start, scale, direction, dest);
//}
FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
{
VectorMAInline(start, scale, direction, dest);
}
FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest )
{
@ -314,12 +369,9 @@ int Q_log2(int val);
// Math routines done in optimized assembly math package routines
void inline SinCos( float radians, float *sine, float *cosine )
{
#if defined( _WIN64 )
*sine = sinf(radians);
*cosine = cosf(radians);
#elif defined( _X360 )
#if defined( _X360 )
XMScalarSinCos( sine, cosine, radians );
#elif defined( _WIN32 )
#elif defined( PLATFORM_WINDOWS_PC32 )
_asm
{
fld DWORD PTR [radians]
@ -331,11 +383,12 @@ void inline SinCos( float radians, float *sine, float *cosine )
fstp DWORD PTR [edx]
fstp DWORD PTR [eax]
}
#elif defined( _LINUX ) || defined ( __APPLE__ )
#elif defined( PLATFORM_WINDOWS_PC64 )
*sine = sin( radians );
*cosine = cos( radians );
#elif defined( POSIX )
double __cosr, __sinr;
__asm __volatile__
("fsincos"
: "=t" (__cosr), "=u" (__sinr) : "0" (radians));
__asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
*sine = __sinr;
*cosine = __cosr;
@ -379,11 +432,6 @@ FORCEINLINE T Square( T const &a )
}
FORCEINLINE bool IsPowerOfTwo( uint x )
{
return ( x & ( x - 1 ) ) == 0;
}
// return the smallest power of two >= x.
// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed)
// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint,
@ -450,6 +498,19 @@ bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float f
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
{
MatrixGetColumn ( in, 3, out );
}
inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
{
MatrixSetColumn ( in, 3, out );
}
void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
void MatrixScaleByZero ( matrix3x4_t &out );
//void DecomposeRotation( const matrix3x4_t &mat, float *out );
void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
@ -625,15 +686,11 @@ template <class T> FORCEINLINE T AVG(T a, T b)
}
// number of elements in an array of static size
#define NELEMS(x) ((sizeof(x))/sizeof(x[0]))
#define NELEMS(x) ARRAYSIZE(x)
// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
#define XYZ(v) (v).x,(v).y,(v).z
//
// Returns a clamped value in the range [min, max].
//
#define V_clamp(val, min, max) (((val) > (max)) ? (max) : (((val) < (min)) ? (min) : (val)))
inline float Sign( float x )
{
@ -1070,14 +1127,14 @@ inline float SimpleSplineRemapValClamped( float val, float A, float B, float C,
if ( A == B )
return val >= B ? D : C;
float cVal = (val - A) / (B - A);
cVal = V_clamp( cVal, 0.0f, 1.0f );
cVal = clamp( cVal, 0.0f, 1.0f );
return C + (D - C) * SimpleSpline( cVal );
}
FORCEINLINE int RoundFloatToInt(float f)
{
#if defined( _WIN64 )
return std::round(f);
#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) || defined(__x86_64__)
return _mm_cvtss_si32(_mm_load_ss(&f));
#elif defined( _X360 )
#ifdef Assert
Assert( IsFPUControlWordSet() );
@ -1089,72 +1146,23 @@ FORCEINLINE int RoundFloatToInt(float f)
};
flResult = __fctiw( f );
return pResult[1];
#else // !X360
int nResult;
#if defined( _WIN32 )
__asm
{
fld f
fistp nResult
}
#elif defined( _LINUX ) || defined( __APPLE__ )
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#endif
return nResult;
#else
#error Unknown architecture
#endif
}
FORCEINLINE unsigned char RoundFloatToByte(float f)
{
#if defined( _WIN64 )
return std::round(f);
#elif defined( _X360 )
int nResult = RoundFloatToInt(f);
#ifdef Assert
Assert( IsFPUControlWordSet() );
#endif
union
{
double flResult;
int pIntResult[2];
unsigned char pResult[8];
};
flResult = __fctiw( f );
#ifdef Assert
Assert( pIntResult[1] >= 0 && pIntResult[1] <= 255 );
#endif
return pResult[8];
#else // !X360
int nResult;
#if defined( _WIN32 )
__asm
{
fld f
fistp nResult
}
#elif defined( _LINUX ) || defined( __APPLE__ )
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#endif
#ifdef Assert
Assert( nResult >= 0 && nResult <= 255 );
#endif
return nResult;
Assert( (nResult & ~0xFF) == 0 );
#endif
return (unsigned char) nResult;
}
FORCEINLINE uint32_t RoundFloatToUnsignedLong(float f)
FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
{
#if defined( _WIN64 )
return std::round(f);
#elif defined( _X360 )
#if defined( _X360 )
#ifdef Assert
Assert( IsFPUControlWordSet() );
#endif
@ -1162,29 +1170,48 @@ FORCEINLINE uint32_t RoundFloatToUnsignedLong(float f)
{
double flResult;
int pIntResult[2];
uint32_t pResult[2];
unsigned long pResult[2];
};
flResult = __fctiw( f );
Assert( pIntResult[1] >= 0 );
return pResult[1];
#else // !X360
#if defined( PLATFORM_WINDOWS_PC64 )
uint nRet = ( uint ) f;
if ( nRet & 1 )
{
if ( ( f - floor( f ) >= 0.5 ) )
{
nRet++;
}
}
else
{
if ( ( f - floor( f ) > 0.5 ) )
{
nRet++;
}
}
return nRet;
#else // PLATFORM_WINDOWS_PC64
unsigned char nResult[8];
#if defined( _WIN32 )
__asm
{
fld f
fistp qword ptr nResult
}
#elif defined( _LINUX ) || defined( __APPLE__ )
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#endif
#if defined( _WIN32 )
__asm
{
fld f
fistp qword ptr nResult
}
#elif POSIX
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#endif
return *((uint32_t*)nResult);
#endif
return *((unsigned long*)nResult);
#endif // PLATFORM_WINDOWS_PC64
#endif // !X360
}
FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
@ -1195,9 +1222,7 @@ FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
// Fast, accurate ftol:
FORCEINLINE int Float2Int( float a )
{
#if defined ( _WIN64 )
return a;
#elif defined( _X360 )
#if defined( _X360 )
union
{
double flResult;
@ -1206,78 +1231,54 @@ FORCEINLINE int Float2Int( float a )
flResult = __fctiwz( a );
return pResult[1];
#else // !X360
int RetVal;
#if defined( _WIN32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000C00 // set rounding mode bits to round towards zero
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
#elif defined( _LINUX ) || defined ( __APPLE__ )
RetVal = static_cast<int>( a );
#endif
return RetVal;
// Rely on compiler to generate CVTTSS2SI on x86
return (int) a;
#endif
}
// Over 15x faster than: (int)floor(value)
inline int Floor2Int( float a )
{
#if defined ( _WIN64 )
return std::floor(a);
#else
int RetVal;
#if defined( _X360 )
RetVal = (int)floor( a );
#elif defined( _WIN32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000400 // set rounding mode bits to round down
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store floored and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
#elif defined( _LINUX ) || defined( __APPLE__ )
#if defined( __i386__ )
// Convert to int and back, compare, subtract one if too big
__m128 a128 = _mm_set_ss(a);
RetVal = _mm_cvtss_si32(a128);
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
RetVal -= _mm_comigt_ss( rounded128, a128 );
#else
RetVal = static_cast<int>( floor(a) );
#endif
return RetVal;
#endif // _WIN64
}
//-----------------------------------------------------------------------------
// Fast color conversion from float to unsigned char
//-----------------------------------------------------------------------------
FORCEINLINE unsigned char FastFToC( float c )
FORCEINLINE unsigned int FastFToC( float c )
{
volatile float dc;
// ieee trick
dc = c * 255.0f + (float)(1 << 23);
// return the lsb
#if defined( _X360 )
return ((unsigned char*)&dc)[3];
#if defined( __i386__ )
// IEEE float bit manipulation works for values between [0, 1<<23)
union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
return convert.i & 255;
#else
return *(unsigned char*)&dc;
// consoles CPUs suffer from load-hit-store penalty
return Float2Int( c * 255.0f );
#endif
}
//-----------------------------------------------------------------------------
// Fast conversion from float to integer with magnitude less than 2**22
//-----------------------------------------------------------------------------
FORCEINLINE int FastFloatToSmallInt( float c )
{
#if defined( __i386__ )
	// IEEE float bit manipulation works for values between [-1<<22, 1<<22)
	// Adding 3<<22 shifts the value into a range where the integer part lands in the
	// low mantissa bits; mask those bits out and subtract the 1<<22 bias to recover it.
	// NOTE(review): this rounds per the current FPU rounding mode (round-to-nearest by
	// default) rather than truncating toward zero -- confirm callers expect that.
	union { float f; int i; } convert = { c + (float)(3<<22) };
	return (convert.i & ((1<<23)-1)) - (1<<22);
#else
	// consoles CPUs suffer from load-hit-store penalty
	return Float2Int( c );
#endif
}
@ -1289,39 +1290,23 @@ FORCEINLINE unsigned char FastFToC( float c )
inline float ClampToMsec( float in )
{
int msec = Floor2Int( in * 1000.0f + 0.5f );
return msec / 1000.0f;
return 0.001f * msec;
}
// Over 15x faster than: (int)ceil(value)
inline int Ceil2Int( float a )
{
#if defined ( _WIN64 )
return std::ceil(a);
#else
int RetVal;
#if defined( _X360 )
RetVal = (int)ceil( a );
#elif defined( _WIN32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000800 // set rounding mode bits to round down
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store floored and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
#elif defined( _LINUX ) || defined( __APPLE__ )
RetVal = static_cast<int>( ceil(a) );
#if defined( __i386__ )
// Convert to int and back, compare, add one if too small
__m128 a128 = _mm_load_ss(&a);
RetVal = _mm_cvtss_si32(a128);
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
RetVal += _mm_comilt_ss( rounded128, a128 );
#else
RetVal = static_cast<int>( ceil(a) );
#endif
return RetVal;
#endif // _WIN64
}
@ -1436,7 +1421,7 @@ FORCEINLINE unsigned char LinearToLightmap( float f )
FORCEINLINE void ColorClamp( Vector& color )
{
float maxc = MAX( color.x, MAX( color.y, color.z ) );
float maxc = V_max( color.x, V_max( color.y, color.z ) );
if ( maxc > 1.0f )
{
float ooMax = 1.0f / maxc;
@ -1565,7 +1550,7 @@ float Hermite_Spline(
float t );
void Hermite_SplineBasis( float t, float basis[4] );
void Hermite_SplineBasis( float t, float basis[] );
void Hermite_Spline(
const Quaternion &q0,
@ -1932,10 +1917,10 @@ FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsi
ny *= 16384.0f;
// '0' and '32768' values are invalid encodings
nx = MAX( nx, 1.0f ); // Make sure there are no zero values
ny = MAX( ny, 1.0f );
nx = MIN( nx, 32767.0f ); // Make sure there are no 32768 values
ny = MIN( ny, 32767.0f );
nx = V_max( nx, 1.0f ); // Make sure there are no zero values
ny = V_max( ny, 1.0f );
nx = V_min( nx, 32767.0f ); // Make sure there are no 32768 values
ny = V_min( ny, 32767.0f );
if ( nz < 0.0f )
nx = -nx; // Set the sign bit for z
@ -2085,6 +2070,46 @@ void RGBtoHSV( const Vector &rgb, Vector &hsv );
void HSVtoRGB( const Vector &hsv, Vector &rgb );
//-----------------------------------------------------------------------------
// Fast version of pow and log
//-----------------------------------------------------------------------------
float FastLog2(float i); // log2( i )
float FastPow2(float i); // 2^i
float FastPow(float a, float b); // a^b
float FastPow10( float i ); // 10^i
//-----------------------------------------------------------------------------
// For testing float equality
//-----------------------------------------------------------------------------
// True when a and b differ by no more than epsilon (inclusive comparison).
inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON )
{
	const float flDelta = fabs( a - b );
	return flDelta <= epsilon;
}
// Component-wise tolerance compare of two vectors: every component must be
// within epsilon. Defers to the scalar CloseEnough for each axis.
inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON )
{
	return CloseEnough( a.x, b.x, epsilon ) &&
	       CloseEnough( a.y, b.y, epsilon ) &&
	       CloseEnough( a.z, b.z, epsilon );
}
// Fast compare
// maxUlps is the maximum error in terms of Units in the Last Place. This
// specifies how big an error we are willing to accept in terms of the value
// of the least significant digit of the floating point numbers
// representation. maxUlps can also be interpreted in terms of how many
// representable floats we are willing to accept between A and B.
// This function will allow maxUlps-1 floats between A and B.
bool AlmostEqual(float a, float b, int maxUlps = 10);
// Component-wise ULP comparison of two vectors; true only when every
// component passes the scalar AlmostEqual check.
inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
{
	if ( !AlmostEqual( a.x, b.x, maxUlps ) )
		return false;
	if ( !AlmostEqual( a.y, b.y, maxUlps ) )
		return false;
	return AlmostEqual( a.z, b.z, maxUlps );
}
#endif // MATH_BASE_H

385
public/mathlib/matrixmath.h Normal file
View File

@ -0,0 +1,385 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// A set of generic, template-based matrix functions.
//===========================================================================//
#ifndef MATRIXMATH_H
#define MATRIXMATH_H
#include <stdarg.h>
// The operations in this file can perform basic matrix operations on matrices represented
// using any class that supports the necessary operations:
//
// .Element( row, col ) - return the element at a given matrix position
// .SetElement( row, col, val ) - modify an element
// .Width(), .Height() - get dimensions
// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size
//
// Generally, vectors can be used with these functions by using N x 1 matrices to represent them.
// Matrices are addressed as row, column, and indices are 0-based
//
//
// Note that the template versions of these routines are defined for generality - it is expected
// that template specialization is used for common high performance cases.
namespace MatrixMath
{
	/// M *= flScaleValue -- multiply every element of the matrix by a scalar.
	template<class MATRIXCLASS>
	void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue )
	{
		for( int i = 0; i < matrix.Height(); i++ )
		{
			for( int j = 0; j < matrix.Width(); j++ )
			{
				matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) );
			}
		}
	}

	/// AppendElementToMatrix - same as setting the element, except only works when all calls
	/// happen in top to bottom left to right order, and you have to call FinishedAppending when
	/// done. For normal matrix classes this is no different than SetElement, but for
	/// CSparseMatrix, it is an accelerated way to fill a matrix from scratch.
	template<class MATRIXCLASS>
	FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue )
	{
		matrix.SetElement( nRow, nCol, flValue ); // default implementation
	}

	/// Signals that a sequence of AppendElement calls is complete. No-op for ordinary
	/// matrix classes; specialized for classes that need a finalization pass.
	template<class MATRIXCLASS>
	FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation

	/// M += fl -- add a scalar to every element of the matrix.
	template<class MATRIXCLASS>
	void AddToMatrix( MATRIXCLASS &matrix, float flAddend )
	{
		for( int i = 0; i < matrix.Height(); i++ )
		{
			for( int j = 0; j < matrix.Width(); j++ )
			{
				matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) );
			}
		}
	}

	/// transpose: *pMatrixOut = transpose( matrixIn ). Resizes the output matrix.
	template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
	void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
	{
		pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() );
		for( int i = 0; i < pMatrixOut->Height(); i++ )
		{
			for( int j = 0; j < pMatrixOut->Width(); j++ )
			{
				AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) );
			}
		}
		FinishedAppending( *pMatrixOut );
	}

	/// copy: *pMatrixOut = matrixIn. Resizes the output matrix.
	template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
	void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
	{
		pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() );
		for( int i = 0; i < matrixIn.Height(); i++ )
		{
			for( int j = 0; j < matrixIn.Width(); j++ )
			{
				AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) );
			}
		}
		FinishedAppending( *pMatrixOut );
	}

	/// M += M : *pMatrixOut += matrixIn. Dimensions must already match.
	template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
	void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
	{
		for( int i = 0; i < matrixIn.Height(); i++ )
		{
			for( int j = 0; j < matrixIn.Width(); j++ )
			{
				pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) );
			}
		}
	}

	// M += scale * M : *pMatrixOut += flScale * matrixIn. Dimensions must already match.
	template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
	void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
	{
		for( int i = 0; i < matrixIn.Height(); i++ )
		{
			for( int j = 0; j < matrixIn.Width(); j++ )
			{
				pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) );
			}
		}
	}

	// simple way to initialize a matrix with constants from code.
	// Sets flDiagonalValue on the diagonal and 0 elsewhere; does NOT resize the matrix.
	template<class MATRIXCLASSOUT>
	void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 )
	{
		for( int i = 0; i < pMatrixOut->Height(); i++ )
		{
			for( int j = 0; j < pMatrixOut->Width(); j++ )
			{
				AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 );
			}
		}
		FinishedAppending( *pMatrixOut );
	}

	//// simple way to initialize a matrix with constants from code.
	// Values are read row-major from the varargs; default argument promotion means
	// each vararg is consumed as a double, so pass double-compatible literals.
	template<class MATRIXCLASSOUT>
	void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... )
	{
		va_list argPtr;
		va_start( argPtr, nCols );
		pMatrix->SetDimensions( nRows, nCols );
		for( int nRow = 0; nRow < nRows; nRow++ )
		{
			for( int nCol = 0; nCol < nCols; nCol++ )
			{
				double flNewValue = va_arg( argPtr, double );
				pMatrix->SetElement( nRow, nCol, flNewValue );
			}
		}
		va_end( argPtr );
	}

	/// row and column accessors. treat a row or a column as a column vector.
	/// Non-owning view: the underlying matrix must outlive the accessor.
	template<class MATRIXTYPE> class MatrixRowAccessor
	{
	public:
		FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow )
		{
			m_pMatrix = &matrix;
			m_nRow = nRow;
		}

		// The row is presented as an Nx1 column vector, so the first index selects
		// the element along the row and the column index must be 0.
		FORCEINLINE float Element( int nRow, int nCol ) const
		{
			Assert( nCol == 0 );
			return m_pMatrix->Element( m_nRow, nRow );
		}

		FORCEINLINE int Width( void ) const { return 1; };
		FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }

	private:
		MATRIXTYPE const *m_pMatrix;
		int m_nRow;
	};

	/// Non-owning view of one column of a matrix as an Nx1 column vector.
	template<class MATRIXTYPE> class MatrixColumnAccessor
	{
	public:
		FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn )
		{
			m_pMatrix = &matrix;
			m_nColumn = nColumn;
		}

		FORCEINLINE float Element( int nRow, int nColumn ) const
		{
			Assert( nColumn == 0 );
			return m_pMatrix->Element( nRow, m_nColumn );
		}

		FORCEINLINE int Width( void ) const { return 1; }
		FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); }

	private:
		MATRIXTYPE const *m_pMatrix;
		int m_nColumn;
	};

	/// this translator acts as a proxy for the transposed matrix (non-owning view;
	/// no data is copied -- element reads are forwarded with indices swapped).
	template<class MATRIXTYPE> class MatrixTransposeAccessor
	{
	public:
		FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix )
		{
			m_pMatrix = &matrix;
		}

		FORCEINLINE float Element( int nRow, int nColumn ) const
		{
			return m_pMatrix->Element( nColumn, nRow );
		}

		FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); }
		FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }

	private:
		MATRIXTYPE const *m_pMatrix;
	};

	/// this transpose returns a wrapper around its argument, allowing things like
	/// AddMatrixToMatrix( TransposeMatrix( matA ), &matB ) without an extra copy
	template<class MATRIXCLASSIN>
	MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn )
	{
		return MatrixTransposeAccessor<MATRIXCLASSIN>( matrixIn );
	}

	/// retrieve rows and columns as lightweight column-vector views
	template<class MATRIXTYPE>
	FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn )
	{
		return MatrixColumnAccessor<MATRIXTYPE>( matrix, nColumn );
	}

	template<class MATRIXTYPE>
	FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow )
	{
		return MatrixRowAccessor<MATRIXTYPE>( matrix, nRow );
	}

	//// dot product between vectors (or rows and/or columns via accessors).
	// Accumulates in double for precision; narrowed to float on return.
	template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE >
	float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB )
	{
		Assert( vecA.Width() == 1 );
		Assert( vecB.Width() == 1 );
		Assert( vecA.Height() == vecB.Height() );
		double flResult = 0;
		for( int i = 0; i < vecA.Height(); i++ )
		{
			flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 );
		}
		return flResult;
	}

	/// matrix x matrix multiplication: *pMatrixOut = matA * matB. Resizes the output.
	template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE>
	void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut )
	{
		Assert( matA.Width() == matB.Height() );
		pMatrixOut->SetDimensions( matA.Height(), matB.Width() );
		for( int i = 0; i < matA.Height(); i++ )
		{
			for( int j = 0; j < matB.Width(); j++ )
			{
				pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), MatrixColumn( matB, j ) ) );
			}
		}
	}

	/// solve Ax=B via the conjugate gradient method. Code and naming conventions based on the
	/// wikipedia article. vecX holds the initial guess on input and the solution on output.
	/// Capped at 100 iterations; stops early once the squared residual falls below flTolerance.
	/// NOTE(review): CG requires matA to be symmetric positive-definite -- confirm at call sites.
	template<class ATYPE, class XTYPE, class BTYPE>
	void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
	{
		XTYPE vecR;								// residual: r = b - A*x
		vecR.SetDimensions( vecX.Height(), 1 );
		MatrixMultiply( matA, vecX, &vecR );
		ScaleMatrix( vecR, -1 );
		AddMatrixToMatrix( vecB, &vecR );
		XTYPE vecP;								// search direction
		CopyMatrix( vecR, &vecP );
		float flRsOld = InnerProduct( vecR, vecR );
		for( int nIter = 0; nIter < 100; nIter++ )
		{
			XTYPE vecAp;
			MatrixMultiply( matA, vecP, &vecAp );
			float flDivisor = InnerProduct( vecAp, vecP );
			float flAlpha = flRsOld / flDivisor;
			// step along the search direction, then update the residual to match
			AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
			AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
			float flRsNew = InnerProduct( vecR, vecR );
			if ( flRsNew < flTolerance )
			{
				break;
			}
			ScaleMatrix( vecP, flRsNew / flRsOld );
			AddMatrixToMatrix( vecR, &vecP );
			flRsOld = flRsNew;
		}
	}

	/// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on
	/// the wikipedia article. Same as Conjugate gradient but allows passing in two matrices whose
	/// product is used as the A matrix (in order to preserve sparsity)
	template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE>
	void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
	{
		XTYPE vecR1;							// r1 = A*x
		vecR1.SetDimensions( vecX.Height(), 1 );
		MatrixMultiply( matA, vecX, &vecR1 );
		XTYPE vecR;								// r = b - A'*(A*x)
		vecR.SetDimensions( vecR1.Height(), 1 );
		MatrixMultiply( matAPrime, vecR1, &vecR );
		ScaleMatrix( vecR, -1 );
		AddMatrixToMatrix( vecB, &vecR );
		XTYPE vecP;								// search direction
		CopyMatrix( vecR, &vecP );
		float flRsOld = InnerProduct( vecR, vecR );
		for( int nIter = 0; nIter < 100; nIter++ )
		{
			// Ap = A' * (A * p), applied as two multiplies to keep the factors sparse
			XTYPE vecAp1;
			MatrixMultiply( matA, vecP, &vecAp1 );
			XTYPE vecAp;
			MatrixMultiply( matAPrime, vecAp1, &vecAp );
			float flDivisor = InnerProduct( vecAp, vecP );
			float flAlpha = flRsOld / flDivisor;
			AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
			AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
			float flRsNew = InnerProduct( vecR, vecR );
			if ( flRsNew < flTolerance )
			{
				break;
			}
			ScaleMatrix( vecP, flRsNew / flRsOld );
			AddMatrixToMatrix( vecR, &vecP );
			flRsOld = flRsNew;
		}
	}

	/// Least-squares solve of the (generally overdetermined) system A x = b using the
	/// normal equations A'A x = A'b, solved with the two-matrix conjugate gradient above.
	template<class ATYPE, class XTYPE, class BTYPE>
	void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX )
	{
		// now, generate the normal equations: beta = A' * b
		BTYPE vecBeta;
		MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta );
		vecX.SetDimensions( matA.Width(), 1 );
		MatrixMath::SetMatrixToIdentity( &vecX );	// initial guess for the iterative solve
		ATYPE matATransposed;
		TransposeMatrix( matA, &matATransposed );
		ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 );
	}
};
/// a simple fixed-size matrix class, statically dimensioned via template arguments.
/// Satisfies the Element/SetElement/Width/Height/SetDimensions interface the
/// MatrixMath routines expect.
template<int NUMROWS, int NUMCOLS> class CFixedMatrix
{
public:
	FORCEINLINE int Width( void ) const { return NUMCOLS; }
	FORCEINLINE int Height( void ) const { return NUMROWS; }
	FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; }
	FORCEINLINE void SetElement( int nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; }
	// Dimensions are fixed at compile time; this just asserts the request matches them.
	FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); }

private:
	// NOTE: storage is deliberately left uninitialized, matching the "un-initted"
	// SetDimensions contract described in the header comment above.
	float m_flValues[NUMROWS][NUMCOLS];
};
#endif //matrixmath_h

View File

@ -1,4 +1,4 @@
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//===== Copyright <EFBFBD> 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose: - defines SIMD "structure of arrays" classes and functions.
//
@ -15,7 +15,7 @@
#include <mathlib/vector.h>
#include <mathlib/mathlib.h>
#if defined(_LINUX) || defined(__APPLE__)
#if defined(GNUC)
#define USE_STDC_FOR_SIMD 0
#else
#define USE_STDC_FOR_SIMD 0
@ -108,7 +108,7 @@ struct ALIGN16 intx4
m_i32[2] == other.m_i32[2] &&
m_i32[3] == other.m_i32[3] ;
}
};
} ALIGN16_POST;
#if defined( _DEBUG ) && defined( _X360 )
@ -136,13 +136,13 @@ FORCEINLINE void TestVPUFlags() {}
// miss.)
#ifndef _X360
extern const fltx4 Four_Zeros; // 0 0 0 0
extern const fltx4 Four_Ones; // 1 1 1 1
extern const fltx4 Four_Twos; // 2 2 2 2
extern const fltx4 Four_Ones; // 1 1 1 1
extern const fltx4 Four_Twos; // 2 2 2 2
extern const fltx4 Four_Threes; // 3 3 3 3
extern const fltx4 Four_Fours; // guess.
extern const fltx4 Four_Point225s; // .225 .225 .225 .225
extern const fltx4 Four_PointFives; // .5 .5 .5 .5
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
extern const fltx4 Four_2ToThe21s; // (1<<21)..
extern const fltx4 Four_2ToThe22s; // (1<<22)..
extern const fltx4 Four_2ToThe23s; // (1<<23)..
@ -157,7 +157,7 @@ extern const fltx4 Four_Threes; // 3 3 3 3
extern const fltx4 Four_Fours; // guess.
extern const fltx4 Four_Point225s; // .225 .225 .225 .225
extern const fltx4 Four_PointFives; // .5 .5 .5 .5
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
extern const fltx4 Four_2ToThe21s; // (1<<21)..
extern const fltx4 Four_2ToThe22s; // (1<<22)..
extern const fltx4 Four_2ToThe23s; // (1<<23)..
@ -167,20 +167,20 @@ extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
#endif
extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX
extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX
extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
// external aligned integer constants
extern const ALIGN16 int32 g_SIMD_clear_signmask[]; // 0x7fffffff x 4
extern const ALIGN16 int32 g_SIMD_signmask[]; // 0x80000000 x 4
extern const ALIGN16 int32 g_SIMD_lsbmask[]; // 0xfffffffe x 4
extern const ALIGN16 int32 g_SIMD_clear_wmask[]; // -1 -1 -1 0
extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4]; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
extern const ALIGN16 int32 g_SIMD_AllOnesMask[]; // ~0,~0,~0,~0
extern const ALIGN16 int32 g_SIMD_Low16BitsMask[]; // 0xffff x 4
extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4
extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4
extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4
extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0
extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0
extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4
// this mask is used for skipping the tail of things. If you have N elements in an array, and wish
// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration.
extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4];
extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST;
// Define prefetch macros.
// The characteristics of cache and prefetch are completely
@ -436,23 +436,23 @@ FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
return result;
}
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
{
fltx4 retVal;
SubFloat( retVal, 0 ) = MAX( SubFloat( a, 0 ), SubFloat( b, 0 ) );
SubFloat( retVal, 1 ) = MAX( SubFloat( a, 1 ), SubFloat( b, 1 ) );
SubFloat( retVal, 2 ) = MAX( SubFloat( a, 2 ), SubFloat( b, 2 ) );
SubFloat( retVal, 3 ) = MAX( SubFloat( a, 3 ), SubFloat( b, 3 ) );
SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) );
SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) );
SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) );
SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) );
return retVal;
}
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
{
fltx4 retVal;
SubFloat( retVal, 0 ) = MIN( SubFloat( a, 0 ), SubFloat( b, 0 ) );
SubFloat( retVal, 1 ) = MIN( SubFloat( a, 1 ), SubFloat( b, 1 ) );
SubFloat( retVal, 2 ) = MIN( SubFloat( a, 2 ), SubFloat( b, 2 ) );
SubFloat( retVal, 3 ) = MIN( SubFloat( a, 3 ), SubFloat( b, 3 ) );
SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) );
SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) );
SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) );
SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) );
return retVal;
}
@ -858,7 +858,7 @@ FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
// and replicate it to the whole return value.
FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
{
float lowest = MIN( MIN( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
float lowest = min( min( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
return ReplicateX4(lowest);
}
@ -866,7 +866,7 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
// and replicate it to the whole return value.
FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
{
float highest = MAX( MAX( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
float highest = max( max( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
return ReplicateX4(highest);
}
@ -1067,12 +1067,12 @@ FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
// DivSIMD defined further down, since it uses ReciprocalSIMD
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
{
return __vmaxfp( a, b );
}
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
{
return __vminfp( a, b );
}
@ -1520,11 +1520,11 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
// compareOne is [y,z,G,G]
fltx4 retval = MinSIMD( a, compareOne );
// retVal is [MIN(x,y), MIN(y,z), G, G]
// retVal is [min(x,y), min(y,z), G, G]
compareOne = __vrlimi( compareOne, a, 8 , 2);
// compareOne is [z, G, G, G]
retval = MinSIMD( retval, compareOne );
// retVal = [ MIN(MIN(x,y),z), G, G, G ]
// retVal = [ min(min(x,y),z), G, G, G ]
// splat the x component out to the whole vector and return
return SplatXSIMD( retval );
@ -1544,11 +1544,11 @@ FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
// compareOne is [y,z,G,G]
fltx4 retval = MaxSIMD( a, compareOne );
// retVal is [MAX(x,y), MAX(y,z), G, G]
// retVal is [max(x,y), max(y,z), G, G]
compareOne = __vrlimi( compareOne, a, 8 , 2);
// compareOne is [z, G, G, G]
retval = MaxSIMD( retval, compareOne );
// retVal = [ MAX(MAX(x,y),z), G, G, G ]
// retVal = [ max(max(x,y),z), G, G, G ]
// splat the x component out to the whole vector and return
return SplatXSIMD( retval );
@ -1757,7 +1757,7 @@ FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
return _mm_and_ps( a, b );
}
FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // a & ~b
FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
{
return _mm_andnot_ps( a, b );
}
@ -1813,7 +1813,7 @@ FORCEINLINE fltx4 ReplicateX4( float flValue )
FORCEINLINE float SubFloat( const fltx4 & a, int idx )
{
// NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
#if !defined _LINUX && !defined __APPLE__
#ifndef POSIX
return a.m128_f32[ idx ];
#else
return (reinterpret_cast<float const *>(&a))[idx];
@ -1822,7 +1822,7 @@ FORCEINLINE float SubFloat( const fltx4 & a, int idx )
FORCEINLINE float & SubFloat( fltx4 & a, int idx )
{
#if !defined _LINUX && !defined __APPLE__
#ifndef POSIX
return a.m128_f32[ idx ];
#else
return (reinterpret_cast<float *>(&a))[idx];
@ -1836,7 +1836,7 @@ FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
{
#if !defined _LINUX && !defined __APPLE__
#ifndef POSIX
return a.m128_u32[idx];
#else
return (reinterpret_cast<uint32 const *>(&a))[idx];
@ -1845,7 +1845,7 @@ FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
{
#if !defined _LINUX && !defined __APPLE__
#ifndef POSIX
return a.m128_u32[idx];
#else
return (reinterpret_cast<uint32 *>(&a))[idx];
@ -2120,12 +2120,12 @@ FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <=
return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) );
}
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
{
return _mm_min_ps( a, b );
}
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
{
return _mm_max_ps( a, b );
}
@ -2271,11 +2271,11 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a )
fltx4 compareOne = RotateLeft( a );
// compareOne is [y,z,G,x]
fltx4 retval = MinSIMD( a, compareOne );
// retVal is [MIN(x,y), ... ]
// retVal is [min(x,y), ... ]
compareOne = RotateLeft2( a );
// compareOne is [z, G, x, y]
retval = MinSIMD( retval, compareOne );
// retVal = [ MIN(MIN(x,y),z)..]
// retVal = [ min(min(x,y),z)..]
// splat the x component out to the whole vector and return
return SplatXSIMD( retval );
@ -2288,11 +2288,11 @@ FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a )
fltx4 compareOne = RotateLeft( a );
// compareOne is [y,z,G,x]
fltx4 retval = MaxSIMD( a, compareOne );
// retVal is [MAX(x,y), ... ]
// retVal is [max(x,y), ... ]
compareOne = RotateLeft2( a );
// compareOne is [z, G, x, y]
retval = MaxSIMD( retval, compareOne );
// retVal = [ MAX(MAX(x,y),z)..]
// retVal = [ max(max(x,y),z)..]
// splat the x component out to the whole vector and return
return SplatXSIMD( retval );

View File

@ -233,7 +233,7 @@ FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
sinom = MIN( sinom, 1.f );
sinom = min( sinom, 1.f );
float sinsom = sin( asin( sinom ) * t );

View File

@ -31,6 +31,7 @@
#include "tier0/threadtools.h"
#include "mathlib/vector2d.h"
#include "mathlib/math_pfns.h"
#include "minmax.h"
// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
//#define VECTOR_PARANOIA 1
@ -48,7 +49,11 @@
#ifdef VECTOR_PARANOIA
#define CHECK_VALID( _v) Assert( (_v).IsValid() )
#else
#ifdef GNUC
#define CHECK_VALID( _v)
#else
#define CHECK_VALID( _v) 0
#endif
#endif
#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference!
@ -129,6 +134,7 @@ public:
}
vec_t NormalizeInPlace();
Vector Normalized() const;
bool IsLengthGreaterThan( float val ) const;
bool IsLengthLessThan( float val ) const;
@ -202,6 +208,7 @@ private:
#endif
};
FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }
#if ( ( !defined( _X360 ) ) && ( ! defined( _LINUX) ) )
#define USE_M64S 1
@ -260,7 +267,7 @@ private:
// No assignment operators either...
// ShortVector& operator=( ShortVector const& src );
};
} ALIGN8_POST;
@ -396,7 +403,7 @@ public:
#endif
float w; // this space is used anyway
};
} ALIGN16_POST;
//-----------------------------------------------------------------------------
// Vector related operations
@ -416,7 +423,9 @@ FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& resul
FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result );
FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result );
inline void VectorScale ( const Vector& in, vec_t scale, Vector& result );
inline void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
// Don't mark this as inline in its function declaration. That's only necessary on its
// definition, and 'inline' here leads to gcc warnings.
void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
// Vector equality with tolerance
bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f );
@ -443,6 +452,31 @@ void VectorMax( const Vector &a, const Vector &b, Vector &result );
// Linearly interpolate between two vectors
void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest );
Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t );
FORCEINLINE Vector ReplicateToVector( float x )
{
return Vector( x, x, x );
}
// check if a point is in the field of a view of an object. supports up to 180 degree fov.
FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition,
Vector const &vecTargetPosition,
Vector const &vecLookDirection, float flCosHalfFOV )
{
Vector vecDelta = vecTargetPosition - vecSrcPosition;
float cosDiff = DotProduct( vecLookDirection, vecDelta );
if ( cosDiff < 0 )
return false;
float flLen2 = vecDelta.LengthSqr();
// a/sqrt(b) > c == a^2 > b * c ^2
return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV );
}
#ifndef VECTOR_NO_SLOW_OPERATIONS
@ -454,6 +488,10 @@ Vector RandomVector( vec_t minVal, vec_t maxVal );
#endif
float RandomVectorInUnitSphere( Vector *pVector );
float RandomVectorInUnitCircle( Vector2D *pVector );
//-----------------------------------------------------------------------------
//
// Inlined Vector methods
@ -517,9 +555,9 @@ inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz )
inline void Vector::Random( vec_t minVal, vec_t maxVal )
{
x = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
z = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
CHECK_VALID(*this);
}
@ -1082,14 +1120,6 @@ inline void VectorScale ( const Vector& in, vec_t scale, Vector& result )
VectorMultiply( in, scale, result );
}
inline void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
{
CHECK_VALID(start);
CHECK_VALID(direction);
dest.x = start.x + scale * direction.x;
dest.y = start.y + scale * direction.y;
dest.z = start.z + scale * direction.z;
}
FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c )
{
@ -1131,6 +1161,12 @@ inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector&
dest.z = src1.z + (src2.z - src1.z) * t;
}
inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t )
{
Vector result;
VectorLerp( src1, src2, t, result );
return result;
}
//-----------------------------------------------------------------------------
// Temporary storage for vector results so const Vector& results can be returned
@ -1431,6 +1467,13 @@ inline void VectorMax( const Vector &a, const Vector &b, Vector &result )
result.z = fpmax(a.z, b.z);
}
inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs )
{
Vector vecDelta;
VectorSubtract( vecMaxs, vecMins, vecDelta );
return DotProduct( vecDelta, vecDelta );
}
// Get a random vector.
inline Vector RandomVector( float minVal, float maxVal )
{
@ -1610,7 +1653,7 @@ public:
}
#endif
};
} ALIGN16_POST;
//-----------------------------------------------------------------------------
@ -1643,6 +1686,9 @@ public:
extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles );
FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; }
inline Quaternion::Quaternion(RadianEuler const &angle)
{
AngleQuaternion( angle, *this );
@ -1790,6 +1836,8 @@ private:
#endif
};
FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; }
//-----------------------------------------------------------------------------
// Allows us to specifically pass the vector by value when we need to
//-----------------------------------------------------------------------------
@ -1853,9 +1901,9 @@ inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz )
inline void QAngle::Random( vec_t minVal, vec_t maxVal )
{
x = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
z = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
CHECK_VALID(*this);
}
@ -2128,11 +2176,16 @@ inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angle
}
#if !defined( _X360 )
extern float (*pfInvRSquared)( const float *v );
FORCEINLINE vec_t InvRSquared( float const *v )
{
return (*pfInvRSquared)(v);
#if defined(__i386__) || defined(_M_IX86)
float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result;
_mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) ));
return result;
#else
return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
#endif
}
FORCEINLINE vec_t InvRSquared( const Vector &v )
@ -2140,36 +2193,63 @@ FORCEINLINE vec_t InvRSquared( const Vector &v )
return InvRSquared(&v.x);
}
#else
// call directly
FORCEINLINE float _VMX_InvRSquared( const Vector &v )
#if defined(__i386__) || defined(_M_IX86)
inline void _SSE_RSqrtInline( float a, float* out )
{
XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
xmV = XMVector3Dot( xmV, xmV );
return xmV.x;
__m128 xx = _mm_load_ss( &a );
__m128 xr = _mm_rsqrt_ss( xx );
__m128 xt;
xt = _mm_mul_ss( xr, xr );
xt = _mm_mul_ss( xt, xx );
xt = _mm_sub_ss( _mm_set_ss(3.f), xt );
xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) );
xr = _mm_mul_ss( xr, xt );
_mm_store_ss( out, xr );
}
#define InvRSquared(x) _VMX_InvRSquared(x)
#endif // _X360
#if !defined( _X360 )
extern float (FASTCALL *pfVectorNormalize)(Vector& v);
#endif
// FIXME: Change this back to a #define once we get rid of the vec_t version
FORCEINLINE float VectorNormalize( Vector& v )
FORCEINLINE float VectorNormalize( Vector& vec )
{
return (*pfVectorNormalize)(v);
#ifndef DEBUG // stop crashing my edit-and-continue!
#if defined(__i386__) || defined(_M_IX86)
#define DO_SSE_OPTIMIZATION
#endif
#endif
#if defined( DO_SSE_OPTIMIZATION )
float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen;
_SSE_RSqrtInline(sqrlen, &invlen);
vec.x *= invlen;
vec.y *= invlen;
vec.z *= invlen;
return sqrlen * invlen;
#else
extern float (FASTCALL *pfVectorNormalize)(Vector& v);
return (*pfVectorNormalize)(vec);
#endif
}
// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
FORCEINLINE float VectorNormalize( float * v )
{
return VectorNormalize(*(reinterpret_cast<Vector *>(v)));
}
FORCEINLINE void VectorNormalizeFast( Vector &vec )
{
VectorNormalize(vec);
}
#else
FORCEINLINE float _VMX_InvRSquared( const Vector &v )
{
XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
xmV = XMVector3Dot( xmV, xmV );
return xmV.x;
}
// call directly
FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
{
@ -2180,6 +2260,9 @@ FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
vec.z *= den;
return mag;
}
#define InvRSquared(x) _VMX_InvRSquared(x)
// FIXME: Change this back to a #define once we get rid of the vec_t version
FORCEINLINE float VectorNormalize( Vector& v )
{
@ -2191,18 +2274,6 @@ FORCEINLINE float VectorNormalize( float *pV )
return _VMX_VectorNormalize(*(reinterpret_cast<Vector*>(pV)));
}
#endif // _X360
#if !defined( _X360 )
extern void (FASTCALL *pfVectorNormalizeFast)(Vector& v);
FORCEINLINE void VectorNormalizeFast( Vector& v )
{
(*pfVectorNormalizeFast)(v);
}
#else
// call directly
FORCEINLINE void VectorNormalizeFast( Vector &vec )
{
@ -2215,11 +2286,19 @@ FORCEINLINE void VectorNormalizeFast( Vector &vec )
#endif // _X360
inline vec_t Vector::NormalizeInPlace()
{
return VectorNormalize( *this );
}
inline Vector Vector::Normalized() const
{
Vector norm = *this;
VectorNormalize( norm );
return norm;
}
inline bool Vector::IsLengthGreaterThan( float val ) const
{
return LengthSqr() > val*val;

View File

@ -239,8 +239,8 @@ inline void Vector2D::Init( vec_t ix, vec_t iy )
inline void Vector2D::Random( float minVal, float maxVal )
{
x = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
}
inline void Vector2DClear( Vector2D& a )

View File

@ -132,11 +132,7 @@ const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX );
// SSE optimized routines
//-----------------------------------------------------------------------------
#ifdef _WIN32
class __declspec(align(16)) Vector4DAligned : public Vector4D
#elif defined _LINUX || defined __APPLE__
class __attribute__((aligned(16))) Vector4DAligned : public Vector4D
#endif
class ALIGN16 Vector4DAligned : public Vector4D
{
public:
Vector4DAligned(void) {}
@ -154,7 +150,7 @@ private:
// No assignment operators either...
Vector4DAligned& operator=( Vector4DAligned const& src );
};
} ALIGN16_POST;
//-----------------------------------------------------------------------------
// Vector4D related operations
@ -249,10 +245,10 @@ inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw )
inline void Vector4D::Random( vec_t minVal, vec_t maxVal )
{
x = minVal + ((vec_t)rand() / (float)RAND_MAX) * (maxVal - minVal);
y = minVal + ((vec_t)rand() / (float)RAND_MAX) * (maxVal - minVal);
z = minVal + ((vec_t)rand() / (float)RAND_MAX) * (maxVal - minVal);
w = minVal + ((vec_t)rand() / (float)RAND_MAX) * (maxVal - minVal);
x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
}
inline void Vector4DClear( Vector4D& a )

View File

@ -54,6 +54,7 @@ public:
// Creates a matrix where the X axis = forward
// the Y axis = left, and the Z axis = up
VMatrix( const Vector& forward, const Vector& left, const Vector& up );
VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation );
// Construct from a 3x4 matrix
VMatrix( const matrix3x4_t& matrix3x4 );
@ -106,7 +107,6 @@ public:
void PreTranslate(const Vector &vTrans);
void PostTranslate(const Vector &vTrans);
matrix3x4_t& As3x4();
const matrix3x4_t& As3x4() const;
void CopyFrom3x4( const matrix3x4_t &m3x4 );
void Set3x4( matrix3x4_t& matrix3x4 ) const;
@ -199,6 +199,9 @@ public:
// Setup a matrix for origin and angles.
void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles );
// Setup a matrix for angles and no translation.
void SetupMatrixAngles( const QAngle &vAngles );
// General inverse. This may fail so check the return!
bool InverseGeneral(VMatrix &vInverse) const;
@ -457,6 +460,16 @@ inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector&
);
}
inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
{
Init(
xAxis.x, yAxis.x, zAxis.x, translation.x,
xAxis.y, yAxis.y, zAxis.y, translation.y,
xAxis.z, yAxis.z, zAxis.z, translation.z,
0.0f, 0.0f, 0.0f, 1.0f
);
}
inline void VMatrix::Init(
vec_t m00, vec_t m01, vec_t m02, vec_t m03,
@ -616,11 +629,6 @@ inline const matrix3x4_t& VMatrix::As3x4() const
return *((const matrix3x4_t*)this);
}
inline matrix3x4_t& VMatrix::As3x4()
{
return *((matrix3x4_t*)this);
}
inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
{
memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) );

18
public/minmax.h Normal file
View File

@ -0,0 +1,18 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#ifndef MINMAX_H
#define MINMAX_H
#ifndef V_min
#define V_min(a,b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef V_max
#define V_max(a,b) (((a) > (b)) ? (a) : (b))
#endif
#endif // MINMAX_H

View File

@ -109,16 +109,38 @@ FORCEINLINE float fpmax( float a, float b )
#endif
#ifdef __cplusplus
template< class T >
inline T clamp( T const &val, T const &minVal, T const &maxVal )
{
if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
}
// This is the preferred clamp operator. Using the clamp macro can lead to
// unexpected side-effects or more expensive code. Even the clamp (all
// lower-case) function can generate more expensive code because of the
// mixed types involved.
template< class T >
T Clamp( T const &val, T const &minVal, T const &maxVal )
{
if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
}
// This is the preferred Min operator. Using the MIN macro can lead to unexpected
// side-effects or more expensive code.
template< class T >
T Min( T const &val1, T const &val2 )
{
return val1 < val2 ? val1 : val2;
}
// This is the preferred Max operator. Using the MAX macro can lead to unexpected
// side-effects or more expensive code.
template< class T >
T Max( T const &val1, T const &val2 )
{
return val1 > val2 ? val1 : val2;
}
#endif
#ifndef FALSE
@ -247,7 +269,7 @@ struct colorVec
#ifndef NOTE_UNUSED
#define NOTE_UNUSED(x) (x = x) // for pesky compiler / lint warnings
#define NOTE_UNUSED(x) (void)(x) // for pesky compiler / lint warnings
#endif
#ifdef __cplusplus

View File

@ -32,11 +32,12 @@
#define SETBITS(iBitVector, bits) ((iBitVector) |= (bits))
#define CLEARBITS(iBitVector, bits) ((iBitVector) &= ~(bits))
#define FBitSet(iBitVector, bit) ((iBitVector) & (bit))
#define FBitSet(iBitVector, bits) ((iBitVector) & (bits))
inline bool IsPowerOfTwo( int value )
template <typename T>
inline bool IsPowerOfTwo( T value )
{
return (value & ( value - 1 )) == 0;
return (value & ( value - (T)1 )) == (T)0;
}
#define CONST_INTEGER_AS_STRING(x) #x //Wraps the integer in quotes, allowing us to form constant strings with it

View File

@ -1,4 +1,4 @@
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
//========= Copyright <EFBFBD> 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
@ -11,11 +11,14 @@
#pragma once
#endif
#ifdef _WIN32
#include <intrin.h>
#endif
#include <assert.h>
#include "tier0/platform.h"
PLATFORM_INTERFACE int64 g_ClockSpeed;
PLATFORM_INTERFACE uint32_t g_dwClockSpeed;
PLATFORM_INTERFACE uint64 g_ClockSpeed;
#if defined( _X360 ) && defined( _CERT )
PLATFORM_INTERFACE uint32_t g_dwFakeFastCounter;
#endif
@ -30,20 +33,20 @@ friend class CFastTimer;
public:
CCycleCount();
CCycleCount( int64 cycles );
CCycleCount( uint64 cycles );
void Sample(); // Sample the clock. This takes about 34 clocks to execute (or 26,000 calls per millisecond on a P900).
void Init(); // Set to zero.
void Init( float initTimeMsec );
void Init( double initTimeMsec ) { Init( (float)initTimeMsec ); }
void Init( int64 cycles );
void Init( uint64 cycles );
bool IsLessThan( CCycleCount const &other ) const; // Compare two counts.
// Convert to other time representations. These functions are slow, so it's preferable to call them
// during display rather than inside a timing block.
uint32_t GetCycles() const;
int64 GetLongCycles() const;
uint64 GetLongCycles() const;
uint32_t GetMicroseconds() const;
uint64 GetUlMicroseconds() const;
@ -63,12 +66,12 @@ public:
// dest = rSrc1 - rSrc2
static void Sub( CCycleCount const &rSrc1, CCycleCount const &rSrc2, CCycleCount &dest ); // Add two samples together.
static int64 GetTimestamp();
static uint64 GetTimestamp();
int64 m_Int64;
uint64 m_Int64;
};
class CClockSpeedInit
class PLATFORM_CLASS CClockSpeedInit
{
public:
CClockSpeedInit()
@ -76,21 +79,7 @@ public:
Init();
}
static void Init()
{
#if defined( _X360 ) && !defined( _CERT )
PMCStart();
PMCInitIntervalTimer( 0 );
#endif
const CPUInformation& pi = GetCPUInformation();
g_ClockSpeed = pi.m_Speed;
g_dwClockSpeed = (uint32_t)g_ClockSpeed;
g_ClockSpeedMicrosecondsMultiplier = 1000000.0 / (double)g_ClockSpeed;
g_ClockSpeedMillisecondsMultiplier = 1000.0 / (double)g_ClockSpeed;
g_ClockSpeedSecondsMultiplier = 1.0f / (double)g_ClockSpeed;
}
static void Init();
};
class CFastTimer
@ -104,7 +93,7 @@ public:
CCycleCount GetDurationInProgress() const; // Call without ending. Not that cheap.
// Return number of cycles per second on this processor.
static inline uint32_t GetClockSpeed();
static inline int64 GetClockSpeed();
private:
CCycleCount m_Duration;
@ -233,8 +222,6 @@ private:
unsigned m_nIters;
CCycleCount m_Total;
CCycleCount m_Peak;
// bool m_fReport;
// const tchar *m_pszName;
};
// -------------------------------------------------------------------------- //
@ -257,87 +244,37 @@ private:
inline CCycleCount::CCycleCount()
{
Init( (int64)0 );
Init( (uint64)0 );
}
inline CCycleCount::CCycleCount( int64 cycles )
inline CCycleCount::CCycleCount( uint64 cycles )
{
Init( cycles );
}
inline void CCycleCount::Init()
{
Init( (int64)0 );
Init( (uint64)0 );
}
inline void CCycleCount::Init( float initTimeMsec )
{
if ( g_ClockSpeedMillisecondsMultiplier > 0 )
Init( (int64)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier) );
Init( (uint64)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier) );
else
Init( (int64)0 );
Init( (uint64)0 );
}
inline void CCycleCount::Init( int64 cycles )
inline void CCycleCount::Init( uint64 cycles )
{
m_Int64 = cycles;
}
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4189) // warning C4189: local variable is initialized but not referenced
#endif
inline void CCycleCount::Sample()
{
#if defined( _X360 )
#if !defined( _CERT )
// read the highest resolution timer directly (ticks at native 3.2GHz), bypassing any calls into PMC
// can only resolve 32 bits, rollover is ~1.32 secs
// based on PMCGetIntervalTimer() from the April 2007 XDK
int64 temp;
__asm
{
lis r11,08FFFh
ld r11,011E0h(r11)
rldicl r11,r11,32,32
// unforunate can't get the inline assembler to write directly into desired target
std r11,temp
}
m_Int64 = temp;
#else
m_Int64 = ++g_dwFakeFastCounter;
#endif
#elif defined( _WIN32 ) && !defined( _WIN64 )
uint32_t* pSample = (uint32_t *)&m_Int64;
__asm
{
// force the cpu to synchronize the instruction queue
// NJS: CPUID can really impact performance in tight loops.
//cpuid
//cpuid
//cpuid
mov ecx, pSample
rdtsc
mov [ecx], eax
mov [ecx+4], edx
}
#elif defined( _LINUX )
uint32_t* pSample = (uint32_t *)&m_Int64;
__asm__ __volatile__ (
"rdtsc\n\t"
"movl %%eax, (%0)\n\t"
"movl %%edx, 4(%0)\n\t"
: /* no output regs */
: "D" (pSample)
: "%eax", "%edx" );
#endif
m_Int64 = Plat_Rdtsc();
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
inline CCycleCount& CCycleCount::operator+=( CCycleCount const &other )
{
m_Int64 += other.m_Int64;
@ -355,7 +292,7 @@ inline void CCycleCount::Sub( CCycleCount const &rSrc1, CCycleCount const &rSrc2
dest.m_Int64 = rSrc1.m_Int64 - rSrc2.m_Int64;
}
inline int64 CCycleCount::GetTimestamp()
inline uint64 CCycleCount::GetTimestamp()
{
CCycleCount c;
c.Sample();
@ -373,7 +310,7 @@ inline uint32_t CCycleCount::GetCycles() const
return (uint32_t)m_Int64;
}
inline int64 CCycleCount::GetLongCycles() const
inline uint64 CCycleCount::GetLongCycles() const
{
return m_Int64;
}
@ -397,7 +334,7 @@ inline double CCycleCount::GetMicrosecondsF() const
inline void CCycleCount::SetMicroseconds( uint32_t nMicroseconds )
{
m_Int64 = ((int64)nMicroseconds * g_ClockSpeed) / 1000000;
m_Int64 = ((uint64)nMicroseconds * g_ClockSpeed) / 1000000;
}
@ -438,10 +375,10 @@ inline void CFastTimer::End()
if ( IsX360() )
{
// have to handle rollover, hires timer is only accurate to 32 bits
// more than one overflow should not have occured, otherwise caller should use a slower timer
// more than one overflow should not have occurred, otherwise caller should use a slower timer
if ( (uint64)cnt.m_Int64 <= (uint64)m_Duration.m_Int64 )
{
// rollover occured
// rollover occurred
cnt.m_Int64 += 0x100000000LL;
}
}
@ -460,10 +397,10 @@ inline CCycleCount CFastTimer::GetDurationInProgress() const
if ( IsX360() )
{
// have to handle rollover, hires timer is only accurate to 32 bits
// more than one overflow should not have occured, otherwise caller should use a slower timer
// more than one overflow should not have occurred, otherwise caller should use a slower timer
if ( (uint64)cnt.m_Int64 <= (uint64)m_Duration.m_Int64 )
{
// rollover occured
// rollover occurred
cnt.m_Int64 += 0x100000000LL;
}
}
@ -475,9 +412,9 @@ inline CCycleCount CFastTimer::GetDurationInProgress() const
}
inline uint32_t CFastTimer::GetClockSpeed()
inline int64 CFastTimer::GetClockSpeed()
{
return g_dwClockSpeed;
return g_ClockSpeed;
}
@ -553,15 +490,20 @@ inline CAverageTimeMarker::~CAverageTimeMarker()
// CLimitTimer
// Use this to time whether a desired interval of time has passed. It's extremely fast
// to check while running.
// to check while running. NOTE: CMicroSecOverage() and CMicroSecLeft() are not as fast to check.
class CLimitTimer
{
public:
CLimitTimer() {}
CLimitTimer( uint64 cMicroSecDuration ) { SetLimit( cMicroSecDuration ); }
void SetLimit( uint64 m_cMicroSecDuration );
bool BLimitReached( void );
bool BLimitReached() const;
int CMicroSecOverage() const;
uint64 CMicroSecLeft() const;
private:
int64 m_lCycleLimit;
uint64 m_lCycleLimit;
};
@ -569,9 +511,9 @@ private:
// Purpose: Initializes the limit timer with a period of time to measure.
// Input : cMicroSecDuration - How long a time period to measure
//-----------------------------------------------------------------------------
inline void CLimitTimer::SetLimit( uint64 m_cMicroSecDuration )
inline void CLimitTimer::SetLimit( uint64 cMicroSecDuration )
{
int64 dlCycles = ( ( uint64 ) m_cMicroSecDuration * ( int64 ) g_dwClockSpeed ) / ( int64 ) 1000000L;
uint64 dlCycles = ( ( uint64 ) cMicroSecDuration * g_ClockSpeed ) / ( uint64 ) 1000000L;
CCycleCount cycleCount;
cycleCount.Sample( );
m_lCycleLimit = cycleCount.GetLongCycles( ) + dlCycles;
@ -582,7 +524,7 @@ inline void CLimitTimer::SetLimit( uint64 m_cMicroSecDuration )
// Purpose: Determines whether our specified time period has passed
// Output: true if at least the specified time period has passed
//-----------------------------------------------------------------------------
inline bool CLimitTimer::BLimitReached( )
inline bool CLimitTimer::BLimitReached() const
{
CCycleCount cycleCount;
cycleCount.Sample( );
@ -590,5 +532,38 @@ inline bool CLimitTimer::BLimitReached( )
}
//-----------------------------------------------------------------------------
// Purpose: If we're over our specified time period, return the amount of the overage.
// Output: # of microseconds since we reached our specified time period.
//-----------------------------------------------------------------------------
inline int CLimitTimer::CMicroSecOverage() const
{
CCycleCount cycleCount;
cycleCount.Sample();
uint64 lcCycles = cycleCount.GetLongCycles();
if ( lcCycles < m_lCycleLimit )
return 0;
return( ( int ) ( ( lcCycles - m_lCycleLimit ) * ( uint64 ) 1000000L / g_ClockSpeed ) );
}
//-----------------------------------------------------------------------------
// Purpose: If we're under our specified time period, return the amount under.
// Output: # of microseconds until we reached our specified time period, 0 if we've passed it
//-----------------------------------------------------------------------------
inline uint64 CLimitTimer::CMicroSecLeft() const
{
CCycleCount cycleCount;
cycleCount.Sample();
uint64 lcCycles = cycleCount.GetLongCycles();
if ( lcCycles >= m_lCycleLimit )
return 0;
return( ( uint64 ) ( ( m_lCycleLimit - lcCycles ) * ( uint64 ) 1000000L / g_ClockSpeed ) );
}
#endif // FASTTIMER_H

View File

@ -382,7 +382,7 @@ public:
#pragma warning(disable:4290)
#pragma warning(push)
#include <typeinfo.h>
#include <typeinfo>
// MEM_DEBUG_CLASSNAME is opt-in.
// Note: typeid().name() is not threadsafe, so if the project needs to access it in multiple threads

View File

@ -44,10 +44,11 @@ typedef uint32_t ThreadId_t;
// feature enables
#define NEW_SOFTWARE_LIGHTING
#if defined(_LINUX) || defined(__APPLE__)
#ifdef POSIX
// need this for _alloca
#include <alloca.h>
#endif // _LINUX
#include <time.h>
#endif
#if defined __APPLE__
#include <stdlib.h>
@ -234,6 +235,14 @@ typedef unsigned int uint;
#define abstract_class class NO_VTABLE
#endif
// MSVC CRT uses 0x7fff while gcc uses MAX_INT, leading to mismatches between platforms
// As a result, we pick the least common denominator here. This should be used anywhere
// you might typically want to use RAND_MAX
#define VALVE_RAND_MAX 0x7fff
/*
FIXME: Enable this when we no longer fear change =)
@ -242,32 +251,32 @@ FIXME: Enable this when we no longer fear change =)
#include <float.h>
// Maximum and minimum representable values
#define INT8_MAX SCHAR_MAX
#define INT16_MAX SHRT_MAX
#define INT32_MAX LONG_MAX
#define INT64_MAX (((int64)~0) >> 1)
#define INT8_MAX SCHAR_MAX
#define INT16_MAX SHRT_MAX
#define INT32_MAX LONG_MAX
#define INT64_MAX (((int64)~0) >> 1)
#define INT8_MIN SCHAR_MIN
#define INT16_MIN SHRT_MIN
#define INT32_MIN LONG_MIN
#define INT64_MIN (((int64)1) << 63)
#define INT8_MIN SCHAR_MIN
#define INT16_MIN SHRT_MIN
#define INT32_MIN LONG_MIN
#define INT64_MIN (((int64)1) << 63)
#define UINT8_MAX ((uint8)~0)
#define UINT16_MAX ((uint16)~0)
#define UINT32_MAX ((uint32)~0)
#define UINT64_MAX ((uint64)~0)
#define UINT8_MAX ((uint8)~0)
#define UINT16_MAX ((uint16)~0)
#define UINT32_MAX ((uint32)~0)
#define UINT64_MAX ((uint64)~0)
#define UINT8_MIN 0
#define UINT16_MIN 0
#define UINT32_MIN 0
#define UINT64_MIN 0
#define UINT8_MIN 0
#define UINT16_MIN 0
#define UINT32_MIN 0
#define UINT64_MIN 0
#ifndef UINT_MIN
#define UINT_MIN UINT32_MIN
#define UINT_MIN UINT32_MIN
#endif
#define FLOAT32_MAX FLT_MAX
#define FLOAT64_MAX DBL_MAX
#define FLOAT32_MAX FLT_MAX
#define FLOAT64_MAX DBL_MAX
#define FLOAT32_MIN FLT_MIN
#define FLOAT64_MIN DBL_MIN
@ -332,11 +341,35 @@ typedef void * HINSTANCE;
#define DECL_ALIGN(x) /* */
#endif
#ifdef _MSC_VER
// MSVC has the align at the start of the struct
#define ALIGN4 DECL_ALIGN(4)
#define ALIGN8 DECL_ALIGN(8)
#define ALIGN16 DECL_ALIGN(16)
#define ALIGN32 DECL_ALIGN(32)
#define ALIGN128 DECL_ALIGN(128)
#define ALIGN4_POST
#define ALIGN8_POST
#define ALIGN16_POST
#define ALIGN32_POST
#define ALIGN128_POST
#elif defined( GNUC )
// gnuc has the align decoration at the end
#define ALIGN4
#define ALIGN8
#define ALIGN16
#define ALIGN32
#define ALIGN128
#define ALIGN4_POST DECL_ALIGN(4)
#define ALIGN8_POST DECL_ALIGN(8)
#define ALIGN16_POST DECL_ALIGN(16)
#define ALIGN32_POST DECL_ALIGN(32)
#define ALIGN128_POST DECL_ALIGN(128)
#else
#error
#endif
// Pull in the /analyze code annotations.
#include "annotations.h"
@ -829,17 +862,20 @@ inline void StoreLittleDWord( uint32_t *base, unsigned int dwordIndex, uint32_t
#ifndef STATIC_TIER0
#ifdef TIER0_DLL_EXPORT
#define PLATFORM_INTERFACE DLL_EXPORT
#define PLATFORM_OVERLOAD DLL_GLOBAL_EXPORT
#define PLATFORM_INTERFACE DLL_EXPORT
#define PLATFORM_OVERLOAD DLL_GLOBAL_EXPORT
#define PLATFORM_CLASS DLL_CLASS_EXPORT
#else
#define PLATFORM_INTERFACE DLL_IMPORT
#define PLATFORM_OVERLOAD DLL_GLOBAL_IMPORT
#define PLATFORM_INTERFACE DLL_IMPORT
#define PLATFORM_OVERLOAD DLL_GLOBAL_IMPORT
#define PLATFORM_CLASS DLL_CLASS_IMPORT
#endif
#else // BUILD_AS_DLL
#define PLATFORM_INTERFACE extern
#define PLATFORM_OVERLOAD
#define PLATFORM_CLASS
#endif // BUILD_AS_DLL
@ -854,6 +890,41 @@ PLATFORM_INTERFACE bool Plat_IsInBenchmarkMode();
PLATFORM_INTERFACE double Plat_FloatTime(); // Returns time in seconds since the module was loaded.
PLATFORM_INTERFACE uint32_t Plat_MSTime(); // Time in milliseconds.
PLATFORM_INTERFACE char * Plat_ctime( const time_t *timep, char *buf, size_t bufsize );
PLATFORM_INTERFACE struct tm * Plat_gmtime( const time_t *timep, struct tm *result );
PLATFORM_INTERFACE time_t Plat_timegm( struct tm *timeptr );
PLATFORM_INTERFACE struct tm * Plat_localtime( const time_t *timep, struct tm *result );
#if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
extern "C" unsigned __int64 __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
inline uint64 Plat_Rdtsc()
{
#if defined( _X360 )
return ( uint64 )__mftb32();
#elif defined( _WIN64 )
return ( uint64 )__rdtsc();
#elif defined( _WIN32 )
#if defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
return ( uint64 )__rdtsc();
#else
__asm rdtsc;
__asm ret;
#endif
#elif defined( __i386__ )
uint64 val;
__asm__ __volatile__ ( "rdtsc" : "=A" (val) );
return val;
#elif defined( __x86_64__ )
uint32 lo, hi;
__asm__ __volatile__ ( "rdtsc" : "=a" (lo), "=d" (hi));
return ( ( ( uint64 )hi ) << 32 ) | lo;
#else
#error
#endif
}
// b/w compatibility
#define Sys_FloatTime Plat_FloatTime
@ -901,13 +972,10 @@ struct CPUInformation // Size: Win32=64, Win64=72
CPUInformation(): m_Size(0){}
};
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-pragmas"
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
#endif
PLATFORM_INTERFACE const CPUInformation& GetCPUInformation();
// Have to return a pointer, not a reference, because references are not compatible with the
// extern "C" implied by PLATFORM_INTERFACE.
PLATFORM_INTERFACE const CPUInformation* GetCPUInformation();
PLATFORM_INTERFACE void GetCurrentDate( int *pDay, int *pMonth, int *pYear );