mirror of
https://github.com/alliedmodders/hl2sdk.git
synced 2024-12-23 01:59:43 +08:00
Port GetCPUInformation and mathlib from sdk2013
This commit is contained in:
parent
b099570391
commit
0d247b9566
@ -11,6 +11,7 @@ builder.SetBuildFolder('/')
|
||||
|
||||
project = builder.StaticLibraryProject('mathlib')
|
||||
project.sources = [
|
||||
'almostequal.cpp',
|
||||
'anorms.cpp',
|
||||
'bumpvects.cpp',
|
||||
'color_conversion.cpp',
|
||||
@ -25,6 +26,7 @@ project.sources = [
|
||||
'randsse.cpp',
|
||||
'simdvectormatrix.cpp',
|
||||
'sparse_convolution_noise.cpp',
|
||||
'spherical.cpp',
|
||||
'sse.cpp',
|
||||
'sseconst.cpp',
|
||||
'ssenoise.cpp',
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include "mathlib/IceKey.H"
|
||||
#include <cstdint>
|
||||
|
||||
#include "tier0/memdbgon.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable: 4244)
|
||||
#endif
|
||||
|
97
mathlib/almostequal.cpp
Normal file
97
mathlib/almostequal.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
//========= Copyright Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose: Fast ways to compare equality of two floats. Assumes
|
||||
// sizeof(float) == sizeof(int) and we are using IEEE format.
|
||||
//
|
||||
// Source: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
|
||||
//=====================================================================================//
|
||||
|
||||
#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "mathlib/mathlib.h"
|
||||
|
||||
// Returns the raw bit pattern of a float as a signed 32-bit integer.
// The file assumes sizeof(float) == sizeof(int) and IEEE-754 layout.
// memcpy is used instead of *(int*)&a because reading a float object through
// an int pointer violates the strict-aliasing rule (undefined behavior that
// optimizing compilers may miscompile); compilers turn this memcpy into a
// plain register move.
static inline int AE_FloatBits(float a)
{
	int bits;
	memcpy(&bits, &a, sizeof(bits));
	return bits;
}

// Returns true if a is +infinity or -infinity.
static inline bool AE_IsInfinite(float a)
{
	const int kInfAsInt = 0x7F800000;

	// An infinity has an exponent of 255 (shift left 23 positions) and
	// a zero mantissa. There are two infinities - positive and negative,
	// so the sign bit is masked off before comparing.
	return (AE_FloatBits(a) & 0x7FFFFFFF) == kInfAsInt;
}

// Returns true if a is any NaN (quiet or signalling, either sign).
static inline bool AE_IsNan(float a)
{
	// A NAN has an exponent of 255 (shifted left 23 positions) and
	// a non-zero mantissa.
	const int bits = AE_FloatBits(a);
	const int exp = bits & 0x7F800000;
	const int mantissa = bits & 0x007FFFFF;
	return exp == 0x7F800000 && mantissa != 0;
}

// Returns a non-zero value if the sign bit of a is set, zero otherwise.
// Only meaningful for comparing the signs of two floats with each other.
static inline int AE_Sign(float a)
{
	// The sign bit of a number is the high bit.
	return AE_FloatBits(a) & 0x80000000;
}

// This is the 'final' version of the AlmostEqualUlps function.
// The optional checks are included for completeness, but in many
// cases they are not necessary, or even not desirable.
//
// Returns true when a and b are at most maxUlps representable floats apart
// ("units in the last place"). Infinities only match an identical infinity,
// NaNs never match anything, and values of opposite sign only match when
// they compare equal (i.e. +0 and -0).
bool AlmostEqual(float a, float b, int maxUlps)
{
	// There are several optional checks that you can do, depending
	// on what behavior you want from your floating point comparisons.
	// These checks should not be necessary and they are included
	// mainly for completeness.

	// If a or b are infinity (positive or negative) then
	// only return true if they are exactly equal to each other -
	// that is, if they are both infinities of the same sign.
	// This check is only needed if you will be generating
	// infinities and you don't want them 'close' to numbers
	// near FLT_MAX.
	if (AE_IsInfinite(a) || AE_IsInfinite(b))
		return a == b;

	// If a or b are a NAN, return false. NANs are equal to nothing,
	// not even themselves.
	// This check is only needed if you will be generating NANs
	// and you use a maxUlps greater than 4 million or you want to
	// ensure that a NAN does not equal itself.
	if (AE_IsNan(a) || AE_IsNan(b))
		return false;

	// After adjusting floats so their representations are lexicographically
	// ordered as twos-complement integers a very small positive number
	// will compare as 'close' to a very small negative number. If this is
	// not desirable, and if you are on a platform that supports
	// subnormals (which is the only place the problem can show up) then
	// you need this check.
	// The check for a == b is because zero and negative zero have different
	// signs but are equal to each other.
	if (AE_Sign(a) != AE_Sign(b))
		return a == b;

	// Make aInt lexicographically ordered as a twos-complement int.
	// The subtraction is done in unsigned arithmetic, exactly as the literal
	// 0x80000000 forces it to be, so it cannot overflow.
	int aInt = AE_FloatBits(a);
	if (aInt < 0)
		aInt = (int)(0x80000000u - (unsigned int)aInt);

	// Make bInt lexicographically ordered as a twos-complement int
	int bInt = AE_FloatBits(b);
	if (bInt < 0)
		bInt = (int)(0x80000000u - (unsigned int)bInt);

	// Now we can compare aInt and bInt to find out how far apart a and b
	// are. Both values have the same sign at this point (see the sign check
	// above), so the difference cannot overflow.
	const int intDiff = abs(aInt - bInt);
	return intDiff <= maxUlps;
}
|
||||
|
||||
|
@ -106,27 +106,23 @@ ALIGN128 float power2_n[256] = // 2**(index - 128) / 255
|
||||
// You can use this to double check the exponent table and assert that
|
||||
// the precomputation is correct.
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
#pragma warning(push)
|
||||
#pragma warning( disable : 4189 ) // disable unused local variable warning
|
||||
#endif
|
||||
#ifdef __GNUC__
|
||||
__attribute__((unused)) static void CheckExponentTable()
|
||||
#else
|
||||
static void CheckExponentTable()
|
||||
#endif
|
||||
{
|
||||
for( int i = 0; i < 256; i++ )
|
||||
{
|
||||
float testAgainst = pow( 2.0f, i - 128 ) / 255.0f;
|
||||
float diff = testAgainst - power2_n[i] ;
|
||||
float relativeDiff = diff / testAgainst;
|
||||
Assert( sizeof(relativeDiff) > 0 && testAgainst == 0 ?
|
||||
power2_n[i] < 1.16E-041 :
|
||||
power2_n[i] == testAgainst );
|
||||
Assert( testAgainst == 0 ?
|
||||
power2_n[i] < 1.16E-041 :
|
||||
power2_n[i] == testAgainst );
|
||||
}
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
#endif
|
||||
@ -617,10 +613,10 @@ void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
|
||||
scalar = *reinterpret_cast<float *>(&fbits);
|
||||
}
|
||||
|
||||
// we should never need to clamp:
|
||||
Assert(vin.x * scalar <= 255.0f &&
|
||||
vin.y * scalar <= 255.0f &&
|
||||
vin.z * scalar <= 255.0f);
|
||||
// We can totally wind up above 255 and that's okay--but above 256 would be right out.
|
||||
Assert(vin.x * scalar < 256.0f &&
|
||||
vin.y * scalar < 256.0f &&
|
||||
vin.z * scalar < 256.0f);
|
||||
|
||||
// This awful construction is necessary to prevent VC2005 from using the
|
||||
// fldcw/fnstcw control words around every float-to-unsigned-char operation.
|
||||
|
@ -6,7 +6,7 @@
|
||||
//
|
||||
//=============================================================================//
|
||||
#include <quantize.h>
|
||||
#include <tier0/basetypes.h>
|
||||
#include <minmax.h>
|
||||
|
||||
#define N_EXTRAVALUES 1
|
||||
#define N_DIMENSIONS (3+N_EXTRAVALUES)
|
||||
@ -46,7 +46,7 @@ void ColorQuantize(uint8 const *Image,
|
||||
val1+=PIXEL(x,y,c)*ExtraValueXForms[i*3+c];
|
||||
val1>>=8;
|
||||
NthSample(s,y*Width+x,N_DIMENSIONS)->Value[c]=(uint8)
|
||||
(MIN(255,MAX(0,val1)));
|
||||
(V_min(255,V_max(0,val1)));
|
||||
}
|
||||
}
|
||||
struct QuantizedValue *q=Quantize(s,Width*Height,N_DIMENSIONS,
|
||||
@ -76,7 +76,7 @@ void ColorQuantize(uint8 const *Image,
|
||||
tryc+=Error[x][c][ErrorUse];
|
||||
Error[x][c][ErrorUse]=0;
|
||||
}
|
||||
samp[c]=(uint8) MIN(255,MAX(0,tryc));
|
||||
samp[c]=(uint8) V_min(255,V_max(0,tryc));
|
||||
}
|
||||
struct QuantizedValue *f=FindMatch(samp,3,Weights,q);
|
||||
out_pixels[Width*y+x]=(uint8) (f->value);
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
void LightDesc_t::RecalculateDerivedValues(void)
|
||||
{
|
||||
m_Flags=0;
|
||||
m_Flags = LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED;
|
||||
if (m_Attenuation0)
|
||||
m_Flags|=LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0;
|
||||
if (m_Attenuation1)
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//
|
||||
// Purpose: Math primitives.
|
||||
//
|
||||
@ -17,7 +17,7 @@
|
||||
#include "tier0/vprof.h"
|
||||
//#define _VPROF_MATHLIB
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
|
||||
#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
|
||||
#endif
|
||||
@ -25,6 +25,7 @@
|
||||
#include "mathlib/mathlib.h"
|
||||
#include "mathlib/vector.h"
|
||||
#if !defined( _X360 )
|
||||
#include "mathlib/amd3dx.h"
|
||||
#include "sse.h"
|
||||
#endif
|
||||
|
||||
@ -426,6 +427,33 @@ void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
|
||||
out[2][column] = in.z;
|
||||
}
|
||||
|
||||
void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
|
||||
{
|
||||
out[0][0] *= flScale;
|
||||
out[1][0] *= flScale;
|
||||
out[2][0] *= flScale;
|
||||
out[0][1] *= flScale;
|
||||
out[1][1] *= flScale;
|
||||
out[2][1] *= flScale;
|
||||
out[0][2] *= flScale;
|
||||
out[1][2] *= flScale;
|
||||
out[2][2] *= flScale;
|
||||
}
|
||||
|
||||
// Writes 0.0f into the 3x3 part (rows 0-2, columns 0-2) of 'out'.
// Assigns rather than multiplies, so existing NaN/Inf entries are cleared too.
// Column 3 of each row is left untouched.
void MatrixScaleByZero ( matrix3x4_t &out )
{
	for ( int iColumn = 0; iColumn < 3; ++iColumn )
	{
		for ( int iRow = 0; iRow < 3; ++iRow )
		{
			out[iRow][iColumn] = 0.0f;
		}
	}
}
|
||||
|
||||
|
||||
|
||||
int VectorCompare (const float *v1, const float *v2)
|
||||
{
|
||||
@ -565,53 +593,128 @@ void ConcatRotations (const float in1[3][3], const float in2[3][3], float out[3]
|
||||
in1[2][2] * in2[2][2];
|
||||
}
|
||||
|
||||
void ConcatTransforms_Aligned( const matrix3x4_t &m0, const matrix3x4_t &m1, matrix3x4_t &out )
|
||||
{
|
||||
Assert( (((size_t)&m0) % 16) == 0 );
|
||||
Assert( (((size_t)&m1) % 16) == 0 );
|
||||
Assert( (((size_t)&out) % 16) == 0 );
|
||||
|
||||
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
|
||||
fltx4 rowA0 = LoadAlignedSIMD( m0.m_flMatVal[0] );
|
||||
fltx4 rowA1 = LoadAlignedSIMD( m0.m_flMatVal[1] );
|
||||
fltx4 rowA2 = LoadAlignedSIMD( m0.m_flMatVal[2] );
|
||||
|
||||
fltx4 rowB0 = LoadAlignedSIMD( m1.m_flMatVal[0] );
|
||||
fltx4 rowB1 = LoadAlignedSIMD( m1.m_flMatVal[1] );
|
||||
fltx4 rowB2 = LoadAlignedSIMD( m1.m_flMatVal[2] );
|
||||
|
||||
// now we have the rows of m0 and the columns of m1
|
||||
// first output row
|
||||
fltx4 A0 = SplatXSIMD(rowA0);
|
||||
fltx4 A1 = SplatYSIMD(rowA0);
|
||||
fltx4 A2 = SplatZSIMD(rowA0);
|
||||
fltx4 mul00 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul01 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul02 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
|
||||
|
||||
// second output row
|
||||
A0 = SplatXSIMD(rowA1);
|
||||
A1 = SplatYSIMD(rowA1);
|
||||
A2 = SplatZSIMD(rowA1);
|
||||
fltx4 mul10 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul11 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul12 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
|
||||
|
||||
// third output row
|
||||
A0 = SplatXSIMD(rowA2);
|
||||
A1 = SplatYSIMD(rowA2);
|
||||
A2 = SplatZSIMD(rowA2);
|
||||
fltx4 mul20 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul21 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul22 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
|
||||
|
||||
// add in translation vector
|
||||
A0 = AndSIMD(rowA0,lastMask);
|
||||
A1 = AndSIMD(rowA1,lastMask);
|
||||
A2 = AndSIMD(rowA2,lastMask);
|
||||
out0 = AddSIMD(out0, A0);
|
||||
out1 = AddSIMD(out1, A1);
|
||||
out2 = AddSIMD(out2, A2);
|
||||
|
||||
StoreAlignedSIMD( out.m_flMatVal[0], out0 );
|
||||
StoreAlignedSIMD( out.m_flMatVal[1], out1 );
|
||||
StoreAlignedSIMD( out.m_flMatVal[2], out2 );
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
R_ConcatTransforms
|
||||
================
|
||||
*/
|
||||
|
||||
void ConcatTransforms (const matrix3x4_t& in1, const matrix3x4_t& in2, matrix3x4_t& out)
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
if ( &in1 == &out )
|
||||
#if 0
|
||||
// test for ones that'll be 2x faster
|
||||
if ( (((size_t)&in1) % 16) == 0 && (((size_t)&in2) % 16) == 0 && (((size_t)&out) % 16) == 0 )
|
||||
{
|
||||
matrix3x4_t in1b;
|
||||
MatrixCopy( in1, in1b );
|
||||
ConcatTransforms( in1b, in2, out );
|
||||
ConcatTransforms_Aligned( in1, in2, out );
|
||||
return;
|
||||
}
|
||||
if ( &in2 == &out )
|
||||
{
|
||||
matrix3x4_t in2b;
|
||||
MatrixCopy( in2, in2b );
|
||||
ConcatTransforms( in1, in2b, out );
|
||||
return;
|
||||
}
|
||||
out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
|
||||
in1[0][2] * in2[2][0];
|
||||
out[0][1] = in1[0][0] * in2[0][1] + in1[0][1] * in2[1][1] +
|
||||
in1[0][2] * in2[2][1];
|
||||
out[0][2] = in1[0][0] * in2[0][2] + in1[0][1] * in2[1][2] +
|
||||
in1[0][2] * in2[2][2];
|
||||
out[0][3] = in1[0][0] * in2[0][3] + in1[0][1] * in2[1][3] +
|
||||
in1[0][2] * in2[2][3] + in1[0][3];
|
||||
out[1][0] = in1[1][0] * in2[0][0] + in1[1][1] * in2[1][0] +
|
||||
in1[1][2] * in2[2][0];
|
||||
out[1][1] = in1[1][0] * in2[0][1] + in1[1][1] * in2[1][1] +
|
||||
in1[1][2] * in2[2][1];
|
||||
out[1][2] = in1[1][0] * in2[0][2] + in1[1][1] * in2[1][2] +
|
||||
in1[1][2] * in2[2][2];
|
||||
out[1][3] = in1[1][0] * in2[0][3] + in1[1][1] * in2[1][3] +
|
||||
in1[1][2] * in2[2][3] + in1[1][3];
|
||||
out[2][0] = in1[2][0] * in2[0][0] + in1[2][1] * in2[1][0] +
|
||||
in1[2][2] * in2[2][0];
|
||||
out[2][1] = in1[2][0] * in2[0][1] + in1[2][1] * in2[1][1] +
|
||||
in1[2][2] * in2[2][1];
|
||||
out[2][2] = in1[2][0] * in2[0][2] + in1[2][1] * in2[1][2] +
|
||||
in1[2][2] * in2[2][2];
|
||||
out[2][3] = in1[2][0] * in2[0][3] + in1[2][1] * in2[1][3] +
|
||||
in1[2][2] * in2[2][3] + in1[2][3];
|
||||
#endif
|
||||
|
||||
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
|
||||
fltx4 rowA0 = LoadUnalignedSIMD( in1.m_flMatVal[0] );
|
||||
fltx4 rowA1 = LoadUnalignedSIMD( in1.m_flMatVal[1] );
|
||||
fltx4 rowA2 = LoadUnalignedSIMD( in1.m_flMatVal[2] );
|
||||
|
||||
fltx4 rowB0 = LoadUnalignedSIMD( in2.m_flMatVal[0] );
|
||||
fltx4 rowB1 = LoadUnalignedSIMD( in2.m_flMatVal[1] );
|
||||
fltx4 rowB2 = LoadUnalignedSIMD( in2.m_flMatVal[2] );
|
||||
|
||||
// now we have the rows of m0 and the columns of m1
|
||||
// first output row
|
||||
fltx4 A0 = SplatXSIMD(rowA0);
|
||||
fltx4 A1 = SplatYSIMD(rowA0);
|
||||
fltx4 A2 = SplatZSIMD(rowA0);
|
||||
fltx4 mul00 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul01 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul02 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
|
||||
|
||||
// second output row
|
||||
A0 = SplatXSIMD(rowA1);
|
||||
A1 = SplatYSIMD(rowA1);
|
||||
A2 = SplatZSIMD(rowA1);
|
||||
fltx4 mul10 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul11 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul12 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
|
||||
|
||||
// third output row
|
||||
A0 = SplatXSIMD(rowA2);
|
||||
A1 = SplatYSIMD(rowA2);
|
||||
A2 = SplatZSIMD(rowA2);
|
||||
fltx4 mul20 = MulSIMD( A0, rowB0 );
|
||||
fltx4 mul21 = MulSIMD( A1, rowB1 );
|
||||
fltx4 mul22 = MulSIMD( A2, rowB2 );
|
||||
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
|
||||
|
||||
// add in translation vector
|
||||
A0 = AndSIMD(rowA0,lastMask);
|
||||
A1 = AndSIMD(rowA1,lastMask);
|
||||
A2 = AndSIMD(rowA2,lastMask);
|
||||
out0 = AddSIMD(out0, A0);
|
||||
out1 = AddSIMD(out1, A1);
|
||||
out2 = AddSIMD(out2, A2);
|
||||
|
||||
// write to output
|
||||
StoreUnalignedSIMD( out.m_flMatVal[0], out0 );
|
||||
StoreUnalignedSIMD( out.m_flMatVal[1], out1 );
|
||||
StoreUnalignedSIMD( out.m_flMatVal[2], out2 );
|
||||
}
|
||||
|
||||
|
||||
@ -1358,7 +1461,9 @@ float Bias( float x, float biasAmt )
|
||||
{
|
||||
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
|
||||
}
|
||||
return pow( x, lastExponent );
|
||||
float fRet = pow( x, lastExponent );
|
||||
Assert ( !IS_NAN( fRet ) );
|
||||
return fRet;
|
||||
}
|
||||
|
||||
|
||||
@ -1374,7 +1479,9 @@ float Gain( float x, float biasAmt )
|
||||
|
||||
float SmoothCurve( float x )
|
||||
{
|
||||
return (1 - cos( x * M_PI )) * 0.5f;
|
||||
// Actual smooth curve. Visualization:
|
||||
// http://www.wolframalpha.com/input/?i=plot%5B+0.5+*+%281+-+cos%5B2+*+pi+*+x%5D%29+for+x+%3D+%280%2C+1%29+%5D
|
||||
return 0.5f * (1 - cos( 2.0f * M_PI * x ) );
|
||||
}
|
||||
|
||||
|
||||
@ -1566,7 +1673,9 @@ float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q )
|
||||
QuaternionConjugate( q, qInv );
|
||||
QuaternionMult( p, qInv, diff );
|
||||
|
||||
float sinang = sqrt( diff.x * diff.x + diff.y * diff.y + diff.z * diff.z );
|
||||
// Note if the quaternion is slightly non-normalized the square root below may be more than 1,
|
||||
// the value is clamped to one otherwise it may result in asin() returning an undefined result.
|
||||
float sinang = MIN( 1.0f, sqrt( diff.x * diff.x + diff.y * diff.y + diff.z * diff.z ) );
|
||||
float angle = RAD2DEG( 2 * asin( sinang ) );
|
||||
return angle;
|
||||
#else
|
||||
@ -1666,7 +1775,7 @@ void QuaternionScale( const Quaternion &p, float t, Quaternion &q )
|
||||
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
|
||||
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
|
||||
float sinom = sqrt( DotProduct( &p.x, &p.x ) );
|
||||
sinom = MIN( sinom, 1.f );
|
||||
sinom = V_min( sinom, 1.f );
|
||||
|
||||
float sinsom = sin( asin( sinom ) * t );
|
||||
|
||||
@ -1751,7 +1860,13 @@ void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt )
|
||||
|
||||
void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t& matrix )
|
||||
{
|
||||
Assert( pos.IsValid() );
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
static bool s_bHushAsserts = !!CommandLine()->FindParm("-hushasserts");
|
||||
if (!s_bHushAsserts)
|
||||
{
|
||||
Assert( pos.IsValid() );
|
||||
}
|
||||
#endif
|
||||
|
||||
QuaternionMatrix( q, matrix );
|
||||
|
||||
@ -1763,7 +1878,13 @@ void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t& matr
|
||||
void QuaternionMatrix( const Quaternion &q, matrix3x4_t& matrix )
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
Assert( q.IsValid() );
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
static bool s_bHushAsserts = !!CommandLine()->FindParm("-hushasserts");
|
||||
if ( !s_bHushAsserts )
|
||||
{
|
||||
Assert( q.IsValid() );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _VPROF_MATHLIB
|
||||
VPROF_BUDGET( "QuaternionMatrix", "Mathlib" );
|
||||
@ -3211,7 +3332,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
|
||||
#if !defined( _X360 )
|
||||
// Grab the processor information:
|
||||
const CPUInformation& pi = GetCPUInformation();
|
||||
const CPUInformation& pi = *GetCPUInformation();
|
||||
|
||||
// Select the default generic routines.
|
||||
pfSqrt = _sqrtf;
|
||||
@ -3240,6 +3361,8 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
{
|
||||
s_bSSEEnabled = true;
|
||||
|
||||
#ifndef PLATFORM_WINDOWS_PC64
|
||||
// These are not yet available.
|
||||
// Select the SSE specific routines if available
|
||||
pfVectorNormalize = _VectorNormalize;
|
||||
pfVectorNormalizeFast = _SSE_VectorNormalizeFast;
|
||||
@ -3247,7 +3370,8 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
pfSqrt = _SSE_Sqrt;
|
||||
pfRSqrt = _SSE_RSqrtAccurate;
|
||||
pfRSqrtFast = _SSE_RSqrtFast;
|
||||
#ifdef _WIN32
|
||||
#endif
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
pfFastSinCos = _SSE_SinCos;
|
||||
pfFastCos = _SSE_cos;
|
||||
#endif
|
||||
@ -3260,7 +3384,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
if ( bAllowSSE2 && pi.m_bSSE2 )
|
||||
{
|
||||
s_bSSE2Enabled = true;
|
||||
#ifdef _WIN32
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
pfFastSinCos = _SSE2_SinCos;
|
||||
pfFastCos = _SSE2_cos;
|
||||
#endif
|
||||
@ -3269,7 +3393,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
{
|
||||
s_bSSE2Enabled = false;
|
||||
}
|
||||
#endif
|
||||
#endif // !_X360
|
||||
|
||||
s_bMathlibInitialized = true;
|
||||
|
||||
@ -3920,10 +4044,10 @@ void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector
|
||||
//-----------------------------------------------------------------------------
|
||||
void RGBtoHSV( const Vector &rgb, Vector &hsv )
|
||||
{
|
||||
float flMax = MAX( rgb.x, rgb.y );
|
||||
flMax = MAX( flMax, rgb.z );
|
||||
float flMin = MIN( rgb.x, rgb.y );
|
||||
flMin = MIN( flMin, rgb.z );
|
||||
float flMax = V_max( rgb.x, rgb.y );
|
||||
flMax = V_max( flMax, rgb.z );
|
||||
float flMin = V_min( rgb.x, rgb.y );
|
||||
flMin = V_min( flMin, rgb.z );
|
||||
|
||||
// hsv.z is the value
|
||||
hsv.z = flMax;
|
||||
@ -4070,3 +4194,44 @@ void GetInterpolationData( float const *pKnotPositions,
|
||||
*pInterpolationValue = FLerp( 0, 1, 0, flSizeOfGap, flOffsetFromStartOfGap );
|
||||
return;
|
||||
}
|
||||
|
||||
// Fills *pVector with a random point inside the unit sphere and returns the
// point's distance from the origin.
// Uses three rand() draws, each divided by VALVE_RAND_MAX, mapped through the
// "Nonuniform random point sets via warping" construction (Graphics Gems III)
// so that the distribution is uniform over the sphere's volume.
float RandomVectorInUnitSphere( Vector *pVector )
{
	// Draw order matters for reproducibility with a seeded rand().
	const float flRandPolar   = ((float)rand() / VALVE_RAND_MAX);
	const float flRandAzimuth = ((float)rand() / VALVE_RAND_MAX);
	const float flRandRadial  = ((float)rand() / VALVE_RAND_MAX);

	// Polar angle from acos, azimuth linear in the draw, radius as a cube root.
	const float flPolar   = acos( 1 - 2 * flRandPolar );
	const float flAzimuth = 2 * M_PI * flRandAzimuth;
	const float flDistance = powf( flRandRadial, 1.0f / 3.0f );

	float flSinPolar, flCosPolar;
	SinCos( flPolar, &flSinPolar, &flCosPolar );

	float flSinAzimuth, flCosAzimuth;
	SinCos( flAzimuth, &flSinAzimuth, &flCosAzimuth );

	// Spherical -> cartesian.
	pVector->x = flDistance * flSinPolar * flCosAzimuth;
	pVector->y = flDistance * flSinPolar * flSinAzimuth;
	pVector->z = flDistance * flCosPolar;
	return flDistance;
}
|
||||
|
||||
// Fills *pVector with a random point inside the unit circle and returns the
// point's distance from the origin.
// Uses two rand() draws, each divided by VALVE_RAND_MAX; the radius is the
// square root of the first draw so the distribution is uniform over the
// circle's area (Graphics Gems III, "Nonuniform random point sets via warping").
float RandomVectorInUnitCircle( Vector2D *pVector )
{
	// Draw order matters for reproducibility with a seeded rand().
	const float flRandRadial = ((float)rand() / VALVE_RAND_MAX);
	const float flRandAngle  = ((float)rand() / VALVE_RAND_MAX);

	const float flAngle    = 2 * M_PI * flRandAngle;
	const float flDistance = powf( flRandRadial, 1.0f / 2.0f );

	float flSinAngle, flCosAngle;
	SinCos( flAngle, &flSinAngle, &flCosAngle );

	// Polar -> cartesian.
	pVector->x = flDistance * flCosAngle;
	pVector->y = flDistance * flSinAngle;
	return flDistance;
}
|
||||
|
@ -34,7 +34,6 @@ CPolyhedron *ConvertLinkedGeometryToPolyhedron( GeneratePolyhedronFromPlanes_Uno
|
||||
//#define DEBUG_DUMP_POLYHEDRONS_TO_NUMBERED_GLVIEWS //dumps successfully generated polyhedrons
|
||||
|
||||
#ifdef _DEBUG
|
||||
#include "filesystem.h"
|
||||
void DumpPolyhedronToGLView( const CPolyhedron *pPolyhedron, const char *pFilename, const VMatrix *pTransform );
|
||||
void DumpPlaneToGlView( const float *pPlane, float fGrayScale, const char *pszFileName, const VMatrix *pTransform );
|
||||
void DumpLineToGLView( const Vector &vPoint1, const Vector &vColor1, const Vector &vPoint2, const Vector &vColor2, float fThickness, FILE *pFile );
|
||||
@ -103,19 +102,19 @@ CPolyhedron_AllocByNew *CPolyhedron_AllocByNew::Allocate( unsigned short iVertic
|
||||
class CPolyhedron_TempMemory : public CPolyhedron
|
||||
{
|
||||
public:
|
||||
#ifdef _DEBUG
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
int iReferenceCount;
|
||||
#endif
|
||||
|
||||
virtual void Release( void )
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
--iReferenceCount;
|
||||
#endif
|
||||
}
|
||||
|
||||
CPolyhedron_TempMemory( void )
|
||||
#ifdef _DEBUG
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
: iReferenceCount( 0 )
|
||||
#endif
|
||||
{ };
|
||||
@ -128,7 +127,7 @@ static CPolyhedron_TempMemory s_TempMemoryPolyhedron;
|
||||
CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ) //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time
|
||||
{
|
||||
AssertMsg( s_TempMemoryPolyhedron.iReferenceCount == 0, "Temporary polyhedron memory being rewritten before released" );
|
||||
#ifdef _DEBUG
|
||||
#ifdef DBGFLAG_ASSERT
|
||||
++s_TempMemoryPolyhedron.iReferenceCount;
|
||||
#endif
|
||||
s_TempMemoryPolyhedron_Buffer.SetCount( (sizeof( Vector ) * iVertices) +
|
||||
@ -857,8 +856,8 @@ const char * DumpPolyhedronCutHistory( const CUtlVector<CPolyhedron *> &DumpedHi
|
||||
|
||||
#else
|
||||
|
||||
#define AssertMsg_DumpPolyhedron(condition, message)
|
||||
#define Assert_DumpPolyhedron(condition)
|
||||
#define AssertMsg_DumpPolyhedron(condition, message) NULL;
|
||||
#define Assert_DumpPolyhedron(condition) NULL;
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -6,6 +6,10 @@
|
||||
|
||||
#include "mathlib/ssemath.h"
|
||||
|
||||
// NOTE: This has to be the last file included!
|
||||
#include "tier0/memdbgon.h"
|
||||
|
||||
|
||||
fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent)
|
||||
{
|
||||
fltx4 rslt=Four_Ones; // x^0=1.0
|
||||
@ -32,8 +36,61 @@ fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent)
|
||||
break;
|
||||
}
|
||||
if (exponent<0)
|
||||
return ReciprocalEstSIMD(rslt); // pow(x,-b)=1/pow(x,b)
|
||||
return ReciprocalEstSaturateSIMD(rslt); // pow(x,-b)=1/pow(x,b)
|
||||
else
|
||||
return rslt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* (c) Ian Stephenson
|
||||
*
|
||||
* ian@dctsystems.co.uk
|
||||
*
|
||||
* Fast pow() reference implementation
|
||||
*/
|
||||
|
||||
|
||||
// 2^23 as a float. FastPow2 multiplies by this before reinterpreting the
// value's integer form as float bits.
static float shift23=(1<<23);
|
||||
// 2^-23 as a float (reciprocal of shift23). FastLog2 multiplies by this after
// reading the float's bits as an integer; FastPow also uses it as its
// "treat the base as zero" threshold.
static float OOshift23=1.0/(1<<23);
|
||||
|
||||
float FastLog2(float i)
|
||||
{
|
||||
float LogBodge=0.346607f;
|
||||
float x;
|
||||
float y;
|
||||
x=*(int *)&i;
|
||||
x*= OOshift23; //1/pow(2,23);
|
||||
x=x-127;
|
||||
|
||||
y=x-floorf(x);
|
||||
y=(y-y*y)*LogBodge;
|
||||
return x+y;
|
||||
}
|
||||
float FastPow2(float i)
|
||||
{
|
||||
float PowBodge=0.33971f;
|
||||
float x;
|
||||
float y=i-floorf(i);
|
||||
y=(y-y*y)*PowBodge;
|
||||
|
||||
x=i+127-y;
|
||||
x*= shift23; //pow(2,23);
|
||||
*(int*)&x=(int)x;
|
||||
return x;
|
||||
}
|
||||
float FastPow(float a, float b)
|
||||
{
|
||||
if (a <= OOshift23)
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
return FastPow2(b*FastLog2(a));
|
||||
}
|
||||
float FastPow10( float i )
|
||||
{
|
||||
return FastPow2( i * 3.321928f );
|
||||
}
|
||||
|
||||
|
@ -18,11 +18,10 @@
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <minmax.h>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <tier0/basetypes.h>
|
||||
|
||||
static int current_ndims;
|
||||
static struct QuantizedValue *current_root;
|
||||
static int current_ssize;
|
||||
@ -412,8 +411,8 @@ static void Label(struct QuantizedValue *q, int updatecolor)
|
||||
else
|
||||
for(int i=0;i<current_ndims;i++)
|
||||
{
|
||||
q->Mins[i]=MIN(q->Children[0]->Mins[i],q->Children[1]->Mins[i]);
|
||||
q->Maxs[i]=MAX(q->Children[0]->Maxs[i],q->Children[1]->Maxs[i]);
|
||||
q->Mins[i]=V_min(q->Children[0]->Mins[i],q->Children[1]->Mins[i]);
|
||||
q->Maxs[i]=V_max(q->Children[0]->Maxs[i],q->Children[1]->Maxs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData(int srcwidth, int srcheigh
|
||||
{
|
||||
for(int cp=0;cp<4; cp++)
|
||||
{
|
||||
int real_cp=MIN( cp, ntrailing_pixels_per_source_line-1 );
|
||||
int real_cp=V_min( cp, ntrailing_pixels_per_source_line-1 );
|
||||
data_out[4*c+cp]= data_in[c+4*real_cp];
|
||||
}
|
||||
}
|
||||
|
124
mathlib/spherical.cpp
Normal file
124
mathlib/spherical.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
//========= Copyright Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose: spherical math routines
|
||||
//
|
||||
//=====================================================================================//
|
||||
|
||||
#include <math.h>
|
||||
#include <float.h> // Needed for FLT_EPSILON
|
||||
#include "basetypes.h"
|
||||
#include <memory.h>
|
||||
#include "tier0/dbg.h"
|
||||
#include "mathlib/mathlib.h"
|
||||
#include "mathlib/vector.h"
|
||||
#include "mathlib/spherical_geometry.h"
|
||||
|
||||
// memdbgon must be the last include file in a .cpp file!!!
|
||||
#include "tier0/memdbgon.h"
|
||||
|
||||
// Table of factorials: s_flFactorials[n] == n! for n = 0 .. 33 (34 entries),
// stored as floats. SHNormalizationFactor indexes this with (nL - nM) and
// (nL + nM) without range checks, so callers must keep nL + nM <= 33.
float s_flFactorials[]={
|
||||
1.,
|
||||
1.,
|
||||
2.,
|
||||
6.,
|
||||
24.,
|
||||
120.,
|
||||
720.,
|
||||
5040.,
|
||||
40320.,
|
||||
362880.,
|
||||
3628800.,
|
||||
39916800.,
|
||||
479001600.,
|
||||
6227020800.,
|
||||
87178291200.,
|
||||
1307674368000.,
|
||||
20922789888000.,
|
||||
355687428096000.,
|
||||
6402373705728000.,
|
||||
121645100408832000.,
|
||||
2432902008176640000.,
|
||||
51090942171709440000.,
|
||||
1124000727777607680000.,
|
||||
25852016738884976640000.,
|
||||
620448401733239439360000.,
|
||||
15511210043330985984000000.,
|
||||
403291461126605635584000000.,
|
||||
10888869450418352160768000000.,
|
||||
304888344611713860501504000000.,
|
||||
8841761993739701954543616000000.,
|
||||
265252859812191058636308480000000.,
|
||||
8222838654177922817725562880000000.,
|
||||
263130836933693530167218012160000000.,
|
||||
8683317618811886495518194401280000000.
|
||||
};
|
||||
|
||||
// Evaluates the associated Legendre polynomial P(nL, nM) at flX using the
// usual three-term upward recurrence in the degree:
//   1. build P(nM, nM) directly,
//   2. step once to P(nM+1, nM),
//   3. recur upward until degree nL is reached.
// Expects 0 <= nM <= nL and -1 <= flX <= 1; for nL < nM the recurrence loop
// never runs and 0 is returned.
float AssociatedLegendrePolynomial( int nL, int nM, float flX )
{
	// Step 1: P(nM, nM) = (-1)^nM * (2nM-1)!! * (1 - x^2)^(nM/2)
	float flDiagonal = 1.;
	if ( nM > 0 )
	{
		float flRootTerm = sqrt( ( 1 - flX ) * ( 1 + flX ) );
		float flOddFactor = 1.;
		int nRemaining = nM;
		while ( nRemaining-- > 0 )
		{
			flDiagonal *= -flOddFactor * flRootTerm;
			flOddFactor += 2.0;
		}
	}
	if ( nL == nM )
	{
		return flDiagonal;
	}

	// Step 2: P(nM+1, nM) = x * (2nM + 1) * P(nM, nM)
	float flAboveDiagonal = flX * ( 2.0 * nM + 1.0 ) * flDiagonal;
	if ( nL == nM + 1 )
	{
		return flAboveDiagonal;
	}

	// Step 3: climb from degree nM+2 up to nL, keeping the last two values.
	float flTwoBack = flDiagonal;
	float flOneBack = flAboveDiagonal;
	float flCurrent = 0.;
	int nDegree = nM + 2;
	while ( nDegree <= nL )
	{
		flCurrent = ( ( 2.0 * nDegree - 1.0 ) * flX * flOneBack - ( nDegree + nM - 1.0 ) * flTwoBack ) * ( 1.0 / ( nDegree - nM ) );
		flTwoBack = flOneBack;
		flOneBack = flCurrent;
		++nDegree;
	}
	return flCurrent;
}
|
||||
|
||||
static float SHNormalizationFactor( int nL, int nM )
|
||||
{
|
||||
double flTemp = ( ( 2. * nL + 1.0 ) * s_flFactorials[ nL - nM ] )/ ( 4. * M_PI * s_flFactorials[ nL + nM ] );
|
||||
return sqrt( flTemp );
|
||||
}
|
||||
|
||||
#define SQRT_2 1.414213562373095
|
||||
|
||||
// Real spherical harmonic of degree nL and order nM.
// flCosTheta feeds the associated Legendre term and flPhi feeds the
// cos/sin(m*phi) term. flTheta itself is not read here; only its cosine,
// supplied by the caller, is used.
//   nM == 0 : K(l,0) * P(l,0)
//   nM  > 0 : sqrt(2) * K(l,m)  * cos( m*phi)  * P(l,m)
//   nM  < 0 : sqrt(2) * K(l,-m) * sin(-m*phi)  * P(l,-m)
FORCEINLINE float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi, float flCosTheta )
{
	if ( nM > 0 )
	{
		return SQRT_2 * SHNormalizationFactor( nL, nM ) * cos ( nM * flPhi ) *
			AssociatedLegendrePolynomial( nL, nM, flCosTheta );
	}
	else if ( nM < 0 )
	{
		return SQRT_2 * SHNormalizationFactor( nL, -nM ) * sin( -nM * flPhi ) *
			AssociatedLegendrePolynomial( nL, -nM, flCosTheta );
	}

	// Zonal case (nM == 0): no azimuthal dependence.
	return SHNormalizationFactor( nL, 0 ) * AssociatedLegendrePolynomial( nL, nM, flCosTheta );
}
|
||||
|
||||
float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi )
|
||||
{
|
||||
return SphericalHarmonic( nL, nM, flTheta, flPhi, cos( flTheta ) );
|
||||
}
|
||||
|
||||
float SphericalHarmonic( int nL, int nM, Vector const &vecDirection )
|
||||
{
|
||||
Assert( fabs( VectorLength( vecDirection ) - 1.0 ) < 0.0001 );
|
||||
float flPhi = acos( vecDirection.z );
|
||||
float flTheta = 0;
|
||||
float S = Square( vecDirection.x ) + Square( vecDirection.y );
|
||||
if ( S > 0 )
|
||||
{
|
||||
flTheta = atan2( vecDirection.y, vecDirection.x );
|
||||
}
|
||||
return SphericalHarmonic( nL, nM, flTheta, flPhi, cos( flTheta ) );
|
||||
}
|
||||
|
458
mathlib/sse.cpp
458
mathlib/sse.cpp
@ -1,4 +1,4 @@
|
||||
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
|
||||
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose: SSE Math primitives.
|
||||
//
|
||||
@ -16,7 +16,10 @@
|
||||
// memdbgon must be the last include file in a .cpp file!!!
|
||||
#include "tier0/memdbgon.h"
|
||||
|
||||
#if defined ( _WIN32 ) && !defined ( _WIN64 )
|
||||
#ifndef COMPILER_MSVC64
|
||||
// Implement for 64-bit Windows if needed.
|
||||
|
||||
#ifdef _WIN32
|
||||
static const uint32 _sincos_masks[] = { (uint32)0x0, (uint32)~0x0 };
|
||||
static const uint32 _sincos_inv_masks[] = { (uint32)~0x0, (uint32)0x0 };
|
||||
#endif
|
||||
@ -37,21 +40,21 @@ static const uint32 _sincos_inv_masks[] = { (uint32)~0x0, (uint32)0x0 };
|
||||
|
||||
#define _PS_CONST(Name, Val) \
|
||||
static const __declspec(align(16)) float _ps_##Name[4] = { Val, Val, Val, Val }
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
#elif POSIX
|
||||
#define _PS_EXTERN_CONST(Name, Val) \
|
||||
const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
|
||||
const float _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
|
||||
|
||||
#define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
|
||||
const __attribute__((aligned(16))) Type _ps_##Name[4] = { Val, Val, Val, Val }; \
|
||||
const Type _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }; \
|
||||
|
||||
#define _EPI32_CONST(Name, Val) \
|
||||
static const __attribute__((aligned(16))) int32 _epi32_##Name[4] = { Val, Val, Val, Val }
|
||||
static const int32 _epi32_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
|
||||
|
||||
#define _PS_CONST(Name, Val) \
|
||||
static const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
|
||||
static const float _ps_##Name[4] __attribute__((aligned(16))) = { Val, Val, Val, Val }
|
||||
#endif
|
||||
|
||||
#if defined ( _WIN32 ) && !defined ( _WIN64 )
|
||||
#ifdef _WIN32
|
||||
_PS_EXTERN_CONST(am_0, 0.0f);
|
||||
_PS_EXTERN_CONST(am_1, 1.0f);
|
||||
_PS_EXTERN_CONST(am_m1, -1.0f);
|
||||
@ -62,8 +65,8 @@ _PS_EXTERN_CONST(am_pi_o_2, (float)(M_PI / 2.0));
|
||||
_PS_EXTERN_CONST(am_2_o_pi, (float)(2.0 / M_PI));
|
||||
_PS_EXTERN_CONST(am_pi_o_4, (float)(M_PI / 4.0));
|
||||
_PS_EXTERN_CONST(am_4_o_pi, (float)(4.0 / M_PI));
|
||||
_PS_EXTERN_CONST_TYPE(am_sign_mask, int32, (int32)0x80000000);
|
||||
_PS_EXTERN_CONST_TYPE(am_inv_sign_mask, int32, ~0x80000000);
|
||||
_PS_EXTERN_CONST_TYPE(am_sign_mask, int32, static_cast<int32>(0x80000000));
|
||||
_PS_EXTERN_CONST_TYPE(am_inv_sign_mask, int32, static_cast<int32>(~0x80000000));
|
||||
_PS_EXTERN_CONST_TYPE(am_min_norm_pos,int32, 0x00800000);
|
||||
_PS_EXTERN_CONST_TYPE(am_mant_mask, int32, 0x7f800000);
|
||||
_PS_EXTERN_CONST_TYPE(am_inv_mant_mask, int32, ~0x7f800000);
|
||||
@ -86,9 +89,6 @@ void __cdecl _SSE_VectorMA( const float *start, float scale, const float *direc
|
||||
//-----------------------------------------------------------------------------
|
||||
float _SSE_Sqrt(float x)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::sqrt(x);
|
||||
#else
|
||||
Assert( s_bMathlibInitialized );
|
||||
float root = 0.f;
|
||||
#ifdef _WIN32
|
||||
@ -97,17 +97,10 @@ float _SSE_Sqrt(float x)
|
||||
sqrtss xmm0, x
|
||||
movss root, xmm0
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
__asm__ __volatile__(
|
||||
"movss %1,%%xmm2\n"
|
||||
"sqrtss %%xmm2,%%xmm1\n"
|
||||
"movss %%xmm1,%0"
|
||||
: "=m" (root)
|
||||
: "m" (x)
|
||||
);
|
||||
#elif POSIX
|
||||
_mm_store_ss( &root, _mm_sqrt_ss( _mm_load_ss( &x ) ) );
|
||||
#endif
|
||||
return root;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
// Single iteration NewtonRaphson reciprocal square root:
|
||||
@ -128,17 +121,21 @@ float _SSE_RSqrtAccurate(float x)
|
||||
return (0.5f * rroot) * (3.f - (x * rroot) * rroot);
|
||||
}
|
||||
#else
|
||||
|
||||
#ifdef POSIX
|
||||
const __m128 f3 = _mm_set_ss(3.0f); // 3 as SSE value
|
||||
const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value
|
||||
#endif
|
||||
|
||||
// Intel / Kipps SSE RSqrt. Significantly faster than above.
|
||||
float _SSE_RSqrtAccurate(float a)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::sqrt(a);
|
||||
#else
|
||||
|
||||
#ifdef _WIN32
|
||||
float x;
|
||||
float half = 0.5f;
|
||||
float three = 3.f;
|
||||
|
||||
#ifdef _WIN32
|
||||
__asm
|
||||
{
|
||||
movss xmm3, a;
|
||||
@ -154,27 +151,25 @@ float _SSE_RSqrtAccurate(float a)
|
||||
|
||||
movss x, xmm1;
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
__asm__ __volatile__(
|
||||
"movss %1, %%xmm3 \n\t"
|
||||
"movss %2, %%xmm1 \n\t"
|
||||
"movss %3, %%xmm2 \n\t"
|
||||
"rsqrtss %%xmm3, %%xmm0 \n\t"
|
||||
"mulss %%xmm0, %%xmm3 \n\t"
|
||||
"mulss %%xmm0, %%xmm1 \n\t"
|
||||
"mulss %%xmm0, %%xmm3 \n\t"
|
||||
"subss %%xmm3, %%xmm2 \n\t"
|
||||
"mulss %%xmm2, %%xmm1 \n\t"
|
||||
"movss %%xmm1, %0 \n\t"
|
||||
: "=m" (x)
|
||||
: "m" (a), "m" (half), "m" (three)
|
||||
);
|
||||
|
||||
return x;
|
||||
#elif POSIX
|
||||
__m128 xx = _mm_load_ss( &a );
|
||||
__m128 xr = _mm_rsqrt_ss( xx );
|
||||
__m128 xt;
|
||||
|
||||
xt = _mm_mul_ss( xr, xr );
|
||||
xt = _mm_mul_ss( xt, xx );
|
||||
xt = _mm_sub_ss( f3, xt );
|
||||
xt = _mm_mul_ss( xt, f05 );
|
||||
xr = _mm_mul_ss( xr, xt );
|
||||
|
||||
_mm_store_ss( &a, xr );
|
||||
return a;
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
|
||||
return x;
|
||||
#endif // _WIN64
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -182,54 +177,40 @@ float _SSE_RSqrtAccurate(float a)
|
||||
// or so, so ok for closed transforms. (ie, computing lighting normals)
|
||||
float _SSE_RSqrtFast(float x)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::sqrt(x);
|
||||
#else
|
||||
Assert( s_bMathlibInitialized );
|
||||
|
||||
float rroot = 0.0f;
|
||||
float rroot;
|
||||
#ifdef _WIN32
|
||||
_asm
|
||||
{
|
||||
rsqrtss xmm0, x
|
||||
movss rroot, xmm0
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
__asm__ __volatile__(
|
||||
"rsqrtss %1, %%xmm0 \n\t"
|
||||
"movss %%xmm0, %0 \n\t"
|
||||
: "=m" (x)
|
||||
: "m" (rroot)
|
||||
: "%xmm0"
|
||||
);
|
||||
#elif POSIX
|
||||
__asm__ __volatile__( "rsqrtss %0, %1" : "=x" (rroot) : "x" (x) );
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
return rroot;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
float l = std::sqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
|
||||
vec.x /= l;
|
||||
vec.y /= l;
|
||||
vec.z /= l;
|
||||
return l;
|
||||
#else
|
||||
Assert( s_bMathlibInitialized );
|
||||
|
||||
// NOTE: This is necessary to prevent a memory overwrite...
|
||||
// since vec only has 3 floats, we can't "movaps" directly into it.
|
||||
#ifdef _WIN32
|
||||
__declspec(align(16)) float result[4];
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
__attribute__((aligned(16))) float result[4];
|
||||
#elif POSIX
|
||||
float result[4] __attribute__((aligned(16)));
|
||||
#endif
|
||||
|
||||
float *v = &vec[0];
|
||||
#ifdef _WIN32
|
||||
float *r = &result[0];
|
||||
#endif
|
||||
|
||||
float radius = 0.f;
|
||||
// Blah, get rid of these comparisons ... in reality, if you have all 3 as zero, it shouldn't
|
||||
@ -237,7 +218,6 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||
if ( v[0] || v[1] || v[2] )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
float *r = &result[0];
|
||||
_asm
|
||||
{
|
||||
mov eax, v
|
||||
@ -262,7 +242,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||
mulps xmm4, xmm1 // r4 = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
|
||||
movaps [edx], xmm4 // v = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
#elif POSIX
|
||||
__asm__ __volatile__(
|
||||
#ifdef ALIGNED_VECTOR
|
||||
"movaps %2, %%xmm4 \n\t"
|
||||
@ -285,6 +265,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||
"movaps %%xmm4, %1 \n\t"
|
||||
: "=m" (radius), "=m" (result)
|
||||
: "m" (*v)
|
||||
: "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
);
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
@ -296,7 +277,6 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||
}
|
||||
|
||||
return radius;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
|
||||
@ -310,10 +290,6 @@ void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
|
||||
|
||||
float _SSE_InvRSquared(const float* v)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
float r2 = DotProduct(v, v);
|
||||
return r2 < 1.f ? 1.f : 1/r2;
|
||||
#else
|
||||
float inv_r2 = 1.f;
|
||||
#ifdef _WIN32
|
||||
_asm { // Intel SSE only routine
|
||||
@ -331,12 +307,13 @@ float _SSE_InvRSquared(const float* v)
|
||||
shufps xmm2, xmm2, 1 // x2 = vy * vy, X, X, X
|
||||
addss xmm1, xmm2 // x1 = (vx * vx) + (vy * vy), X, X, X
|
||||
addss xmm1, xmm3 // x1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
|
||||
maxss xmm1, xmm5 // x1 = MAX( 1.0, x1 )
|
||||
rcpss xmm0, xmm1 // x0 = 1 / MAX( 1.0, x1 )
|
||||
maxss xmm1, xmm5 // x1 = max( 1.0, x1 )
|
||||
rcpss xmm0, xmm1 // x0 = 1 / max( 1.0, x1 )
|
||||
movss inv_r2, xmm0 // inv_r2 = x0
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
#elif POSIX
|
||||
__asm__ __volatile__(
|
||||
"movss %0, %%xmm5 \n\t"
|
||||
#ifdef ALIGNED_VECTOR
|
||||
"movaps %1, %%xmm4 \n\t"
|
||||
#else
|
||||
@ -352,23 +329,64 @@ float _SSE_InvRSquared(const float* v)
|
||||
"maxss %%xmm5, %%xmm1 \n\t"
|
||||
"rcpss %%xmm1, %%xmm0 \n\t"
|
||||
"movss %%xmm0, %0 \n\t"
|
||||
: "=m" (inv_r2)
|
||||
: "m" (*v), "m" (inv_r2)
|
||||
: "+m" (inv_r2)
|
||||
: "m" (*v)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
|
||||
return inv_r2;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
|
||||
#ifdef POSIX
|
||||
// #define _PS_CONST(Name, Val) static const ALIGN16 float _ps_##Name[4] ALIGN16_POST = { Val, Val, Val, Val }
|
||||
#define _PS_CONST_TYPE(Name, Type, Val) static const ALIGN16 Type _ps_##Name[4] ALIGN16_POST = { static_cast<Type>(Val), static_cast<Type>(Val), static_cast<Type>(Val), static_cast<Type>(Val) }
|
||||
|
||||
_PS_CONST_TYPE(sign_mask, int, 0x80000000);
|
||||
_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
|
||||
|
||||
|
||||
#define _PI32_CONST(Name, Val) static const ALIGN16 int _pi32_##Name[4] ALIGN16_POST = { Val, Val, Val, Val }
|
||||
|
||||
_PI32_CONST(1, 1);
|
||||
_PI32_CONST(inv1, ~1);
|
||||
_PI32_CONST(2, 2);
|
||||
_PI32_CONST(4, 4);
|
||||
#ifdef _WIN32
|
||||
_PI32_CONST(0x7f, 0x7f);
|
||||
#endif
|
||||
_PS_CONST(1 , 1.0f);
|
||||
_PS_CONST(0p5, 0.5f);
|
||||
|
||||
_PS_CONST(minus_cephes_DP1, -0.78515625);
|
||||
_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
|
||||
_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
|
||||
_PS_CONST(sincof_p0, -1.9515295891E-4);
|
||||
_PS_CONST(sincof_p1, 8.3321608736E-3);
|
||||
_PS_CONST(sincof_p2, -1.6666654611E-1);
|
||||
_PS_CONST(coscof_p0, 2.443315711809948E-005);
|
||||
_PS_CONST(coscof_p1, -1.388731625493765E-003);
|
||||
_PS_CONST(coscof_p2, 4.166664568298827E-002);
|
||||
_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
|
||||
|
||||
// Overlays one __m128 value with a pair of __m64 values so the two views
// share the same storage. COPY_MM_TO_XMM writes both mm[] halves and then
// reads the combined result back through xmm.
typedef union xmm_mm_union {
|
||||
__m128 xmm; // whole value viewed as a single __m128
|
||||
__m64 mm[2]; // the same storage viewed as two __m64 halves
|
||||
} xmm_mm_union;
|
||||
|
||||
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; }
|
||||
|
||||
typedef __m128 v4sf; // vector of 4 float (sse1)
|
||||
typedef __m64 v2si; // vector of 2 int (mmx)
|
||||
|
||||
#endif
|
||||
|
||||
void _SSE_SinCos(float x, float* s, float* c)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
*s = std::sin(x);
|
||||
*c = std::cos(x);
|
||||
#elif defined( _WIN32 )
|
||||
#ifdef _WIN32
|
||||
float t4, t8, t12;
|
||||
|
||||
__asm
|
||||
@ -453,8 +471,121 @@ void _SSE_SinCos(float x, float* s, float* c)
|
||||
movss [eax], xmm0
|
||||
movss [edx], xmm4
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "_SSE_sincos NOT implemented!"
|
||||
#elif POSIX
|
||||
|
||||
Assert( "Needs testing, verify impl!\n" );
|
||||
|
||||
v4sf xx = _mm_load_ss( &x );
|
||||
|
||||
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
|
||||
v2si mm0, mm1, mm2, mm3, mm4, mm5;
|
||||
sign_bit_sin = xx;
|
||||
/* take the absolute value */
|
||||
xx = _mm_and_ps(xx, *(v4sf*)_ps_inv_sign_mask);
|
||||
/* extract the sign bit (upper one) */
|
||||
sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = _mm_mul_ps(xx, *(v4sf*)_ps_cephes_FOPI);
|
||||
|
||||
/* store the integer part of y in mm2:mm3 */
|
||||
xmm3 = _mm_movehl_ps(xmm3, y);
|
||||
mm2 = _mm_cvttps_pi32(y);
|
||||
mm3 = _mm_cvttps_pi32(xmm3);
|
||||
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
|
||||
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
|
||||
|
||||
y = _mm_cvtpi32x2_ps(mm2, mm3);
|
||||
|
||||
mm4 = mm2;
|
||||
mm5 = mm3;
|
||||
|
||||
/* get the swap sign flag for the sine */
|
||||
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
|
||||
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
|
||||
mm0 = _mm_slli_pi32(mm0, 29);
|
||||
mm1 = _mm_slli_pi32(mm1, 29);
|
||||
v4sf swap_sign_bit_sin;
|
||||
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
|
||||
|
||||
/* get the polynom selection mask for the sine */
|
||||
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
|
||||
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
|
||||
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
|
||||
v4sf poly_mask;
|
||||
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
|
||||
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
|
||||
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
|
||||
xmm1 = _mm_mul_ps(y, xmm1);
|
||||
xmm2 = _mm_mul_ps(y, xmm2);
|
||||
xmm3 = _mm_mul_ps(y, xmm3);
|
||||
xx = _mm_add_ps(xx, xmm1);
|
||||
xx = _mm_add_ps(xx, xmm2);
|
||||
xx = _mm_add_ps(xx, xmm3);
|
||||
|
||||
/* get the sign flag for the cosine */
|
||||
mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
|
||||
mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
|
||||
mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
|
||||
mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
|
||||
mm4 = _mm_slli_pi32(mm4, 29);
|
||||
mm5 = _mm_slli_pi32(mm5, 29);
|
||||
v4sf sign_bit_cos;
|
||||
COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
|
||||
_mm_empty(); /* good-bye mmx */
|
||||
|
||||
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
|
||||
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
v4sf z = _mm_mul_ps(xx,xx);
|
||||
y = *(v4sf*)_ps_coscof_p0;
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_mul_ps(y, z);
|
||||
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
v4sf y2 = *(v4sf*)_ps_sincof_p0;
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_mul_ps(y2, xx);
|
||||
y2 = _mm_add_ps(y2, xx);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
xmm3 = poly_mask;
|
||||
v4sf ysin2 = _mm_and_ps(xmm3, y2);
|
||||
v4sf ysin1 = _mm_andnot_ps(xmm3, y);
|
||||
y2 = _mm_sub_ps(y2,ysin2);
|
||||
y = _mm_sub_ps(y, ysin1);
|
||||
|
||||
xmm1 = _mm_add_ps(ysin1,ysin2);
|
||||
xmm2 = _mm_add_ps(y,y2);
|
||||
|
||||
/* update the sign */
|
||||
_mm_store_ss( s, _mm_xor_ps(xmm1, sign_bit_sin) );
|
||||
_mm_store_ss( c, _mm_xor_ps(xmm2, sign_bit_cos) );
|
||||
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
@ -462,9 +593,7 @@ void _SSE_SinCos(float x, float* s, float* c)
|
||||
|
||||
float _SSE_cos( float x )
|
||||
{
|
||||
#if defined ( _WIN64 )
|
||||
return std::cos(x);
|
||||
#elif defined( _WIN32 )
|
||||
#ifdef _WIN32
|
||||
float temp;
|
||||
__asm
|
||||
{
|
||||
@ -513,8 +642,102 @@ float _SSE_cos( float x )
|
||||
movss x, xmm0
|
||||
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "_SSE_cos NOT implemented!"
|
||||
#elif POSIX
|
||||
|
||||
Assert( "Needs testing, verify impl!\n" );
|
||||
|
||||
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
|
||||
v2si mm0, mm1, mm2, mm3;
|
||||
/* take the absolute value */
|
||||
v4sf xx = _mm_load_ss( &x );
|
||||
|
||||
xx = _mm_and_ps(xx, *(v4sf*)_ps_inv_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = _mm_mul_ps(xx, *(v4sf*)_ps_cephes_FOPI);
|
||||
|
||||
/* store the integer part of y in mm0:mm1 */
|
||||
xmm2 = _mm_movehl_ps(xmm2, y);
|
||||
mm2 = _mm_cvttps_pi32(y);
|
||||
mm3 = _mm_cvttps_pi32(xmm2);
|
||||
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
|
||||
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
|
||||
|
||||
y = _mm_cvtpi32x2_ps(mm2, mm3);
|
||||
|
||||
|
||||
mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
|
||||
|
||||
/* get the swap sign flag in mm0:mm1 and the
|
||||
polynom selection mask in mm2:mm3 */
|
||||
|
||||
mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
|
||||
mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
|
||||
mm0 = _mm_slli_pi32(mm0, 29);
|
||||
mm1 = _mm_slli_pi32(mm1, 29);
|
||||
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
|
||||
|
||||
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
|
||||
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
|
||||
|
||||
v4sf sign_bit, poly_mask;
|
||||
COPY_MM_TO_XMM(mm0, mm1, sign_bit);
|
||||
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
|
||||
_mm_empty(); /* good-bye mmx */
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
|
||||
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
|
||||
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
|
||||
xmm1 = _mm_mul_ps(y, xmm1);
|
||||
xmm2 = _mm_mul_ps(y, xmm2);
|
||||
xmm3 = _mm_mul_ps(y, xmm3);
|
||||
xx = _mm_add_ps(xx, xmm1);
|
||||
xx = _mm_add_ps(xx, xmm2);
|
||||
xx = _mm_add_ps(xx, xmm3);
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
y = *(v4sf*)_ps_coscof_p0;
|
||||
v4sf z = _mm_mul_ps(xx,xx);
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_mul_ps(y, z);
|
||||
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
v4sf y2 = *(v4sf*)_ps_sincof_p0;
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_mul_ps(y2, xx);
|
||||
y2 = _mm_add_ps(y2, xx);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
xmm3 = poly_mask;
|
||||
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
|
||||
y = _mm_andnot_ps(xmm3, y);
|
||||
y = _mm_add_ps(y,y2);
|
||||
/* update the sign */
|
||||
|
||||
_mm_store_ss( &x, _mm_xor_ps(y, sign_bit) );
|
||||
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
@ -525,12 +748,10 @@ float _SSE_cos( float x )
|
||||
//-----------------------------------------------------------------------------
|
||||
// SSE2 implementations of optimized routines:
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
void _SSE2_SinCos(float x, float* s, float* c) // any x
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
*s = std::sin(x);
|
||||
*c = std::cos(x);
|
||||
#elif defined( _WIN32 )
|
||||
#ifdef _WIN32
|
||||
__asm
|
||||
{
|
||||
movss xmm0, x
|
||||
@ -606,18 +827,19 @@ void _SSE2_SinCos(float x, float* s, float* c) // any x
|
||||
movss [eax], xmm0
|
||||
movss [edx], xmm6
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "_SSE2_SinCos NOT implemented!"
|
||||
#elif POSIX
|
||||
#warning "_SSE2_SinCos NOT implemented!"
|
||||
Assert( 0 );
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif // PLATFORM_WINDOWS_PC32
|
||||
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
float _SSE2_cos(float x)
|
||||
{
|
||||
#if defined ( _WIN64 )
|
||||
return std::cos(x);
|
||||
#elif defined( _WIN32 )
|
||||
#ifdef _WIN32
|
||||
__asm
|
||||
{
|
||||
movss xmm0, x
|
||||
@ -663,25 +885,25 @@ float _SSE2_cos(float x)
|
||||
mulss xmm0, xmm1
|
||||
movss x, xmm0
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "_SSE2_cos NOT implemented!"
|
||||
#elif POSIX
|
||||
#warning "_SSE2_cos NOT implemented!"
|
||||
Assert( 0 );
|
||||
#else
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
|
||||
return x;
|
||||
}
|
||||
#endif // PLATFORM_WINDOWS_PC32
|
||||
|
||||
#if 0
|
||||
// SSE Version of VectorTransform
|
||||
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
Assert( in1 != out1 );
|
||||
#if defined ( _WIN64 )
|
||||
out1[0] = DotProduct(in1, in2[0]) + in2[0][3];
|
||||
out1[1] = DotProduct(in1, in2[1]) + in2[1][3];
|
||||
out1[2] = DotProduct(in1, in2[2]) + in2[2][3];
|
||||
#elif defined( _WIN32 )
|
||||
|
||||
#ifdef _WIN32
|
||||
__asm
|
||||
{
|
||||
mov eax, in1;
|
||||
@ -723,8 +945,8 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
|
||||
addss xmm0, [ecx+12]
|
||||
movss [edx+8], xmm0;
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "VectorTransformSSE C implementation only"
|
||||
#elif POSIX
|
||||
#warning "VectorTransformSSE C implementation only"
|
||||
out1[0] = DotProduct(in1, in2[0]) + in2[0][3];
|
||||
out1[1] = DotProduct(in1, in2[1]) + in2[1][3];
|
||||
out1[2] = DotProduct(in1, in2[2]) + in2[2][3];
|
||||
@ -732,16 +954,15 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
Assert( in1 != out1 );
|
||||
#if defined ( _WIN64 )
|
||||
out1[0] = DotProduct( in1, in2[0] );
|
||||
out1[1] = DotProduct( in1, in2[1] );
|
||||
out1[2] = DotProduct( in1, in2[2] );
|
||||
#elif defined( _WIN32 )
|
||||
|
||||
#ifdef _WIN32
|
||||
__asm
|
||||
{
|
||||
mov eax, in1;
|
||||
@ -780,8 +1001,8 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
||||
addss xmm0, xmm2;
|
||||
movss [edx+8], xmm0;
|
||||
}
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
// #warning "VectorRotateSSE C implementation only"
|
||||
#elif POSIX
|
||||
#warning "VectorRotateSSE C implementation only"
|
||||
out1[0] = DotProduct( in1, in2[0] );
|
||||
out1[1] = DotProduct( in1, in2[1] );
|
||||
out1[2] = DotProduct( in1, in2[2] );
|
||||
@ -789,8 +1010,9 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _WIN64 )
|
||||
#ifdef _WIN32
|
||||
void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest )
|
||||
{
|
||||
// FIXME: This doesn't work!! It will overwrite memory in the write to dest
|
||||
@ -821,7 +1043,7 @@ void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const floa
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _WIN64 )
|
||||
#ifdef _WIN32
|
||||
#ifdef PFN_VECTORMA
|
||||
void _declspec(naked) __cdecl _SSE_VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
||||
{
|
||||
@ -886,4 +1108,6 @@ vec_t DotProduct (const vec_t *a, const vec_t *c)
|
||||
ret
|
||||
}
|
||||
}
|
||||
*/
|
||||
*/
|
||||
|
||||
#endif // COMPILER_MSVC64
|
||||
|
@ -15,9 +15,13 @@ void FASTCALL _SSE_VectorNormalizeFast(Vector& vec);
|
||||
float _SSE_InvRSquared(const float* v);
|
||||
void _SSE_SinCos(float x, float* s, float* c);
|
||||
float _SSE_cos( float x);
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
void _SSE2_SinCos(float x, float* s, float* c);
|
||||
float _SSE2_cos(float x);
|
||||
#endif
|
||||
#if 0
|
||||
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1);
|
||||
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 );
|
||||
#endif
|
||||
|
||||
#endif // _SSE_H
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
@ -30,24 +30,33 @@ const fltx4 Four_FLT_MAX={FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||
const fltx4 Four_Negative_FLT_MAX={-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX};
|
||||
const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. };
|
||||
|
||||
const int32 ALIGN16 g_SIMD_clear_signmask[4]= {(int32)0x7fffffff,(int32)0x7fffffff,(int32)0x7fffffff,(int32)0x7fffffff};
|
||||
const int32 ALIGN16 g_SIMD_signmask[4]= { (int32)0x80000000, (int32)0x80000000, (int32)0x80000000, (int32)0x80000000 };
|
||||
const int32 ALIGN16 g_SIMD_lsbmask[4]= { (int32)0xfffffffe, (int32)0xfffffffe, (int32)0xfffffffe, (int32)0xfffffffe };
|
||||
const int32 ALIGN16 g_SIMD_clear_wmask[4]= { (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, 0 };
|
||||
const int32 ALIGN16 g_SIMD_AllOnesMask[4]= { (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff }; // ~0,~0,~0,~0
|
||||
const int32 ALIGN16 g_SIMD_Low16BitsMask[4]= { (int32)0xffff, (int32)0xffff,(int32) 0xffff, (int32)0xffff }; // 0xffff x 4
|
||||
|
||||
const int32 ALIGN16 g_SIMD_ComponentMask[4][4] =
|
||||
extern const fltx4 g_QuatMultRowSign[4];
|
||||
const fltx4 g_QuatMultRowSign[4] =
|
||||
{
|
||||
{ (int32)0xFFFFFFFF, 0, 0, 0 }, { 0, (int32)0xFFFFFFFF, 0, 0 }, { 0, 0, (int32)0xFFFFFFFF, 0 }, { 0, 0, 0, (int32)0xFFFFFFFF }
|
||||
{ 1.0f, 1.0f, -1.0f, 1.0f },
|
||||
{ -1.0f, 1.0f, 1.0f, 1.0f },
|
||||
{ 1.0f, -1.0f, 1.0f, 1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, 1.0f }
|
||||
};
|
||||
|
||||
const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] =
|
||||
const int32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = {static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff), static_cast<int32>(0x7fffffff)};
|
||||
const int32 ALIGN16 g_SIMD_signmask[4] ALIGN16_POST = { static_cast<int32>(0x80000000), static_cast<int32>(0x80000000), static_cast<int32>(0x80000000), static_cast<int32>(0x80000000) };
|
||||
const int32 ALIGN16 g_SIMD_lsbmask[4] ALIGN16_POST = { static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe), static_cast<int32>(0xfffffffe) };
|
||||
const int32 ALIGN16 g_SIMD_clear_wmask[4] ALIGN16_POST = { static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), 0 };
|
||||
const int32 ALIGN16 g_SIMD_AllOnesMask[4] ALIGN16_POST = { static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff) }; // ~0,~0,~0,~0
|
||||
const int32 ALIGN16 g_SIMD_Low16BitsMask[4] ALIGN16_POST = { 0xffff, 0xffff, 0xffff, 0xffff }; // 0xffff x 4
|
||||
|
||||
const int32 ALIGN16 g_SIMD_ComponentMask[4][4] ALIGN16_POST =
|
||||
{
|
||||
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff },
|
||||
{ (int32)0xffffffff, (int32)0x00000000, (int32)0x00000000, (int32)0x00000000 },
|
||||
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0x00000000, (int32)0x00000000 },
|
||||
{ (int32)0xffffffff, (int32)0xffffffff, (int32)0xffffffff, (int32)0x00000000 },
|
||||
{ static_cast<int32>(0xFFFFFFFF), 0, 0, 0 }, { 0, static_cast<int32>(0xFFFFFFFF), 0, 0 }, { 0, 0, static_cast<int32>(0xFFFFFFFF), 0 }, { 0, 0, 0, static_cast<int32>(0xFFFFFFFF) }
|
||||
};
|
||||
|
||||
const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
|
||||
{
|
||||
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff) },
|
||||
{ static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000) },
|
||||
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000), static_cast<int32>(0x00000000) },
|
||||
{ static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0xffffffff), static_cast<int32>(0x00000000) },
|
||||
};
|
||||
|
||||
|
||||
|
@ -30,6 +30,10 @@ static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
|
||||
// returns 0..1
|
||||
static inline float GetLatticePointValue( int idx_x, int idx_y, int idx_z )
|
||||
{
|
||||
NOTE_UNUSED(perm_d);
|
||||
NOTE_UNUSED(impulse_ycoords);
|
||||
NOTE_UNUSED(impulse_zcoords);
|
||||
|
||||
int ret_idx = perm_a[idx_x & 0xff];
|
||||
ret_idx = perm_b[( idx_y + ret_idx ) & 0xff];
|
||||
ret_idx = perm_c[( idx_z + ret_idx ) & 0xff];
|
||||
|
@ -306,7 +306,7 @@ bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst)
|
||||
for(iRow=0; iRow < 4; iRow++)
|
||||
{
|
||||
// Find the row with the largest element in this column.
|
||||
fLargest = 0.001f;
|
||||
fLargest = 0.00001f;
|
||||
iLargest = -1;
|
||||
for(iTest=iRow; iTest < 4; iTest++)
|
||||
{
|
||||
@ -509,7 +509,7 @@ bool VMatrix::IsRotationMatrix() const
|
||||
FloatMakePositive( v2.Dot(v3) ) < 0.01f;
|
||||
}
|
||||
|
||||
void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles )
|
||||
static void SetupMatrixAnglesInternal( vec_t m[4][4], const QAngle & vAngles )
|
||||
{
|
||||
float sr, sp, sy, cr, cp, cy;
|
||||
|
||||
@ -530,6 +530,11 @@ void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles
|
||||
m[0][3] = 0.f;
|
||||
m[1][3] = 0.f;
|
||||
m[2][3] = 0.f;
|
||||
}
|
||||
|
||||
void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles )
|
||||
{
|
||||
SetupMatrixAnglesInternal( m, vAngles );
|
||||
|
||||
// Add translation
|
||||
m[0][3] = origin.x;
|
||||
@ -542,6 +547,21 @@ void VMatrix::SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles
|
||||
}
|
||||
|
||||
|
||||
void VMatrix::SetupMatrixAngles( const QAngle &vAngles )
|
||||
{
|
||||
SetupMatrixAnglesInternal( m, vAngles );
|
||||
|
||||
// Zero everything else
|
||||
m[0][3] = 0.0f;
|
||||
m[1][3] = 0.0f;
|
||||
m[2][3] = 0.0f;
|
||||
m[3][0] = 0.0f;
|
||||
m[3][1] = 0.0f;
|
||||
m[3][2] = 0.0f;
|
||||
m[3][3] = 1.0f;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Sets matrix to identity
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -728,7 +748,7 @@ void Vector4DMultiplyPosition( const VMatrix& src1, Vector const& src2, Vector4D
|
||||
{
|
||||
// Make sure it works if src2 == dst
|
||||
Vector tmp;
|
||||
Vector const&v = ( &src2 == &dst.AsVector3D() ) ? tmp : src2;
|
||||
Vector const&v = ( &src2 == &dst.AsVector3D() ) ? static_cast<const Vector&>(tmp) : src2;
|
||||
|
||||
if (&src2 == &dst.AsVector3D())
|
||||
{
|
||||
@ -751,7 +771,7 @@ void Vector3DMultiply( const VMatrix &src1, const Vector &src2, Vector &dst )
|
||||
{
|
||||
// Make sure it works if src2 == dst
|
||||
Vector tmp;
|
||||
const Vector &v = (&src2 == &dst) ? tmp : src2;
|
||||
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp) : src2;
|
||||
|
||||
if( &src2 == &dst )
|
||||
{
|
||||
@ -772,7 +792,7 @@ void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2
|
||||
{
|
||||
// Make sure it works if src2 == dst
|
||||
Vector tmp;
|
||||
const Vector &v = (&src2 == &dst) ? tmp: src2;
|
||||
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp): src2;
|
||||
if( &src2 == &dst )
|
||||
{
|
||||
VectorCopy( src2, tmp );
|
||||
@ -799,7 +819,7 @@ void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector
|
||||
{
|
||||
// Make sure it works if src2 == dst
|
||||
Vector tmp;
|
||||
const Vector &v = (&src2 == &dst) ? tmp : src2;
|
||||
const Vector &v = (&src2 == &dst) ? static_cast<const Vector&>(tmp) : src2;
|
||||
if( &src2 == &dst )
|
||||
{
|
||||
VectorCopy( src2, tmp );
|
||||
@ -852,7 +872,7 @@ void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector&
|
||||
bool srcEqualsDst = (&src2 == &dst);
|
||||
|
||||
Vector tmp;
|
||||
const Vector&v = srcEqualsDst ? tmp : src2;
|
||||
const Vector&v = srcEqualsDst ? static_cast<const Vector&>(tmp) : src2;
|
||||
|
||||
if (srcEqualsDst)
|
||||
{
|
||||
@ -937,7 +957,7 @@ void MatrixBuildTranslation( VMatrix& dst, const Vector &translation )
|
||||
//-----------------------------------------------------------------------------
|
||||
void MatrixBuildRotationAboutAxis( VMatrix &dst, const Vector &vAxisOfRot, float angleDegrees )
|
||||
{
|
||||
MatrixBuildRotationAboutAxis( vAxisOfRot, angleDegrees, dst.As3x4() );
|
||||
MatrixBuildRotationAboutAxis( vAxisOfRot, angleDegrees, const_cast< matrix3x4_t &> ( dst.As3x4() ) );
|
||||
dst[3][0] = 0;
|
||||
dst[3][1] = 0;
|
||||
dst[3][2] = 0;
|
||||
@ -1233,19 +1253,29 @@ void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, doub
|
||||
0.0f, 0.0f, 0.0f, 1.0f );
|
||||
}
|
||||
|
||||
void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar )
|
||||
{
|
||||
dst.m[2][0] = 0.0f;
|
||||
dst.m[2][1] = 0.0f;
|
||||
dst.m[2][2] = flZFar / ( flZNear - flZFar );
|
||||
dst.m[2][3] = flZNear * flZFar / ( flZNear - flZFar );
|
||||
}
|
||||
|
||||
void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
|
||||
{
|
||||
float flWidth = 2.0f * flZNear * tanf( flFovX * M_PI / 360.0f );
|
||||
float flHeight = flWidth / flAspect;
|
||||
dst.Init( 2.0f * flZNear / flWidth, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 2.0f * flZNear/ flHeight, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, flZFar / ( flZNear - flZFar ), flZNear * flZFar / ( flZNear - flZFar ),
|
||||
float flWidthScale = 1.0f / tanf( flFovX * M_PI / 360.0f );
|
||||
float flHeightScale = flAspect * flWidthScale;
|
||||
dst.Init( flWidthScale, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, flHeightScale, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, -1.0f, 0.0f );
|
||||
|
||||
MatrixBuildPerspectiveZRange ( dst, flZNear, flZFar );
|
||||
}
|
||||
|
||||
void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
|
||||
{
|
||||
float flWidth = 2.0f * flZNear * tanf( flFovX * M_PI / 360.0f );
|
||||
float flWidth = tanf( flFovX * M_PI / 360.0f );
|
||||
float flHeight = flWidth / flAspect;
|
||||
|
||||
// bottom, top, left, right are 0..1 so convert to -<val>/2..<val>/2
|
||||
@ -1254,10 +1284,12 @@ void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAsp
|
||||
float flBottom = -(flHeight/2.0f) * (1.0f - bottom) + bottom * (flHeight/2.0f);
|
||||
float flTop = -(flHeight/2.0f) * (1.0f - top) + top * (flHeight/2.0f);
|
||||
|
||||
dst.Init( (2.0f * flZNear) / (flRight-flLeft), 0.0f, (flLeft+flRight)/(flRight-flLeft), 0.0f,
|
||||
0.0f, 2.0f*flZNear/(flTop-flBottom), (flTop+flBottom)/(flTop-flBottom), 0.0f,
|
||||
0.0f, 0.0f, flZFar/(flZNear-flZFar), flZNear*flZFar/(flZNear-flZFar),
|
||||
0.0f, 0.0f, -1.0f, 0.0f );
|
||||
dst.Init( 1.0f / (flRight-flLeft), 0.0f, (flLeft+flRight)/(flRight-flLeft), 0.0f,
|
||||
0.0f, 1.0f /(flTop-flBottom), (flTop+flBottom)/(flTop-flBottom), 0.0f,
|
||||
0.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, -1.0f, 0.0f );
|
||||
|
||||
MatrixBuildPerspectiveZRange ( dst, flZNear, flZFar );
|
||||
}
|
||||
#endif // !_STATIC_LINKED || _SHARED_LIB
|
||||
|
||||
|
1190
public/mathlib/amd3dx.h
Normal file
1190
public/mathlib/amd3dx.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -58,8 +58,8 @@ inline Vector32& Vector32::operator=(const Vector &vOther)
|
||||
|
||||
static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
|
||||
|
||||
float fmax = MAX( fabs( vOther.x ), fabs( vOther.y ) );
|
||||
fmax = fpmax( fmax, fabs( vOther.z ) );
|
||||
float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
|
||||
fmax = Max( fmax, (float)fabs( vOther.z ) );
|
||||
|
||||
for (exp = 0; exp < 3; exp++)
|
||||
{
|
||||
@ -70,9 +70,9 @@ inline Vector32& Vector32::operator=(const Vector &vOther)
|
||||
|
||||
float fexp = 512.0f / expScale[exp];
|
||||
|
||||
x = clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
|
||||
y = clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
|
||||
z = clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
|
||||
x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
|
||||
y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
|
||||
z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -118,8 +118,8 @@ inline Normal32& Normal32::operator=(const Vector &vOther)
|
||||
{
|
||||
CHECK_VALID(vOther);
|
||||
|
||||
x = clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
|
||||
y = clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
|
||||
x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
|
||||
y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
|
||||
zneg = (vOther.z < 0);
|
||||
//x = vOther.x;
|
||||
//y = vOther.y;
|
||||
@ -182,9 +182,9 @@ inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
|
||||
{
|
||||
CHECK_VALID(vOther);
|
||||
|
||||
x = clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
|
||||
y = clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
|
||||
z = clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
|
||||
x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
|
||||
y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
|
||||
z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
|
||||
wneg = (vOther.w < 0);
|
||||
return *this;
|
||||
}
|
||||
@ -229,9 +229,9 @@ inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
|
||||
{
|
||||
CHECK_VALID(vOther);
|
||||
|
||||
x = clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
|
||||
y = clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
|
||||
z = clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
|
||||
x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
|
||||
y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
|
||||
z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
|
||||
wneg = (vOther.w < 0);
|
||||
return *this;
|
||||
}
|
||||
@ -276,9 +276,9 @@ inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
|
||||
{
|
||||
CHECK_VALID(vOther);
|
||||
|
||||
x = clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
|
||||
y = clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
|
||||
z = clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
|
||||
x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
|
||||
y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
|
||||
z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
|
||||
wneg = (vOther.w < 0);
|
||||
return *this;
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ enum LightType_OptimizationFlags_t
|
||||
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1,
|
||||
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2,
|
||||
LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4,
|
||||
LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8,
|
||||
};
|
||||
|
||||
struct LightDesc_t
|
||||
@ -102,6 +103,11 @@ public:
|
||||
{
|
||||
return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot));
|
||||
}
|
||||
|
||||
float OneOverThetaDotMinusPhiDot() const
|
||||
{
|
||||
return OneOver_ThetaDot_Minus_PhiDot;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
@ -28,6 +28,14 @@ extern float (*pfFastCos)(float x);
|
||||
#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
|
||||
#define FastCos(x) (*pfFastCos)(x)
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
// On x86, the inline FPU or SSE sqrt instruction is faster than
|
||||
// the overhead of setting up a function call and saving/restoring
|
||||
// the FPU or SSE register state and can be scheduled better, too.
|
||||
#undef FastSqrt
|
||||
#define FastSqrt(x) ::sqrtf(x)
|
||||
#endif
|
||||
|
||||
#endif // !_X360
|
||||
|
||||
#if defined( _X360 )
|
||||
|
@ -16,6 +16,61 @@
|
||||
|
||||
#include "mathlib/math_pfns.h"
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
// For SSE intrinsics
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
// XXX remove me
|
||||
#undef clamp
|
||||
|
||||
#ifdef DEBUG // stop crashing edit-and-continue
|
||||
FORCEINLINE float clamp( float val, float minVal, float maxVal )
|
||||
{
|
||||
if ( maxVal < minVal )
|
||||
return maxVal;
|
||||
else if( val < minVal )
|
||||
return minVal;
|
||||
else if( val > maxVal )
|
||||
return maxVal;
|
||||
else
|
||||
return val;
|
||||
}
|
||||
#else // DEBUG
|
||||
FORCEINLINE float clamp( float val, float minVal, float maxVal )
|
||||
{
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
_mm_store_ss( &val,
|
||||
_mm_min_ss(
|
||||
_mm_max_ss(
|
||||
_mm_load_ss(&val),
|
||||
_mm_load_ss(&minVal) ),
|
||||
_mm_load_ss(&maxVal) ) );
|
||||
#else
|
||||
val = fpmax(minVal, val);
|
||||
val = fpmin(maxVal, val);
|
||||
#endif
|
||||
return val;
|
||||
}
|
||||
#endif // DEBUG
|
||||
|
||||
//
|
||||
// Returns a clamped value in the range [min, max].
|
||||
//
|
||||
template< class T >
|
||||
inline T clamp( T const &val, T const &minVal, T const &maxVal )
|
||||
{
|
||||
if ( maxVal < minVal )
|
||||
return maxVal;
|
||||
else if( val < minVal )
|
||||
return minVal;
|
||||
else if( val > maxVal )
|
||||
return maxVal;
|
||||
else
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
// plane_t structure
|
||||
// !!! if this is changed, it must be changed in asm code too !!!
|
||||
// FIXME: does the asm code even exist anymore?
|
||||
@ -225,12 +280,12 @@ FORCEINLINE void VectorClear(vec_t *a)
|
||||
|
||||
FORCEINLINE float VectorMaximum(const vec_t *v)
|
||||
{
|
||||
return MAX( v[0], MAX( v[1], v[2] ) );
|
||||
return V_max( v[0], V_max( v[1], v[2] ) );
|
||||
}
|
||||
|
||||
FORCEINLINE float VectorMaximum(const Vector& v)
|
||||
{
|
||||
return MAX( v.x, MAX( v.y, v.z ) );
|
||||
return V_max( v.x, V_max( v.y, v.z ) );
|
||||
}
|
||||
|
||||
FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out)
|
||||
@ -255,7 +310,7 @@ inline void VectorNegate(vec_t *a)
|
||||
}
|
||||
|
||||
|
||||
//#define VectorMaximum(a) ( MAX( (a)[0], MAX( (a)[1], (a)[2] ) ) )
|
||||
//#define VectorMaximum(a) ( V_max( (a)[0], V_max( (a)[1], (a)[2] ) ) )
|
||||
#define Vector2Clear(x) {(x)[0]=(x)[1]=0;}
|
||||
#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);}
|
||||
#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];}
|
||||
@ -282,10 +337,10 @@ FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector&
|
||||
dest.z=start.z+direction.z*scale;
|
||||
}
|
||||
|
||||
//FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
|
||||
//{
|
||||
// VectorMAInline(start, scale, direction, dest);
|
||||
//}
|
||||
FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
|
||||
{
|
||||
VectorMAInline(start, scale, direction, dest);
|
||||
}
|
||||
|
||||
FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest )
|
||||
{
|
||||
@ -314,12 +369,9 @@ int Q_log2(int val);
|
||||
// Math routines done in optimized assembly math package routines
|
||||
void inline SinCos( float radians, float *sine, float *cosine )
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
*sine = sinf(radians);
|
||||
*cosine = cosf(radians);
|
||||
#elif defined( _X360 )
|
||||
#if defined( _X360 )
|
||||
XMScalarSinCos( sine, cosine, radians );
|
||||
#elif defined( _WIN32 )
|
||||
#elif defined( PLATFORM_WINDOWS_PC32 )
|
||||
_asm
|
||||
{
|
||||
fld DWORD PTR [radians]
|
||||
@ -331,11 +383,12 @@ void inline SinCos( float radians, float *sine, float *cosine )
|
||||
fstp DWORD PTR [edx]
|
||||
fstp DWORD PTR [eax]
|
||||
}
|
||||
#elif defined( _LINUX ) || defined ( __APPLE__ )
|
||||
#elif defined( PLATFORM_WINDOWS_PC64 )
|
||||
*sine = sin( radians );
|
||||
*cosine = cos( radians );
|
||||
#elif defined( POSIX )
|
||||
double __cosr, __sinr;
|
||||
__asm __volatile__
|
||||
("fsincos"
|
||||
: "=t" (__cosr), "=u" (__sinr) : "0" (radians));
|
||||
__asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
|
||||
|
||||
*sine = __sinr;
|
||||
*cosine = __cosr;
|
||||
@ -379,11 +432,6 @@ FORCEINLINE T Square( T const &a )
|
||||
}
|
||||
|
||||
|
||||
FORCEINLINE bool IsPowerOfTwo( uint x )
|
||||
{
|
||||
return ( x & ( x - 1 ) ) == 0;
|
||||
}
|
||||
|
||||
// return the smallest power of two >= x.
|
||||
// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed)
|
||||
// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint,
|
||||
@ -450,6 +498,19 @@ bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float f
|
||||
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
|
||||
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
|
||||
|
||||
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
|
||||
{
|
||||
MatrixGetColumn ( in, 3, out );
|
||||
}
|
||||
|
||||
inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
|
||||
{
|
||||
MatrixSetColumn ( in, 3, out );
|
||||
}
|
||||
|
||||
void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
|
||||
void MatrixScaleByZero ( matrix3x4_t &out );
|
||||
|
||||
//void DecomposeRotation( const matrix3x4_t &mat, float *out );
|
||||
void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
|
||||
void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
|
||||
@ -625,15 +686,11 @@ template <class T> FORCEINLINE T AVG(T a, T b)
|
||||
}
|
||||
|
||||
// number of elements in an array of static size
|
||||
#define NELEMS(x) ((sizeof(x))/sizeof(x[0]))
|
||||
#define NELEMS(x) ARRAYSIZE(x)
|
||||
|
||||
// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
|
||||
#define XYZ(v) (v).x,(v).y,(v).z
|
||||
|
||||
//
|
||||
// Returns a clamped value in the range [min, max].
|
||||
//
|
||||
#define V_clamp(val, min, max) (((val) > (max)) ? (max) : (((val) < (min)) ? (min) : (val)))
|
||||
|
||||
inline float Sign( float x )
|
||||
{
|
||||
@ -1070,14 +1127,14 @@ inline float SimpleSplineRemapValClamped( float val, float A, float B, float C,
|
||||
if ( A == B )
|
||||
return val >= B ? D : C;
|
||||
float cVal = (val - A) / (B - A);
|
||||
cVal = V_clamp( cVal, 0.0f, 1.0f );
|
||||
cVal = clamp( cVal, 0.0f, 1.0f );
|
||||
return C + (D - C) * SimpleSpline( cVal );
|
||||
}
|
||||
|
||||
FORCEINLINE int RoundFloatToInt(float f)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::round(f);
|
||||
#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) || defined(__x86_64__)
|
||||
return _mm_cvtss_si32(_mm_load_ss(&f));
|
||||
#elif defined( _X360 )
|
||||
#ifdef Assert
|
||||
Assert( IsFPUControlWordSet() );
|
||||
@ -1089,72 +1146,23 @@ FORCEINLINE int RoundFloatToInt(float f)
|
||||
};
|
||||
flResult = __fctiw( f );
|
||||
return pResult[1];
|
||||
#else // !X360
|
||||
int nResult;
|
||||
#if defined( _WIN32 )
|
||||
__asm
|
||||
{
|
||||
fld f
|
||||
fistp nResult
|
||||
}
|
||||
#elif defined( _LINUX ) || defined( __APPLE__ )
|
||||
__asm __volatile__ (
|
||||
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
|
||||
);
|
||||
#endif
|
||||
return nResult;
|
||||
#else
|
||||
#error Unknown architecture
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCEINLINE unsigned char RoundFloatToByte(float f)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::round(f);
|
||||
#elif defined( _X360 )
|
||||
int nResult = RoundFloatToInt(f);
|
||||
#ifdef Assert
|
||||
Assert( IsFPUControlWordSet() );
|
||||
#endif
|
||||
union
|
||||
{
|
||||
double flResult;
|
||||
int pIntResult[2];
|
||||
unsigned char pResult[8];
|
||||
};
|
||||
flResult = __fctiw( f );
|
||||
#ifdef Assert
|
||||
Assert( pIntResult[1] >= 0 && pIntResult[1] <= 255 );
|
||||
#endif
|
||||
return pResult[8];
|
||||
|
||||
#else // !X360
|
||||
|
||||
int nResult;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
__asm
|
||||
{
|
||||
fld f
|
||||
fistp nResult
|
||||
}
|
||||
#elif defined( _LINUX ) || defined( __APPLE__ )
|
||||
__asm __volatile__ (
|
||||
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef Assert
|
||||
Assert( nResult >= 0 && nResult <= 255 );
|
||||
#endif
|
||||
return nResult;
|
||||
|
||||
Assert( (nResult & ~0xFF) == 0 );
|
||||
#endif
|
||||
return (unsigned char) nResult;
|
||||
}
|
||||
|
||||
FORCEINLINE uint32_t RoundFloatToUnsignedLong(float f)
|
||||
FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
|
||||
{
|
||||
#if defined( _WIN64 )
|
||||
return std::round(f);
|
||||
#elif defined( _X360 )
|
||||
#if defined( _X360 )
|
||||
#ifdef Assert
|
||||
Assert( IsFPUControlWordSet() );
|
||||
#endif
|
||||
@ -1162,29 +1170,48 @@ FORCEINLINE uint32_t RoundFloatToUnsignedLong(float f)
|
||||
{
|
||||
double flResult;
|
||||
int pIntResult[2];
|
||||
uint32_t pResult[2];
|
||||
unsigned long pResult[2];
|
||||
};
|
||||
flResult = __fctiw( f );
|
||||
Assert( pIntResult[1] >= 0 );
|
||||
return pResult[1];
|
||||
#else // !X360
|
||||
|
||||
#if defined( PLATFORM_WINDOWS_PC64 )
|
||||
uint nRet = ( uint ) f;
|
||||
if ( nRet & 1 )
|
||||
{
|
||||
if ( ( f - floor( f ) >= 0.5 ) )
|
||||
{
|
||||
nRet++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( ( f - floor( f ) > 0.5 ) )
|
||||
{
|
||||
nRet++;
|
||||
}
|
||||
}
|
||||
return nRet;
|
||||
#else // PLATFORM_WINDOWS_PC64
|
||||
unsigned char nResult[8];
|
||||
|
||||
#if defined( _WIN32 )
|
||||
__asm
|
||||
{
|
||||
fld f
|
||||
fistp qword ptr nResult
|
||||
}
|
||||
#elif defined( _LINUX ) || defined( __APPLE__ )
|
||||
__asm __volatile__ (
|
||||
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
|
||||
);
|
||||
#endif
|
||||
#if defined( _WIN32 )
|
||||
__asm
|
||||
{
|
||||
fld f
|
||||
fistp qword ptr nResult
|
||||
}
|
||||
#elif POSIX
|
||||
__asm __volatile__ (
|
||||
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
|
||||
);
|
||||
#endif
|
||||
|
||||
return *((uint32_t*)nResult);
|
||||
#endif
|
||||
return *((unsigned long*)nResult);
|
||||
#endif // PLATFORM_WINDOWS_PC64
|
||||
#endif // !X360
|
||||
}
|
||||
|
||||
FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
|
||||
@ -1195,9 +1222,7 @@ FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
|
||||
// Fast, accurate ftol:
|
||||
FORCEINLINE int Float2Int( float a )
|
||||
{
|
||||
#if defined ( _WIN64 )
|
||||
return a;
|
||||
#elif defined( _X360 )
|
||||
#if defined( _X360 )
|
||||
union
|
||||
{
|
||||
double flResult;
|
||||
@ -1206,78 +1231,54 @@ FORCEINLINE int Float2Int( float a )
|
||||
flResult = __fctiwz( a );
|
||||
return pResult[1];
|
||||
#else // !X360
|
||||
|
||||
int RetVal;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
int CtrlwdHolder;
|
||||
int CtrlwdSetter;
|
||||
__asm
|
||||
{
|
||||
fld a // push 'a' onto the FP stack
|
||||
fnstcw CtrlwdHolder // store FPU control word
|
||||
movzx eax, CtrlwdHolder // move and zero extend word into eax
|
||||
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
|
||||
or eax, 0x00000C00 // set rounding mode bits to round towards zero
|
||||
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
|
||||
fldcw CtrlwdSetter // Entering plaid!
|
||||
fistp RetVal // Store and converted (to int) result
|
||||
fldcw CtrlwdHolder // Restore control word
|
||||
}
|
||||
#elif defined( _LINUX ) || defined ( __APPLE__ )
|
||||
RetVal = static_cast<int>( a );
|
||||
#endif
|
||||
|
||||
return RetVal;
|
||||
// Rely on compiler to generate CVTTSS2SI on x86
|
||||
return (int) a;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Over 15x faster than: (int)floor(value)
|
||||
inline int Floor2Int( float a )
|
||||
{
|
||||
#if defined ( _WIN64 )
|
||||
return std::floor(a);
|
||||
#else
|
||||
int RetVal;
|
||||
#if defined( _X360 )
|
||||
RetVal = (int)floor( a );
|
||||
#elif defined( _WIN32 )
|
||||
int CtrlwdHolder;
|
||||
int CtrlwdSetter;
|
||||
__asm
|
||||
{
|
||||
fld a // push 'a' onto the FP stack
|
||||
fnstcw CtrlwdHolder // store FPU control word
|
||||
movzx eax, CtrlwdHolder // move and zero extend word into eax
|
||||
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
|
||||
or eax, 0x00000400 // set rounding mode bits to round down
|
||||
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
|
||||
fldcw CtrlwdSetter // Entering plaid!
|
||||
fistp RetVal // Store floored and converted (to int) result
|
||||
fldcw CtrlwdHolder // Restore control word
|
||||
}
|
||||
#elif defined( _LINUX ) || defined( __APPLE__ )
|
||||
#if defined( __i386__ )
|
||||
// Convert to int and back, compare, subtract one if too big
|
||||
__m128 a128 = _mm_set_ss(a);
|
||||
RetVal = _mm_cvtss_si32(a128);
|
||||
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
|
||||
RetVal -= _mm_comigt_ss( rounded128, a128 );
|
||||
#else
|
||||
RetVal = static_cast<int>( floor(a) );
|
||||
#endif
|
||||
return RetVal;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Fast color conversion from float to unsigned char
|
||||
//-----------------------------------------------------------------------------
|
||||
FORCEINLINE unsigned char FastFToC( float c )
|
||||
FORCEINLINE unsigned int FastFToC( float c )
|
||||
{
|
||||
volatile float dc;
|
||||
|
||||
// ieee trick
|
||||
dc = c * 255.0f + (float)(1 << 23);
|
||||
|
||||
// return the lsb
|
||||
#if defined( _X360 )
|
||||
return ((unsigned char*)&dc)[3];
|
||||
#if defined( __i386__ )
|
||||
// IEEE float bit manipulation works for values between [0, 1<<23)
|
||||
union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
|
||||
return convert.i & 255;
|
||||
#else
|
||||
return *(unsigned char*)&dc;
|
||||
// console CPUs suffer from a load-hit-store penalty
|
||||
return Float2Int( c * 255.0f );
|
||||
#endif
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Fast conversion from float to integer with magnitude less than 2**22
|
||||
//-----------------------------------------------------------------------------
|
||||
FORCEINLINE int FastFloatToSmallInt( float c )
|
||||
{
|
||||
#if defined( __i386__ )
|
||||
// IEEE float bit manipulation works for values between [-1<<22, 1<<22)
|
||||
union { float f; int i; } convert = { c + (float)(3<<22) };
|
||||
return (convert.i & ((1<<23)-1)) - (1<<22);
|
||||
#else
|
||||
// console CPUs suffer from a load-hit-store penalty
|
||||
return Float2Int( c );
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1289,39 +1290,23 @@ FORCEINLINE unsigned char FastFToC( float c )
|
||||
inline float ClampToMsec( float in )
|
||||
{
|
||||
int msec = Floor2Int( in * 1000.0f + 0.5f );
|
||||
return msec / 1000.0f;
|
||||
return 0.001f * msec;
|
||||
}
|
||||
|
||||
// Over 15x faster than: (int)ceil(value)
|
||||
inline int Ceil2Int( float a )
|
||||
{
|
||||
#if defined ( _WIN64 )
|
||||
return std::ceil(a);
|
||||
#else
|
||||
int RetVal;
|
||||
|
||||
#if defined( _X360 )
|
||||
RetVal = (int)ceil( a );
|
||||
#elif defined( _WIN32 )
|
||||
int CtrlwdHolder;
|
||||
int CtrlwdSetter;
|
||||
__asm
|
||||
{
|
||||
fld a // push 'a' onto the FP stack
|
||||
fnstcw CtrlwdHolder // store FPU control word
|
||||
movzx eax, CtrlwdHolder // move and zero extend word into eax
|
||||
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
|
||||
or eax, 0x00000800 // set rounding mode bits to round down
|
||||
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
|
||||
fldcw CtrlwdSetter // Entering plaid!
|
||||
fistp RetVal // Store floored and converted (to int) result
|
||||
fldcw CtrlwdHolder // Restore control word
|
||||
}
|
||||
#elif defined( _LINUX ) || defined( __APPLE__ )
|
||||
RetVal = static_cast<int>( ceil(a) );
|
||||
#if defined( __i386__ )
|
||||
// Convert to int and back, compare, add one if too small
|
||||
__m128 a128 = _mm_load_ss(&a);
|
||||
RetVal = _mm_cvtss_si32(a128);
|
||||
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
|
||||
RetVal += _mm_comilt_ss( rounded128, a128 );
|
||||
#else
|
||||
RetVal = static_cast<int>( ceil(a) );
|
||||
#endif
|
||||
return RetVal;
|
||||
#endif // _WIN64
|
||||
}
|
||||
|
||||
|
||||
@ -1436,7 +1421,7 @@ FORCEINLINE unsigned char LinearToLightmap( float f )
|
||||
|
||||
FORCEINLINE void ColorClamp( Vector& color )
|
||||
{
|
||||
float maxc = MAX( color.x, MAX( color.y, color.z ) );
|
||||
float maxc = V_max( color.x, V_max( color.y, color.z ) );
|
||||
if ( maxc > 1.0f )
|
||||
{
|
||||
float ooMax = 1.0f / maxc;
|
||||
@ -1565,7 +1550,7 @@ float Hermite_Spline(
|
||||
float t );
|
||||
|
||||
|
||||
void Hermite_SplineBasis( float t, float basis[4] );
|
||||
void Hermite_SplineBasis( float t, float basis[] );
|
||||
|
||||
void Hermite_Spline(
|
||||
const Quaternion &q0,
|
||||
@ -1932,10 +1917,10 @@ FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsi
|
||||
ny *= 16384.0f;
|
||||
|
||||
// '0' and '32768' values are invalid encodings
|
||||
nx = MAX( nx, 1.0f ); // Make sure there are no zero values
|
||||
ny = MAX( ny, 1.0f );
|
||||
nx = MIN( nx, 32767.0f ); // Make sure there are no 32768 values
|
||||
ny = MIN( ny, 32767.0f );
|
||||
nx = V_max( nx, 1.0f ); // Make sure there are no zero values
|
||||
ny = V_max( ny, 1.0f );
|
||||
nx = V_min( nx, 32767.0f ); // Make sure there are no 32768 values
|
||||
ny = V_min( ny, 32767.0f );
|
||||
|
||||
if ( nz < 0.0f )
|
||||
nx = -nx; // Set the sign bit for z
|
||||
@ -2085,6 +2070,46 @@ void RGBtoHSV( const Vector &rgb, Vector &hsv );
|
||||
void HSVtoRGB( const Vector &hsv, Vector &rgb );
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Fast version of pow and log
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
float FastLog2(float i); // log2( i )
|
||||
float FastPow2(float i); // 2^i
|
||||
float FastPow(float a, float b); // a^b
|
||||
float FastPow10( float i ); // 10^i
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// For testing float equality
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON )
|
||||
{
|
||||
return fabs( a - b ) <= epsilon;
|
||||
}
|
||||
|
||||
inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON )
|
||||
{
|
||||
return fabs( a.x - b.x ) <= epsilon &&
|
||||
fabs( a.y - b.y ) <= epsilon &&
|
||||
fabs( a.z - b.z ) <= epsilon;
|
||||
}
|
||||
|
||||
// Fast compare
|
||||
// maxUlps is the maximum error in terms of Units in the Last Place. This
|
||||
// specifies how big an error we are willing to accept in terms of the value
|
||||
// of the least significant digit of the floating point number’s
|
||||
// representation. maxUlps can also be interpreted in terms of how many
|
||||
// representable floats we are willing to accept between A and B.
|
||||
// This function will allow maxUlps-1 floats between A and B.
|
||||
bool AlmostEqual(float a, float b, int maxUlps = 10);
|
||||
|
||||
inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
|
||||
{
|
||||
return AlmostEqual( a.x, b.x, maxUlps ) &&
|
||||
AlmostEqual( a.y, b.y, maxUlps ) &&
|
||||
AlmostEqual( a.z, b.z, maxUlps );
|
||||
}
|
||||
|
||||
#endif // MATH_BASE_H
|
||||
|
||||
|
385
public/mathlib/matrixmath.h
Normal file
385
public/mathlib/matrixmath.h
Normal file
@ -0,0 +1,385 @@
|
||||
//========= Copyright Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// A set of generic, template-based matrix functions.
|
||||
//===========================================================================//
|
||||
|
||||
#ifndef MATRIXMATH_H
|
||||
#define MATRIXMATH_H
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
// The operations in this file can perform basic matrix operations on matrices represented
|
||||
// using any class that supports the necessary operations:
|
||||
//
|
||||
// .Element( row, col ) - return the element at a given matrix position
|
||||
// .SetElement( row, col, val ) - modify an element
|
||||
// .Width(), .Height() - get dimensions
|
||||
// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size
|
||||
//
|
||||
// Generally, vectors can be used with these functions by using N x 1 matrices to represent them.
|
||||
// Matrices are addressed as row, column, and indices are 0-based
|
||||
//
|
||||
//
|
||||
// Note that the template versions of these routines are defined for generality - it is expected
|
||||
// that template specialization is used for common high performance cases.
|
||||
|
||||
namespace MatrixMath
|
||||
{
|
||||
/// M *= flScaleValue
|
||||
template<class MATRIXCLASS>
|
||||
void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue )
|
||||
{
|
||||
for( int i = 0; i < matrix.Height(); i++ )
|
||||
{
|
||||
for( int j = 0; j < matrix.Width(); j++ )
|
||||
{
|
||||
matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// AppendElementToMatrix - same as setting the element, except only works when all calls
|
||||
/// happen in top to bottom left to right order, and you have to call FinishedAppending when
|
||||
/// done. For normal matrix classes this is no different than SetElement, but for
|
||||
/// CSparseMatrix, it is an accelerated way to fill a matrix from scratch.
|
||||
template<class MATRIXCLASS>
|
||||
FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue )
|
||||
{
|
||||
matrix.SetElement( nRow, nCol, flValue ); // default implementation
|
||||
}
|
||||
|
||||
template<class MATRIXCLASS>
|
||||
FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation
|
||||
|
||||
/// M += fl
/// Adds the scalar flAddend to every element of the matrix in place.
template<class MATRIXCLASS>
void AddToMatrix( MATRIXCLASS &matrix, float flAddend )
{
	for ( int nRow = 0; nRow < matrix.Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < matrix.Width(); ++nCol )
		{
			matrix.SetElement( nRow, nCol, flAddend + matrix.Element( nRow, nCol ) );
		}
	}
}
|
||||
|
||||
/// transpose
/// Resizes *pMatrixOut to Width x Height of the input and fills it with the transpose of
/// matrixIn. Elements are appended in top to bottom, left to right order, followed by
/// FinishedAppending. The output is written while the input is read, so it should be a
/// different object than the input.
template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
{
	pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() );

	for ( int nOutRow = 0; nOutRow < pMatrixOut->Height(); ++nOutRow )
	{
		for ( int nOutCol = 0; nOutCol < pMatrixOut->Width(); ++nOutCol )
		{
			// out( r, c ) = in( c, r )
			AppendElement( *pMatrixOut, nOutRow, nOutCol, matrixIn.Element( nOutCol, nOutRow ) );
		}
	}

	FinishedAppending( *pMatrixOut );
}
|
||||
|
||||
/// copy
/// Resizes *pMatrixOut to the dimensions of matrixIn and copies every element across.
/// The input and output classes may differ. Elements are appended in top to bottom,
/// left to right order, followed by FinishedAppending.
template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
{
	pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() );

	for ( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
		{
			AppendElement( *pMatrixOut, nRow, nCol, matrixIn.Element( nRow, nCol ) );
		}
	}

	FinishedAppending( *pMatrixOut );
}
|
||||
|
||||
|
||||
|
||||
/// M+=M
/// Adds matrixIn element-wise into *pMatrixOut. The loop bounds come from matrixIn and no
/// dimension check is made, so the output must be at least as large as the input.
template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
{
	for ( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
		{
			pMatrixOut->SetElement( nRow, nCol, pMatrixOut->Element( nRow, nCol ) + matrixIn.Element( nRow, nCol ) );
		}
	}
}
|
||||
|
||||
// M += scale * M
// Adds flScale * matrixIn element-wise into *pMatrixOut. The loop bounds come from matrixIn
// and no dimension check is made, so the output must be at least as large as the input.
template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
{
	for ( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
		{
			pMatrixOut->SetElement( nRow, nCol, pMatrixOut->Element( nRow, nCol ) + flScale * matrixIn.Element( nRow, nCol ) );
		}
	}
}
|
||||
|
||||
|
||||
// Fills an already-dimensioned matrix with flDiagonalValue on the main diagonal and zero
// everywhere else. The matrix is not resized here; the caller sets its dimensions first.
// Elements are appended in top to bottom, left to right order, followed by
// FinishedAppending.
template<class MATRIXCLASSOUT>
void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 )
{
	for ( int nRow = 0; nRow < pMatrixOut->Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < pMatrixOut->Width(); ++nCol )
		{
			if ( nRow == nCol )
			{
				AppendElement( *pMatrixOut, nRow, nCol, flDiagonalValue );
			}
			else
			{
				AppendElement( *pMatrixOut, nRow, nCol, 0.0f );
			}
		}
	}

	FinishedAppending( *pMatrixOut );
}
|
||||
|
||||
//// Simple way to initialize a matrix with constants from code.
//// Resizes *pMatrix to nRows x nCols and then reads nRows * nCols variadic arguments in
//// row-major order. Each argument is read as a double, so callers must pass floating point
//// values (a float is promoted to double automatically); passing an integer literal such
//// as 1 instead of 1.0 is undefined behavior.
template<class MATRIXCLASSOUT>
void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... )
{
	va_list pArgs;
	va_start( pArgs, nCols );

	pMatrix->SetDimensions( nRows, nCols );

	for ( int r = 0; r < nRows; ++r )
	{
		for ( int c = 0; c < nCols; ++c )
		{
			double flValue = va_arg( pArgs, double );
			pMatrix->SetElement( r, c, flValue );
		}
	}

	va_end( pArgs );
}
|
||||
|
||||
|
||||
/// row and colum accessors. treat a row or a column as a column vector
|
||||
template<class MATRIXTYPE> class MatrixRowAccessor
|
||||
{
|
||||
public:
|
||||
FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow )
|
||||
{
|
||||
m_pMatrix = &matrix;
|
||||
m_nRow = nRow;
|
||||
}
|
||||
|
||||
FORCEINLINE float Element( int nRow, int nCol ) const
|
||||
{
|
||||
Assert( nCol == 0 );
|
||||
return m_pMatrix->Element( m_nRow, nRow );
|
||||
}
|
||||
|
||||
FORCEINLINE int Width( void ) const { return 1; };
|
||||
FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }
|
||||
|
||||
private:
|
||||
MATRIXTYPE const *m_pMatrix;
|
||||
int m_nRow;
|
||||
};
|
||||
|
||||
template<class MATRIXTYPE> class MatrixColumnAccessor
|
||||
{
|
||||
public:
|
||||
FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn )
|
||||
{
|
||||
m_pMatrix = &matrix;
|
||||
m_nColumn = nColumn;
|
||||
}
|
||||
|
||||
FORCEINLINE float Element( int nRow, int nColumn ) const
|
||||
{
|
||||
Assert( nColumn == 0 );
|
||||
return m_pMatrix->Element( nRow, m_nColumn );
|
||||
}
|
||||
|
||||
FORCEINLINE int Width( void ) const { return 1; }
|
||||
FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); }
|
||||
private:
|
||||
MATRIXTYPE const *m_pMatrix;
|
||||
int m_nColumn;
|
||||
};
|
||||
|
||||
/// this translator acts as a proxy for the transposed matrix
|
||||
template<class MATRIXTYPE> class MatrixTransposeAccessor
|
||||
{
|
||||
public:
|
||||
FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix )
|
||||
{
|
||||
m_pMatrix = &matrix;
|
||||
}
|
||||
|
||||
FORCEINLINE float Element( int nRow, int nColumn ) const
|
||||
{
|
||||
return m_pMatrix->Element( nColumn, nRow );
|
||||
}
|
||||
|
||||
FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); }
|
||||
FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }
|
||||
private:
|
||||
MATRIXTYPE const *m_pMatrix;
|
||||
};
|
||||
|
||||
/// this tranpose returns a wrapper around it's argument, allowing things like AddMatrixToMatrix( Transpose( matA ), &matB ) without an extra copy
|
||||
template<class MATRIXCLASSIN>
|
||||
MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn )
|
||||
{
|
||||
return MatrixTransposeAccessor<MATRIXCLASSIN>( matrixIn );
|
||||
}
|
||||
|
||||
|
||||
/// retrieve rows and columns
|
||||
template<class MATRIXTYPE>
|
||||
FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn )
|
||||
{
|
||||
return MatrixColumnAccessor<MATRIXTYPE>( matrix, nColumn );
|
||||
}
|
||||
|
||||
template<class MATRIXTYPE>
|
||||
FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow )
|
||||
{
|
||||
return MatrixRowAccessor<MATRIXTYPE>( matrix, nRow );
|
||||
}
|
||||
|
||||
//// Dot product between vectors (or rows and/or columns via accessors).
//// Both arguments must be N x 1 and of equal height. The sum is accumulated in double
//// precision and narrowed to float on return.
template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE >
float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB )
{
	Assert( vecA.Width() == 1 );
	Assert( vecB.Width() == 1 );
	Assert( vecA.Height() == vecB.Height() );

	double flSum = 0;
	for ( int nIndex = 0; nIndex < vecA.Height(); ++nIndex )
	{
		flSum += vecA.Element( nIndex, 0 ) * vecB.Element( nIndex, 0 );
	}
	return flSum;
}
|
||||
|
||||
|
||||
|
||||
/// Matrix x matrix multiplication: *pMatrixOut = matA * matB.
/// The output is resized to matA.Height() x matB.Width(), and each element is the inner
/// product of a row of matA with a column of matB. The output is written while the inputs
/// are still being read, so it should not be the same object as either input.
template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE>
void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut )
{
	Assert( matA.Width() == matB.Height() );
	pMatrixOut->SetDimensions( matA.Height(), matB.Width() );

	for ( int nRow = 0; nRow < matA.Height(); ++nRow )
	{
		for ( int nCol = 0; nCol < matB.Width(); ++nCol )
		{
			float flDot = InnerProduct( MatrixRow( matA, nRow ), MatrixColumn( matB, nCol ) );
			pMatrixOut->SetElement( nRow, nCol, flDot );
		}
	}
}
|
||||
|
||||
/// Solve Ax=B via the conjugate gradient method. Code and naming conventions are based on
/// the Wikipedia article.
///
/// matA        - the system matrix
/// vecB        - right-hand side, an N x 1 vector
/// vecX        - in: the initial guess; out: the refined solution (updated in place)
/// flTolerance - iteration stops once the squared residual norm drops below this value
///
/// At most 100 iterations are performed.
template<class ATYPE, class XTYPE, class BTYPE>
void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
{
	// r = b - A * x
	XTYPE vecR;
	vecR.SetDimensions( vecX.Height(), 1 );
	MatrixMultiply( matA, vecX, &vecR );
	ScaleMatrix( vecR, -1 );
	AddMatrixToMatrix( vecB, &vecR );

	// the first search direction is the residual itself
	XTYPE vecP;
	CopyMatrix( vecR, &vecP );
	float flRsOld = InnerProduct( vecR, vecR );

	for( int nIter = 0; nIter < 100; nIter++ )
	{
		XTYPE vecAp;
		MatrixMultiply( matA, vecP, &vecAp );
		float flDivisor = InnerProduct( vecAp, vecP );
		if ( flDivisor == 0.0f )
		{
			// The search direction is zero (for example, the initial guess already solves
			// the system) or is annihilated by A. Dividing would produce NaN or infinity
			// and corrupt vecX, so stop with the current estimate.
			break;
		}
		float flAlpha = flRsOld / flDivisor;

		// x += alpha * p;  r -= alpha * A * p
		AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
		AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );

		float flRsNew = InnerProduct( vecR, vecR );
		if ( flRsNew < flTolerance )
		{
			break;
		}

		// p = r + ( rsNew / rsOld ) * p
		ScaleMatrix( vecP, flRsNew / flRsOld );
		AddMatrixToMatrix( vecR, &vecP );
		flRsOld = flRsNew;
	}
}
|
||||
|
||||
/// Solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions are
/// based on the Wikipedia article. Same as ConjugateGradient above, but allows passing in
/// two matrices whose product is used as the A matrix (in order to preserve sparsity).
///
/// matA        - the matrix A
/// matAPrime   - the matrix A' that is applied after A (the caller supplies it)
/// vecB        - right-hand side, an N x 1 vector
/// vecX        - in: the initial guess; out: the refined solution (updated in place)
/// flTolerance - iteration stops once the squared residual norm drops below this value
///
/// At most 100 iterations are performed.
template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE>
void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
{
	// r = b - A' * ( A * x ), computed in two steps so A' * A is never formed
	XTYPE vecR1;
	vecR1.SetDimensions( vecX.Height(), 1 );
	MatrixMultiply( matA, vecX, &vecR1 );
	XTYPE vecR;
	vecR.SetDimensions( vecR1.Height(), 1 );
	MatrixMultiply( matAPrime, vecR1, &vecR );
	ScaleMatrix( vecR, -1 );
	AddMatrixToMatrix( vecB, &vecR );

	// the first search direction is the residual itself
	XTYPE vecP;
	CopyMatrix( vecR, &vecP );
	float flRsOld = InnerProduct( vecR, vecR );

	for( int nIter = 0; nIter < 100; nIter++ )
	{
		// Ap = A' * ( A * p )
		XTYPE vecAp1;
		MatrixMultiply( matA, vecP, &vecAp1 );
		XTYPE vecAp;
		MatrixMultiply( matAPrime, vecAp1, &vecAp );

		float flDivisor = InnerProduct( vecAp, vecP );
		if ( flDivisor == 0.0f )
		{
			// The search direction is zero (for example, the initial guess already solves
			// the system) or is annihilated by A' * A. Dividing would produce NaN or
			// infinity and corrupt vecX, so stop with the current estimate.
			break;
		}
		float flAlpha = flRsOld / flDivisor;

		// x += alpha * p;  r -= alpha * Ap
		AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
		AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );

		float flRsNew = InnerProduct( vecR, vecR );
		if ( flRsNew < flTolerance )
		{
			break;
		}

		// p = r + ( rsNew / rsOld ) * p
		ScaleMatrix( vecP, flRsNew / flRsOld );
		AddMatrixToMatrix( vecR, &vecP );
		flRsOld = flRsNew;
	}
}
|
||||
|
||||
|
||||
template<class ATYPE, class XTYPE, class BTYPE>
|
||||
void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX )
|
||||
{
|
||||
// now, generate the normal equations
|
||||
BTYPE vecBeta;
|
||||
MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta );
|
||||
|
||||
vecX.SetDimensions( matA.Width(), 1 );
|
||||
MatrixMath::SetMatrixToIdentity( &vecX );
|
||||
|
||||
ATYPE matATransposed;
|
||||
TransposeMatrix( matA, &matATransposed );
|
||||
ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 );
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/// a simple fixed-size matrix class
|
||||
template<int NUMROWS, int NUMCOLS> class CFixedMatrix
|
||||
{
|
||||
public:
|
||||
FORCEINLINE int Width( void ) const { return NUMCOLS; }
|
||||
FORCEINLINE int Height( void ) const { return NUMROWS; }
|
||||
FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; }
|
||||
FORCEINLINE void SetElement( int nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; }
|
||||
FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); }
|
||||
|
||||
private:
|
||||
float m_flValues[NUMROWS][NUMCOLS];
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //matrixmath_h
|
@ -1,4 +1,4 @@
|
||||
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//===== Copyright <EFBFBD> 1996-2005, Valve Corporation, All rights reserved. ======//
|
||||
//
|
||||
// Purpose: - defines SIMD "structure of arrays" classes and functions.
|
||||
//
|
||||
@ -15,7 +15,7 @@
|
||||
#include <mathlib/vector.h>
|
||||
#include <mathlib/mathlib.h>
|
||||
|
||||
#if defined(_LINUX) || defined(__APPLE__)
|
||||
#if defined(GNUC)
|
||||
#define USE_STDC_FOR_SIMD 0
|
||||
#else
|
||||
#define USE_STDC_FOR_SIMD 0
|
||||
@ -108,7 +108,7 @@ struct ALIGN16 intx4
|
||||
m_i32[2] == other.m_i32[2] &&
|
||||
m_i32[3] == other.m_i32[3] ;
|
||||
}
|
||||
};
|
||||
} ALIGN16_POST;
|
||||
|
||||
|
||||
#if defined( _DEBUG ) && defined( _X360 )
|
||||
@ -136,13 +136,13 @@ FORCEINLINE void TestVPUFlags() {}
|
||||
// miss.)
|
||||
#ifndef _X360
|
||||
extern const fltx4 Four_Zeros; // 0 0 0 0
|
||||
extern const fltx4 Four_Ones; // 1 1 1 1
|
||||
extern const fltx4 Four_Twos; // 2 2 2 2
|
||||
extern const fltx4 Four_Ones; // 1 1 1 1
|
||||
extern const fltx4 Four_Twos; // 2 2 2 2
|
||||
extern const fltx4 Four_Threes; // 3 3 3 3
|
||||
extern const fltx4 Four_Fours; // guess.
|
||||
extern const fltx4 Four_Point225s; // .225 .225 .225 .225
|
||||
extern const fltx4 Four_PointFives; // .5 .5 .5 .5
|
||||
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
|
||||
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
|
||||
extern const fltx4 Four_2ToThe21s; // (1<<21)..
|
||||
extern const fltx4 Four_2ToThe22s; // (1<<22)..
|
||||
extern const fltx4 Four_2ToThe23s; // (1<<23)..
|
||||
@ -157,7 +157,7 @@ extern const fltx4 Four_Threes; // 3 3 3 3
|
||||
extern const fltx4 Four_Fours; // guess.
|
||||
extern const fltx4 Four_Point225s; // .225 .225 .225 .225
|
||||
extern const fltx4 Four_PointFives; // .5 .5 .5 .5
|
||||
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
|
||||
extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
|
||||
extern const fltx4 Four_2ToThe21s; // (1<<21)..
|
||||
extern const fltx4 Four_2ToThe22s; // (1<<22)..
|
||||
extern const fltx4 Four_2ToThe23s; // (1<<23)..
|
||||
@ -167,20 +167,20 @@ extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
|
||||
#endif
|
||||
extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX
|
||||
extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX
|
||||
extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
|
||||
extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
|
||||
|
||||
// external aligned integer constants
|
||||
extern const ALIGN16 int32 g_SIMD_clear_signmask[]; // 0x7fffffff x 4
|
||||
extern const ALIGN16 int32 g_SIMD_signmask[]; // 0x80000000 x 4
|
||||
extern const ALIGN16 int32 g_SIMD_lsbmask[]; // 0xfffffffe x 4
|
||||
extern const ALIGN16 int32 g_SIMD_clear_wmask[]; // -1 -1 -1 0
|
||||
extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4]; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
|
||||
extern const ALIGN16 int32 g_SIMD_AllOnesMask[]; // ~0,~0,~0,~0
|
||||
extern const ALIGN16 int32 g_SIMD_Low16BitsMask[]; // 0xffff x 4
|
||||
extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4
|
||||
extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4
|
||||
extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4
|
||||
extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0
|
||||
extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
|
||||
extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0
|
||||
extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4
|
||||
|
||||
// this mask is used for skipping the tail of things. If you have N elements in an array, and wish
|
||||
// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration.
|
||||
extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4];
|
||||
extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST;
|
||||
|
||||
// Define prefetch macros.
|
||||
// The characteristics of cache and prefetch are completely
|
||||
@ -436,23 +436,23 @@ FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
|
||||
return result;
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
|
||||
{
|
||||
fltx4 retVal;
|
||||
SubFloat( retVal, 0 ) = MAX( SubFloat( a, 0 ), SubFloat( b, 0 ) );
|
||||
SubFloat( retVal, 1 ) = MAX( SubFloat( a, 1 ), SubFloat( b, 1 ) );
|
||||
SubFloat( retVal, 2 ) = MAX( SubFloat( a, 2 ), SubFloat( b, 2 ) );
|
||||
SubFloat( retVal, 3 ) = MAX( SubFloat( a, 3 ), SubFloat( b, 3 ) );
|
||||
SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) );
|
||||
SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) );
|
||||
SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) );
|
||||
SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) );
|
||||
return retVal;
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
|
||||
{
|
||||
fltx4 retVal;
|
||||
SubFloat( retVal, 0 ) = MIN( SubFloat( a, 0 ), SubFloat( b, 0 ) );
|
||||
SubFloat( retVal, 1 ) = MIN( SubFloat( a, 1 ), SubFloat( b, 1 ) );
|
||||
SubFloat( retVal, 2 ) = MIN( SubFloat( a, 2 ), SubFloat( b, 2 ) );
|
||||
SubFloat( retVal, 3 ) = MIN( SubFloat( a, 3 ), SubFloat( b, 3 ) );
|
||||
SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) );
|
||||
SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) );
|
||||
SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) );
|
||||
SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) );
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@ -858,7 +858,7 @@ FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
|
||||
// and replicate it to the whole return value.
|
||||
FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
|
||||
{
|
||||
float lowest = MIN( MIN( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
|
||||
float lowest = min( min( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
|
||||
return ReplicateX4(lowest);
|
||||
}
|
||||
|
||||
@ -866,7 +866,7 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
|
||||
// and replicate it to the whole return value.
|
||||
FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
|
||||
{
|
||||
float highest = MAX( MAX( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
|
||||
float highest = max( max( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2));
|
||||
return ReplicateX4(highest);
|
||||
}
|
||||
|
||||
@ -1067,12 +1067,12 @@ FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
|
||||
|
||||
// DivSIMD defined further down, since it uses ReciprocalSIMD
|
||||
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
|
||||
{
|
||||
return __vmaxfp( a, b );
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
|
||||
{
|
||||
return __vminfp( a, b );
|
||||
}
|
||||
@ -1520,11 +1520,11 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
|
||||
compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
|
||||
// compareOne is [y,z,G,G]
|
||||
fltx4 retval = MinSIMD( a, compareOne );
|
||||
// retVal is [MIN(x,y), MIN(y,z), G, G]
|
||||
// retVal is [min(x,y), min(y,z), G, G]
|
||||
compareOne = __vrlimi( compareOne, a, 8 , 2);
|
||||
// compareOne is [z, G, G, G]
|
||||
retval = MinSIMD( retval, compareOne );
|
||||
// retVal = [ MIN(MIN(x,y),z), G, G, G ]
|
||||
// retVal = [ min(min(x,y),z), G, G, G ]
|
||||
|
||||
// splat the x component out to the whole vector and return
|
||||
return SplatXSIMD( retval );
|
||||
@ -1544,11 +1544,11 @@ FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
|
||||
compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
|
||||
// compareOne is [y,z,G,G]
|
||||
fltx4 retval = MaxSIMD( a, compareOne );
|
||||
// retVal is [MAX(x,y), MAX(y,z), G, G]
|
||||
// retVal is [max(x,y), max(y,z), G, G]
|
||||
compareOne = __vrlimi( compareOne, a, 8 , 2);
|
||||
// compareOne is [z, G, G, G]
|
||||
retval = MaxSIMD( retval, compareOne );
|
||||
// retVal = [ MAX(MAX(x,y),z), G, G, G ]
|
||||
// retVal = [ max(max(x,y),z), G, G, G ]
|
||||
|
||||
// splat the x component out to the whole vector and return
|
||||
return SplatXSIMD( retval );
|
||||
@ -1757,7 +1757,7 @@ FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
|
||||
return _mm_and_ps( a, b );
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // a & ~b
|
||||
FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
|
||||
{
|
||||
return _mm_andnot_ps( a, b );
|
||||
}
|
||||
@ -1813,7 +1813,7 @@ FORCEINLINE fltx4 ReplicateX4( float flValue )
|
||||
FORCEINLINE float SubFloat( const fltx4 & a, int idx )
|
||||
{
|
||||
// NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
|
||||
#if !defined _LINUX && !defined __APPLE__
|
||||
#ifndef POSIX
|
||||
return a.m128_f32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<float const *>(&a))[idx];
|
||||
@ -1822,7 +1822,7 @@ FORCEINLINE float SubFloat( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE float & SubFloat( fltx4 & a, int idx )
|
||||
{
|
||||
#if !defined _LINUX && !defined __APPLE__
|
||||
#ifndef POSIX
|
||||
return a.m128_f32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<float *>(&a))[idx];
|
||||
@ -1836,7 +1836,7 @@ FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
|
||||
{
|
||||
#if !defined _LINUX && !defined __APPLE__
|
||||
#ifndef POSIX
|
||||
return a.m128_u32[idx];
|
||||
#else
|
||||
return (reinterpret_cast<uint32 const *>(&a))[idx];
|
||||
@ -1845,7 +1845,7 @@ FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
|
||||
{
|
||||
#if !defined _LINUX && !defined __APPLE__
|
||||
#ifndef POSIX
|
||||
return a.m128_u32[idx];
|
||||
#else
|
||||
return (reinterpret_cast<uint32 *>(&a))[idx];
|
||||
@ -2120,12 +2120,12 @@ FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <=
|
||||
return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) );
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // MIN(a,b)
|
||||
FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
|
||||
{
|
||||
return _mm_min_ps( a, b );
|
||||
}
|
||||
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // MAX(a,b)
|
||||
FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
|
||||
{
|
||||
return _mm_max_ps( a, b );
|
||||
}
|
||||
@ -2271,11 +2271,11 @@ FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a )
|
||||
fltx4 compareOne = RotateLeft( a );
|
||||
// compareOne is [y,z,G,x]
|
||||
fltx4 retval = MinSIMD( a, compareOne );
|
||||
// retVal is [MIN(x,y), ... ]
|
||||
// retVal is [min(x,y), ... ]
|
||||
compareOne = RotateLeft2( a );
|
||||
// compareOne is [z, G, x, y]
|
||||
retval = MinSIMD( retval, compareOne );
|
||||
// retVal = [ MIN(MIN(x,y),z)..]
|
||||
// retVal = [ min(min(x,y),z)..]
|
||||
// splat the x component out to the whole vector and return
|
||||
return SplatXSIMD( retval );
|
||||
|
||||
@ -2288,11 +2288,11 @@ FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a )
|
||||
fltx4 compareOne = RotateLeft( a );
|
||||
// compareOne is [y,z,G,x]
|
||||
fltx4 retval = MaxSIMD( a, compareOne );
|
||||
// retVal is [MAX(x,y), ... ]
|
||||
// retVal is [max(x,y), ... ]
|
||||
compareOne = RotateLeft2( a );
|
||||
// compareOne is [z, G, x, y]
|
||||
retval = MaxSIMD( retval, compareOne );
|
||||
// retVal = [ MAX(MAX(x,y),z)..]
|
||||
// retVal = [ max(max(x,y),z)..]
|
||||
// splat the x component out to the whole vector and return
|
||||
return SplatXSIMD( retval );
|
||||
|
||||
|
@ -233,7 +233,7 @@ FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
|
||||
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
|
||||
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
|
||||
float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
|
||||
sinom = MIN( sinom, 1.f );
|
||||
sinom = min( sinom, 1.f );
|
||||
|
||||
float sinsom = sin( asin( sinom ) * t );
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "tier0/threadtools.h"
|
||||
#include "mathlib/vector2d.h"
|
||||
#include "mathlib/math_pfns.h"
|
||||
#include "minmax.h"
|
||||
|
||||
// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
|
||||
//#define VECTOR_PARANOIA 1
|
||||
@ -48,7 +49,11 @@
|
||||
#ifdef VECTOR_PARANOIA
|
||||
#define CHECK_VALID( _v) Assert( (_v).IsValid() )
|
||||
#else
|
||||
#ifdef GNUC
|
||||
#define CHECK_VALID( _v)
|
||||
#else
|
||||
#define CHECK_VALID( _v) 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference!
|
||||
@ -129,6 +134,7 @@ public:
|
||||
}
|
||||
|
||||
vec_t NormalizeInPlace();
|
||||
Vector Normalized() const;
|
||||
bool IsLengthGreaterThan( float val ) const;
|
||||
bool IsLengthLessThan( float val ) const;
|
||||
|
||||
@ -202,6 +208,7 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }
|
||||
|
||||
#if ( ( !defined( _X360 ) ) && ( ! defined( _LINUX) ) )
|
||||
#define USE_M64S 1
|
||||
@ -260,7 +267,7 @@ private:
|
||||
// No assignment operators either...
|
||||
// ShortVector& operator=( ShortVector const& src );
|
||||
|
||||
};
|
||||
} ALIGN8_POST;
|
||||
|
||||
|
||||
|
||||
@ -396,7 +403,7 @@ public:
|
||||
|
||||
#endif
|
||||
float w; // this space is used anyway
|
||||
};
|
||||
} ALIGN16_POST;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Vector related operations
|
||||
@ -416,7 +423,9 @@ FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& resul
|
||||
FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result );
|
||||
FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result );
|
||||
inline void VectorScale ( const Vector& in, vec_t scale, Vector& result );
|
||||
inline void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
|
||||
// Don't mark this as inline in its function declaration. That's only necessary on its
|
||||
// definition, and 'inline' here leads to gcc warnings.
|
||||
void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
|
||||
|
||||
// Vector equality with tolerance
|
||||
bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f );
|
||||
@ -443,6 +452,31 @@ void VectorMax( const Vector &a, const Vector &b, Vector &result );
|
||||
|
||||
// Linearly interpolate between two vectors
|
||||
void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest );
|
||||
Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t );
|
||||
|
||||
// Builds a Vector whose three components are all set to x.
FORCEINLINE Vector ReplicateToVector( float x )
{
	Vector vecReplicated( x, x, x );
	return vecReplicated;
}
|
||||
|
||||
// check if a point is in the field of a view of an object. supports up to 180 degree fov.
|
||||
FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition,
|
||||
Vector const &vecTargetPosition,
|
||||
Vector const &vecLookDirection, float flCosHalfFOV )
|
||||
{
|
||||
Vector vecDelta = vecTargetPosition - vecSrcPosition;
|
||||
float cosDiff = DotProduct( vecLookDirection, vecDelta );
|
||||
|
||||
if ( cosDiff < 0 )
|
||||
return false;
|
||||
|
||||
float flLen2 = vecDelta.LengthSqr();
|
||||
|
||||
// a/sqrt(b) > c == a^2 > b * c ^2
|
||||
return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV );
|
||||
|
||||
}
|
||||
|
||||
|
||||
#ifndef VECTOR_NO_SLOW_OPERATIONS
|
||||
|
||||
@ -454,6 +488,10 @@ Vector RandomVector( vec_t minVal, vec_t maxVal );
|
||||
|
||||
#endif
|
||||
|
||||
float RandomVectorInUnitSphere( Vector *pVector );
|
||||
float RandomVectorInUnitCircle( Vector2D *pVector );
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Inlined Vector methods
|
||||
@ -517,9 +555,9 @@ inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz )
|
||||
|
||||
inline void Vector::Random( vec_t minVal, vec_t maxVal )
|
||||
{
|
||||
x = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
|
||||
y = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
|
||||
z = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
|
||||
x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
|
||||
y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
|
||||
z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
|
||||
CHECK_VALID(*this);
|
||||
}
|
||||
|
||||
@ -1082,14 +1120,6 @@ inline void VectorScale ( const Vector& in, vec_t scale, Vector& result )
|
||||
VectorMultiply( in, scale, result );
|
||||
}
|
||||
|
||||
inline void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
|
||||
{
|
||||
CHECK_VALID(start);
|
||||
CHECK_VALID(direction);
|
||||
dest.x = start.x + scale * direction.x;
|
||||
dest.y = start.y + scale * direction.y;
|
||||
dest.z = start.z + scale * direction.z;
|
||||
}
|
||||
|
||||
FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c )
|
||||
{
|
||||
@ -1131,6 +1161,12 @@ inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector&
|
||||
dest.z = src1.z + (src2.z - src1.z) * t;
|
||||
}
|
||||
|
||||
// By-value form of VectorLerp: forwards to the four-argument overload that writes into a
// destination vector, and returns that destination.
inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t )
{
	Vector vecInterpolated;
	VectorLerp( src1, src2, t, vecInterpolated );
	return vecInterpolated;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Temporary storage for vector results so const Vector& results can be returned
|
||||
@ -1431,6 +1467,13 @@ inline void VectorMax( const Vector &a, const Vector &b, Vector &result )
|
||||
result.z = fpmax(a.z, b.z);
|
||||
}
|
||||
|
||||
inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs )
|
||||
{
|
||||
Vector vecDelta;
|
||||
VectorSubtract( vecMaxs, vecMins, vecDelta );
|
||||
return DotProduct( vecDelta, vecDelta );
|
||||
}
|
||||
|
||||
// Get a random vector.
|
||||
inline Vector RandomVector( float minVal, float maxVal )
|
||||
{
|
||||
@ -1610,7 +1653,7 @@ public:
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
} ALIGN16_POST;
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -1643,6 +1686,9 @@ public:
|
||||
|
||||
extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
|
||||
extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles );
|
||||
|
||||
FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; }
|
||||
|
||||
inline Quaternion::Quaternion(RadianEuler const &angle)
|
||||
{
|
||||
AngleQuaternion( angle, *this );
|
||||
@ -1790,6 +1836,8 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; }
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Allows us to specifically pass the vector by value when we need to
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -1853,9 +1901,9 @@ inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz )
|
||||
|
||||
// Fills x, y and z with independent pseudo-random values in [minVal, maxVal].
//
// rand() is reduced into [0, VALVE_RAND_MAX] before it is normalized. Without
// that step, a C runtime whose RAND_MAX is larger than VALVE_RAND_MAX (glibc
// uses INT_MAX) would produce fractions far greater than 1 and therefore
// results well outside the requested range.
inline void QAngle::Random( vec_t minVal, vec_t maxVal )
{
	const vec_t flRange = maxVal - minVal;

	x = minVal + ((float)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	y = minVal + ((float)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	z = minVal + ((float)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	CHECK_VALID(*this);
}
|
||||
|
||||
@ -2128,11 +2176,16 @@ inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angle
|
||||
}
|
||||
|
||||
#if !defined( _X360 )
|
||||
extern float (*pfInvRSquared)( const float *v );
|
||||
|
||||
FORCEINLINE vec_t InvRSquared( float const *v )
|
||||
{
|
||||
return (*pfInvRSquared)(v);
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result;
|
||||
_mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) ));
|
||||
return result;
|
||||
#else
|
||||
return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCEINLINE vec_t InvRSquared( const Vector &v )
|
||||
@ -2140,36 +2193,63 @@ FORCEINLINE vec_t InvRSquared( const Vector &v )
|
||||
return InvRSquared(&v.x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// call directly
|
||||
FORCEINLINE float _VMX_InvRSquared( const Vector &v )
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
inline void _SSE_RSqrtInline( float a, float* out )
|
||||
{
|
||||
XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
|
||||
xmV = XMVector3Dot( xmV, xmV );
|
||||
return xmV.x;
|
||||
__m128 xx = _mm_load_ss( &a );
|
||||
__m128 xr = _mm_rsqrt_ss( xx );
|
||||
__m128 xt;
|
||||
xt = _mm_mul_ss( xr, xr );
|
||||
xt = _mm_mul_ss( xt, xx );
|
||||
xt = _mm_sub_ss( _mm_set_ss(3.f), xt );
|
||||
xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) );
|
||||
xr = _mm_mul_ss( xr, xt );
|
||||
_mm_store_ss( out, xr );
|
||||
}
|
||||
|
||||
#define InvRSquared(x) _VMX_InvRSquared(x)
|
||||
|
||||
#endif // _X360
|
||||
|
||||
#if !defined( _X360 )
|
||||
extern float (FASTCALL *pfVectorNormalize)(Vector& v);
|
||||
#endif
|
||||
|
||||
// FIXME: Change this back to a #define once we get rid of the vec_t version
|
||||
FORCEINLINE float VectorNormalize( Vector& v )
|
||||
FORCEINLINE float VectorNormalize( Vector& vec )
|
||||
{
|
||||
return (*pfVectorNormalize)(v);
|
||||
#ifndef DEBUG // stop crashing my edit-and-continue!
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
#define DO_SSE_OPTIMIZATION
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined( DO_SSE_OPTIMIZATION )
|
||||
float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen;
|
||||
_SSE_RSqrtInline(sqrlen, &invlen);
|
||||
vec.x *= invlen;
|
||||
vec.y *= invlen;
|
||||
vec.z *= invlen;
|
||||
return sqrlen * invlen;
|
||||
#else
|
||||
extern float (FASTCALL *pfVectorNormalize)(Vector& v);
|
||||
return (*pfVectorNormalize)(vec);
|
||||
#endif
|
||||
}
|
||||
|
||||
// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
|
||||
// Legacy float* entry point: reinterprets the array as a Vector and forwards
// to the Vector& overload, returning whatever that overload returns.
FORCEINLINE float VectorNormalize( float * v )
{
	Vector *pVec = reinterpret_cast<Vector *>( v );
	return VectorNormalize( *pVec );
}
|
||||
|
||||
// Normalizes vec in place by forwarding to VectorNormalize; the value that
// VectorNormalize returns is discarded.
FORCEINLINE void VectorNormalizeFast( Vector &vec )
{
	(void)VectorNormalize( vec );
}
|
||||
|
||||
#else
|
||||
|
||||
// Xbox 360 path: takes the reciprocal length of v, dots that result with
// itself, and returns the x lane.
// NOTE(review): if XMVector3ReciprocalLength replicates 1/len into every lane,
// the 3-component dot yields 3/len^2 rather than 1/len^2 - confirm against the
// XM documentation.
FORCEINLINE float _VMX_InvRSquared( const Vector &v )
{
	XMVECTOR xmLoaded = XMLoadVector3( v.Base() );
	XMVECTOR xmInvLen = XMVector3ReciprocalLength( xmLoaded );
	XMVECTOR xmResult = XMVector3Dot( xmInvLen, xmInvLen );
	return xmResult.x;
}
|
||||
|
||||
// call directly
|
||||
FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
|
||||
{
|
||||
@ -2180,6 +2260,9 @@ FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
|
||||
vec.z *= den;
|
||||
return mag;
|
||||
}
|
||||
|
||||
#define InvRSquared(x) _VMX_InvRSquared(x)
|
||||
|
||||
// FIXME: Change this back to a #define once we get rid of the vec_t version
|
||||
FORCEINLINE float VectorNormalize( Vector& v )
|
||||
{
|
||||
@ -2191,18 +2274,6 @@ FORCEINLINE float VectorNormalize( float *pV )
|
||||
return _VMX_VectorNormalize(*(reinterpret_cast<Vector*>(pV)));
|
||||
}
|
||||
|
||||
#endif // _X360
|
||||
|
||||
#if !defined( _X360 )
|
||||
extern void (FASTCALL *pfVectorNormalizeFast)(Vector& v);
|
||||
|
||||
FORCEINLINE void VectorNormalizeFast( Vector& v )
|
||||
{
|
||||
(*pfVectorNormalizeFast)(v);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// call directly
|
||||
FORCEINLINE void VectorNormalizeFast( Vector &vec )
|
||||
{
|
||||
@ -2215,11 +2286,19 @@ FORCEINLINE void VectorNormalizeFast( Vector &vec )
|
||||
|
||||
#endif // _X360
|
||||
|
||||
|
||||
// Normalizes this vector in place and returns the value produced by
// VectorNormalize.
inline vec_t Vector::NormalizeInPlace()
{
	Vector &vecSelf = *this;
	return VectorNormalize( vecSelf );
}
|
||||
|
||||
// Returns a normalized copy of this vector; the vector itself is not modified.
inline Vector Vector::Normalized() const
{
	Vector vecUnit = *this;
	VectorNormalize( vecUnit );

	return vecUnit;
}
|
||||
|
||||
inline bool Vector::IsLengthGreaterThan( float val ) const
|
||||
{
|
||||
return LengthSqr() > val*val;
|
||||
|
@ -239,8 +239,8 @@ inline void Vector2D::Init( vec_t ix, vec_t iy )
|
||||
|
||||
inline void Vector2D::Random( float minVal, float maxVal )
|
||||
{
|
||||
x = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
|
||||
y = minVal + ((float)rand() / (float)RAND_MAX) * (maxVal - minVal);
|
||||
x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
|
||||
y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
|
||||
}
|
||||
|
||||
inline void Vector2DClear( Vector2D& a )
|
||||
|
@ -132,11 +132,7 @@ const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX );
|
||||
// SSE optimized routines
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#ifdef _WIN32
|
||||
class __declspec(align(16)) Vector4DAligned : public Vector4D
|
||||
#elif defined _LINUX || defined __APPLE__
|
||||
class __attribute__((aligned(16))) Vector4DAligned : public Vector4D
|
||||
#endif
|
||||
class ALIGN16 Vector4DAligned : public Vector4D
|
||||
{
|
||||
public:
|
||||
Vector4DAligned(void) {}
|
||||
@ -154,7 +150,7 @@ private:
|
||||
|
||||
// No assignment operators either...
|
||||
Vector4DAligned& operator=( Vector4DAligned const& src );
|
||||
};
|
||||
} ALIGN16_POST;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Vector4D related operations
|
||||
@ -249,10 +245,10 @@ inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw )
|
||||
|
||||
// Fills x, y, z and w with independent pseudo-random values in
// [minVal, maxVal].
//
// rand() is reduced into [0, VALVE_RAND_MAX] before it is normalized. Without
// that step, a C runtime whose RAND_MAX is larger than VALVE_RAND_MAX (glibc
// uses INT_MAX) would produce fractions far greater than 1 and therefore
// results well outside the requested range.
inline void Vector4D::Random( vec_t minVal, vec_t maxVal )
{
	const vec_t flRange = maxVal - minVal;

	x = minVal + ((vec_t)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	y = minVal + ((vec_t)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	z = minVal + ((vec_t)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
	w = minVal + ((vec_t)( rand() % ( VALVE_RAND_MAX + 1 ) ) / VALVE_RAND_MAX) * flRange;
}
|
||||
|
||||
inline void Vector4DClear( Vector4D& a )
|
||||
|
@ -54,6 +54,7 @@ public:
|
||||
// Creates a matrix where the X axis = forward
|
||||
// the Y axis = left, and the Z axis = up
|
||||
VMatrix( const Vector& forward, const Vector& left, const Vector& up );
|
||||
VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation );
|
||||
|
||||
// Construct from a 3x4 matrix
|
||||
VMatrix( const matrix3x4_t& matrix3x4 );
|
||||
@ -106,7 +107,6 @@ public:
|
||||
void PreTranslate(const Vector &vTrans);
|
||||
void PostTranslate(const Vector &vTrans);
|
||||
|
||||
matrix3x4_t& As3x4();
|
||||
const matrix3x4_t& As3x4() const;
|
||||
void CopyFrom3x4( const matrix3x4_t &m3x4 );
|
||||
void Set3x4( matrix3x4_t& matrix3x4 ) const;
|
||||
@ -199,6 +199,9 @@ public:
|
||||
// Setup a matrix for origin and angles.
|
||||
void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles );
|
||||
|
||||
// Setup a matrix for angles and no translation.
|
||||
void SetupMatrixAngles( const QAngle &vAngles );
|
||||
|
||||
// General inverse. This may fail so check the return!
|
||||
bool InverseGeneral(VMatrix &vInverse) const;
|
||||
|
||||
@ -457,6 +460,16 @@ inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector&
|
||||
);
|
||||
}
|
||||
|
||||
// Builds a matrix whose first three columns are the given axes and whose
// fourth column is the translation; the bottom row is (0, 0, 0, 1).
inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
{
	Init(
		xAxis.x,	yAxis.x,	zAxis.x,	translation.x,	// row 0
		xAxis.y,	yAxis.y,	zAxis.y,	translation.y,	// row 1
		xAxis.z,	yAxis.z,	zAxis.z,	translation.z,	// row 2
		0.0f,		0.0f,		0.0f,		1.0f			// row 3
	);
}
|
||||
|
||||
|
||||
inline void VMatrix::Init(
|
||||
vec_t m00, vec_t m01, vec_t m02, vec_t m03,
|
||||
@ -616,11 +629,6 @@ inline const matrix3x4_t& VMatrix::As3x4() const
|
||||
return *((const matrix3x4_t*)this);
|
||||
}
|
||||
|
||||
inline matrix3x4_t& VMatrix::As3x4()
|
||||
{
|
||||
return *((matrix3x4_t*)this);
|
||||
}
|
||||
|
||||
inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
|
||||
{
|
||||
memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) );
|
||||
|
18
public/minmax.h
Normal file
18
public/minmax.h
Normal file
@ -0,0 +1,18 @@
|
||||
//========= Copyright Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
// $NoKeywords: $
|
||||
//=============================================================================//
|
||||
|
||||
#ifndef MINMAX_H
|
||||
#define MINMAX_H
|
||||
|
||||
// Generic min/max macros. Each is only defined when no definition exists yet.
// The selected argument is evaluated twice, so do not pass expressions with
// side effects (e.g. V_min( i++, j )).
#ifndef V_min
#	define V_min(a,b)  (((a) < (b)) ? (a) : (b))
#endif

#ifndef V_max
#	define V_max(a,b)  (((a) > (b)) ? (a) : (b))
#endif
|
||||
|
||||
#endif // MINMAX_H
|
@ -109,16 +109,38 @@ FORCEINLINE float fpmax( float a, float b )
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
// Returns val limited to the closed range [minVal, maxVal]. The lower bound
// is tested first, so it wins if the bounds are inverted and val lies below
// minVal.
template< class T >
inline T clamp( T const &val, T const &minVal, T const &maxVal )
{
	return ( val < minVal ) ? minVal
		 : ( val > maxVal ) ? maxVal
		 : val;
}
|
||||
|
||||
// This is the preferred clamp operator. Using the clamp macro can lead to
|
||||
// unexpected side-effects or more expensive code. Even the clamp (all
|
||||
// lower-case) function can generate more expensive code because of the
|
||||
// mixed types involved.
|
||||
// Returns val limited to the closed range [minVal, maxVal]. All three
// arguments share the single type T.
template< class T >
T Clamp( T const &val, T const &minVal, T const &maxVal )
{
	if( val < minVal )
	{
		return minVal;
	}

	if( val > maxVal )
	{
		return maxVal;
	}

	return val;
}
|
||||
|
||||
// This is the preferred Min operator. Using the MIN macro can lead to unexpected
|
||||
// side-effects or more expensive code.
|
||||
// Returns the smaller of the two values; when neither compares less than the
// other, val2 is returned.
template< class T >
T Min( T const &val1, T const &val2 )
{
	if ( val1 < val2 )
	{
		return val1;
	}

	return val2;
}
|
||||
|
||||
// This is the preferred Max operator. Using the MAX macro can lead to unexpected
|
||||
// side-effects or more expensive code.
|
||||
// Returns the larger of the two values; when neither compares greater than
// the other, val2 is returned.
template< class T >
T Max( T const &val1, T const &val2 )
{
	if ( val1 > val2 )
	{
		return val1;
	}

	return val2;
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
@ -247,7 +269,7 @@ struct colorVec
|
||||
|
||||
|
||||
#ifndef NOTE_UNUSED
|
||||
#define NOTE_UNUSED(x) (x = x) // for pesky compiler / lint warnings
|
||||
#define NOTE_UNUSED(x) (void)(x) // for pesky compiler / lint warnings
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
|
||||
|
@ -32,11 +32,12 @@
|
||||
|
||||
#define SETBITS(iBitVector, bits) ((iBitVector) |= (bits))
|
||||
#define CLEARBITS(iBitVector, bits) ((iBitVector) &= ~(bits))
|
||||
#define FBitSet(iBitVector, bit) ((iBitVector) & (bit))
|
||||
#define FBitSet(iBitVector, bits) ((iBitVector) & (bits))
|
||||
|
||||
inline bool IsPowerOfTwo( int value )
|
||||
template <typename T>
|
||||
inline bool IsPowerOfTwo( T value )
|
||||
{
|
||||
return (value & ( value - 1 )) == 0;
|
||||
return (value & ( value - (T)1 )) == (T)0;
|
||||
}
|
||||
|
||||
#define CONST_INTEGER_AS_STRING(x) #x //Wraps the integer in quotes, allowing us to form constant strings with it
|
||||
|
@ -1,4 +1,4 @@
|
||||
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
|
||||
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose:
|
||||
//
|
||||
@ -11,11 +11,14 @@
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include "tier0/platform.h"
|
||||
|
||||
PLATFORM_INTERFACE int64 g_ClockSpeed;
|
||||
PLATFORM_INTERFACE uint32_t g_dwClockSpeed;
|
||||
PLATFORM_INTERFACE uint64 g_ClockSpeed;
|
||||
#if defined( _X360 ) && defined( _CERT )
|
||||
PLATFORM_INTERFACE uint32_t g_dwFakeFastCounter;
|
||||
#endif
|
||||
@ -30,20 +33,20 @@ friend class CFastTimer;
|
||||
|
||||
public:
|
||||
CCycleCount();
|
||||
CCycleCount( int64 cycles );
|
||||
CCycleCount( uint64 cycles );
|
||||
|
||||
void Sample(); // Sample the clock. This takes about 34 clocks to execute (or 26,000 calls per millisecond on a P900).
|
||||
|
||||
void Init(); // Set to zero.
|
||||
void Init( float initTimeMsec );
|
||||
void Init( double initTimeMsec ) { Init( (float)initTimeMsec ); }
|
||||
void Init( int64 cycles );
|
||||
void Init( uint64 cycles );
|
||||
bool IsLessThan( CCycleCount const &other ) const; // Compare two counts.
|
||||
|
||||
// Convert to other time representations. These functions are slow, so it's preferable to call them
|
||||
// during display rather than inside a timing block.
|
||||
uint32_t GetCycles() const;
|
||||
int64 GetLongCycles() const;
|
||||
uint64 GetLongCycles() const;
|
||||
|
||||
uint32_t GetMicroseconds() const;
|
||||
uint64 GetUlMicroseconds() const;
|
||||
@ -63,12 +66,12 @@ public:
|
||||
// dest = rSrc1 - rSrc2
|
||||
static void Sub( CCycleCount const &rSrc1, CCycleCount const &rSrc2, CCycleCount &dest ); // Add two samples together.
|
||||
|
||||
static int64 GetTimestamp();
|
||||
static uint64 GetTimestamp();
|
||||
|
||||
int64 m_Int64;
|
||||
uint64 m_Int64;
|
||||
};
|
||||
|
||||
class CClockSpeedInit
|
||||
class PLATFORM_CLASS CClockSpeedInit
|
||||
{
|
||||
public:
|
||||
CClockSpeedInit()
|
||||
@ -76,21 +79,7 @@ public:
|
||||
Init();
|
||||
}
|
||||
|
||||
static void Init()
|
||||
{
|
||||
#if defined( _X360 ) && !defined( _CERT )
|
||||
PMCStart();
|
||||
PMCInitIntervalTimer( 0 );
|
||||
#endif
|
||||
const CPUInformation& pi = GetCPUInformation();
|
||||
|
||||
g_ClockSpeed = pi.m_Speed;
|
||||
g_dwClockSpeed = (uint32_t)g_ClockSpeed;
|
||||
|
||||
g_ClockSpeedMicrosecondsMultiplier = 1000000.0 / (double)g_ClockSpeed;
|
||||
g_ClockSpeedMillisecondsMultiplier = 1000.0 / (double)g_ClockSpeed;
|
||||
g_ClockSpeedSecondsMultiplier = 1.0f / (double)g_ClockSpeed;
|
||||
}
|
||||
static void Init();
|
||||
};
|
||||
|
||||
class CFastTimer
|
||||
@ -104,7 +93,7 @@ public:
|
||||
CCycleCount GetDurationInProgress() const; // Call without ending. Not that cheap.
|
||||
|
||||
// Return number of cycles per second on this processor.
|
||||
static inline uint32_t GetClockSpeed();
|
||||
static inline int64 GetClockSpeed();
|
||||
|
||||
private:
|
||||
CCycleCount m_Duration;
|
||||
@ -233,8 +222,6 @@ private:
|
||||
unsigned m_nIters;
|
||||
CCycleCount m_Total;
|
||||
CCycleCount m_Peak;
|
||||
// bool m_fReport;
|
||||
// const tchar *m_pszName;
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------------- //
|
||||
@ -257,87 +244,37 @@ private:
|
||||
|
||||
inline CCycleCount::CCycleCount()
|
||||
{
|
||||
Init( (int64)0 );
|
||||
Init( (uint64)0 );
|
||||
}
|
||||
|
||||
inline CCycleCount::CCycleCount( int64 cycles )
|
||||
inline CCycleCount::CCycleCount( uint64 cycles )
|
||||
{
|
||||
Init( cycles );
|
||||
}
|
||||
|
||||
inline void CCycleCount::Init()
|
||||
{
|
||||
Init( (int64)0 );
|
||||
Init( (uint64)0 );
|
||||
}
|
||||
|
||||
inline void CCycleCount::Init( float initTimeMsec )
|
||||
{
|
||||
if ( g_ClockSpeedMillisecondsMultiplier > 0 )
|
||||
Init( (int64)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier) );
|
||||
Init( (uint64)(initTimeMsec / g_ClockSpeedMillisecondsMultiplier) );
|
||||
else
|
||||
Init( (int64)0 );
|
||||
Init( (uint64)0 );
|
||||
}
|
||||
|
||||
inline void CCycleCount::Init( int64 cycles )
|
||||
inline void CCycleCount::Init( uint64 cycles )
|
||||
{
|
||||
m_Int64 = cycles;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4189) // warning C4189: local variable is initialized but not referenced
|
||||
#endif
|
||||
|
||||
inline void CCycleCount::Sample()
|
||||
{
|
||||
#if defined( _X360 )
|
||||
#if !defined( _CERT )
|
||||
// read the highest resolution timer directly (ticks at native 3.2GHz), bypassing any calls into PMC
|
||||
// can only resolve 32 bits, rollover is ~1.32 secs
|
||||
// based on PMCGetIntervalTimer() from the April 2007 XDK
|
||||
int64 temp;
|
||||
__asm
|
||||
{
|
||||
lis r11,08FFFh
|
||||
ld r11,011E0h(r11)
|
||||
rldicl r11,r11,32,32
|
||||
// unforunate can't get the inline assembler to write directly into desired target
|
||||
std r11,temp
|
||||
}
|
||||
m_Int64 = temp;
|
||||
#else
|
||||
m_Int64 = ++g_dwFakeFastCounter;
|
||||
#endif
|
||||
#elif defined( _WIN32 ) && !defined( _WIN64 )
|
||||
uint32_t* pSample = (uint32_t *)&m_Int64;
|
||||
__asm
|
||||
{
|
||||
// force the cpu to synchronize the instruction queue
|
||||
// NJS: CPUID can really impact performance in tight loops.
|
||||
//cpuid
|
||||
//cpuid
|
||||
//cpuid
|
||||
mov ecx, pSample
|
||||
rdtsc
|
||||
mov [ecx], eax
|
||||
mov [ecx+4], edx
|
||||
}
|
||||
#elif defined( _LINUX )
|
||||
uint32_t* pSample = (uint32_t *)&m_Int64;
|
||||
__asm__ __volatile__ (
|
||||
"rdtsc\n\t"
|
||||
"movl %%eax, (%0)\n\t"
|
||||
"movl %%edx, 4(%0)\n\t"
|
||||
: /* no output regs */
|
||||
: "D" (pSample)
|
||||
: "%eax", "%edx" );
|
||||
#endif
|
||||
m_Int64 = Plat_Rdtsc();
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
inline CCycleCount& CCycleCount::operator+=( CCycleCount const &other )
|
||||
{
|
||||
m_Int64 += other.m_Int64;
|
||||
@ -355,7 +292,7 @@ inline void CCycleCount::Sub( CCycleCount const &rSrc1, CCycleCount const &rSrc2
|
||||
dest.m_Int64 = rSrc1.m_Int64 - rSrc2.m_Int64;
|
||||
}
|
||||
|
||||
inline int64 CCycleCount::GetTimestamp()
|
||||
inline uint64 CCycleCount::GetTimestamp()
|
||||
{
|
||||
CCycleCount c;
|
||||
c.Sample();
|
||||
@ -373,7 +310,7 @@ inline uint32_t CCycleCount::GetCycles() const
|
||||
return (uint32_t)m_Int64;
|
||||
}
|
||||
|
||||
inline int64 CCycleCount::GetLongCycles() const
|
||||
inline uint64 CCycleCount::GetLongCycles() const
|
||||
{
|
||||
return m_Int64;
|
||||
}
|
||||
@ -397,7 +334,7 @@ inline double CCycleCount::GetMicrosecondsF() const
|
||||
|
||||
inline void CCycleCount::SetMicroseconds( uint32_t nMicroseconds )
|
||||
{
|
||||
m_Int64 = ((int64)nMicroseconds * g_ClockSpeed) / 1000000;
|
||||
m_Int64 = ((uint64)nMicroseconds * g_ClockSpeed) / 1000000;
|
||||
}
|
||||
|
||||
|
||||
@ -438,10 +375,10 @@ inline void CFastTimer::End()
|
||||
if ( IsX360() )
|
||||
{
|
||||
// have to handle rollover, hires timer is only accurate to 32 bits
|
||||
// more than one overflow should not have occured, otherwise caller should use a slower timer
|
||||
// more than one overflow should not have occurred, otherwise caller should use a slower timer
|
||||
if ( (uint64)cnt.m_Int64 <= (uint64)m_Duration.m_Int64 )
|
||||
{
|
||||
// rollover occured
|
||||
// rollover occurred
|
||||
cnt.m_Int64 += 0x100000000LL;
|
||||
}
|
||||
}
|
||||
@ -460,10 +397,10 @@ inline CCycleCount CFastTimer::GetDurationInProgress() const
|
||||
if ( IsX360() )
|
||||
{
|
||||
// have to handle rollover, hires timer is only accurate to 32 bits
|
||||
// more than one overflow should not have occured, otherwise caller should use a slower timer
|
||||
// more than one overflow should not have occurred, otherwise caller should use a slower timer
|
||||
if ( (uint64)cnt.m_Int64 <= (uint64)m_Duration.m_Int64 )
|
||||
{
|
||||
// rollover occured
|
||||
// rollover occurred
|
||||
cnt.m_Int64 += 0x100000000LL;
|
||||
}
|
||||
}
|
||||
@ -475,9 +412,9 @@ inline CCycleCount CFastTimer::GetDurationInProgress() const
|
||||
}
|
||||
|
||||
|
||||
inline uint32_t CFastTimer::GetClockSpeed()
|
||||
inline int64 CFastTimer::GetClockSpeed()
|
||||
{
|
||||
return g_dwClockSpeed;
|
||||
return g_ClockSpeed;
|
||||
}
|
||||
|
||||
|
||||
@ -553,15 +490,20 @@ inline CAverageTimeMarker::~CAverageTimeMarker()
|
||||
|
||||
// CLimitTimer
|
||||
// Use this to time whether a desired interval of time has passed. It's extremely fast
|
||||
// to check while running.
|
||||
// to check while running. NOTE: CMicroSecOverage() and CMicroSecLeft() are not as fast to check.
|
||||
class CLimitTimer
|
||||
{
|
||||
public:
|
||||
CLimitTimer() {}
|
||||
CLimitTimer( uint64 cMicroSecDuration ) { SetLimit( cMicroSecDuration ); }
|
||||
void SetLimit( uint64 m_cMicroSecDuration );
|
||||
bool BLimitReached( void );
|
||||
bool BLimitReached() const;
|
||||
|
||||
int CMicroSecOverage() const;
|
||||
uint64 CMicroSecLeft() const;
|
||||
|
||||
private:
|
||||
int64 m_lCycleLimit;
|
||||
uint64 m_lCycleLimit;
|
||||
};
|
||||
|
||||
|
||||
@ -569,9 +511,9 @@ private:
|
||||
// Purpose: Initializes the limit timer with a period of time to measure.
|
||||
// Input : cMicroSecDuration - How long a time period to measure
|
||||
//-----------------------------------------------------------------------------
|
||||
inline void CLimitTimer::SetLimit( uint64 m_cMicroSecDuration )
|
||||
inline void CLimitTimer::SetLimit( uint64 cMicroSecDuration )
|
||||
{
|
||||
int64 dlCycles = ( ( uint64 ) m_cMicroSecDuration * ( int64 ) g_dwClockSpeed ) / ( int64 ) 1000000L;
|
||||
uint64 dlCycles = ( ( uint64 ) cMicroSecDuration * g_ClockSpeed ) / ( uint64 ) 1000000L;
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample( );
|
||||
m_lCycleLimit = cycleCount.GetLongCycles( ) + dlCycles;
|
||||
@ -582,7 +524,7 @@ inline void CLimitTimer::SetLimit( uint64 m_cMicroSecDuration )
|
||||
// Purpose: Determines whether our specified time period has passed
|
||||
// Output: true if at least the specified time period has passed
|
||||
//-----------------------------------------------------------------------------
|
||||
inline bool CLimitTimer::BLimitReached( )
|
||||
inline bool CLimitTimer::BLimitReached() const
|
||||
{
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample( );
|
||||
@ -590,5 +532,38 @@ inline bool CLimitTimer::BLimitReached( )
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: If we're over our specified time period, return the amount of the overage.
|
||||
// Output: # of microseconds since we reached our specified time period.
|
||||
//-----------------------------------------------------------------------------
|
||||
inline int CLimitTimer::CMicroSecOverage() const
|
||||
{
|
||||
CCycleCount cycleCount;
|
||||
cycleCount.Sample();
|
||||
uint64 lcCycles = cycleCount.GetLongCycles();
|
||||
|
||||
if ( lcCycles < m_lCycleLimit )
|
||||
return 0;
|
||||
|
||||
return( ( int ) ( ( lcCycles - m_lCycleLimit ) * ( uint64 ) 1000000L / g_ClockSpeed ) );
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: If we're under our specified time period, return the amount under.
|
||||
// Output: # of microseconds until we reached our specified time period, 0 if we've passed it
|
||||
//-----------------------------------------------------------------------------
|
||||
// Returns how many microseconds remain until the limit is reached, or 0 if it
// has already passed.
//
// The cycle-to-microsecond conversion is split into whole seconds plus a
// remainder so that "cycles * 1000000" cannot overflow 64 bits when the
// remaining interval is long. If the clock speed has not been initialized
// (still 0) the conversion is impossible and 0 is returned instead of
// dividing by zero.
inline uint64 CLimitTimer::CMicroSecLeft() const
{
	CCycleCount cycleCount;
	cycleCount.Sample();
	const uint64 lcCycles = cycleCount.GetLongCycles();

	if ( lcCycles >= m_lCycleLimit )
		return 0;

	const uint64 ulClockSpeed = g_ClockSpeed;
	if ( ulClockSpeed == 0 )
		return 0;

	const uint64 ulCyclesLeft = m_lCycleLimit - lcCycles;
	const uint64 ulWholeSecondsInUs = ( ulCyclesLeft / ulClockSpeed ) * ( uint64 ) 1000000L;
	const uint64 ulFractionInUs = ( ( ulCyclesLeft % ulClockSpeed ) * ( uint64 ) 1000000L ) / ulClockSpeed;

	return( ulWholeSecondsInUs + ulFractionInUs );
}
|
||||
|
||||
|
||||
#endif // FASTTIMER_H
|
||||
|
@ -382,7 +382,7 @@ public:
|
||||
|
||||
#pragma warning(disable:4290)
|
||||
#pragma warning(push)
|
||||
#include <typeinfo.h>
|
||||
#include <typeinfo>
|
||||
|
||||
// MEM_DEBUG_CLASSNAME is opt-in.
|
||||
// Note: typeid().name() is not threadsafe, so if the project needs to access it in multiple threads
|
||||
|
@ -44,10 +44,11 @@ typedef uint32_t ThreadId_t;
|
||||
// feature enables
|
||||
#define NEW_SOFTWARE_LIGHTING
|
||||
|
||||
#if defined(_LINUX) || defined(__APPLE__)
|
||||
#ifdef POSIX
|
||||
// need this for _alloca
|
||||
#include <alloca.h>
|
||||
#endif // _LINUX
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#if defined __APPLE__
|
||||
#include <stdlib.h>
|
||||
@ -234,6 +235,14 @@ typedef unsigned int uint;
|
||||
#define abstract_class class NO_VTABLE
|
||||
#endif
|
||||
|
||||
|
||||
// MSVC CRT uses 0x7fff while gcc uses INT_MAX, leading to mismatches between platforms
|
||||
// As a result, we pick the least common denominator here. This should be used anywhere
|
||||
// you might typically want to use RAND_MAX
|
||||
#define VALVE_RAND_MAX 0x7fff
|
||||
|
||||
|
||||
|
||||
/*
|
||||
FIXME: Enable this when we no longer fear change =)
|
||||
|
||||
@ -242,32 +251,32 @@ FIXME: Enable this when we no longer fear change =)
|
||||
#include <float.h>
|
||||
|
||||
// Maximum and minimum representable values
|
||||
#define INT8_MAX SCHAR_MAX
|
||||
#define INT16_MAX SHRT_MAX
|
||||
#define INT32_MAX LONG_MAX
|
||||
#define INT64_MAX (((int64)~0) >> 1)
|
||||
#define INT8_MAX SCHAR_MAX
|
||||
#define INT16_MAX SHRT_MAX
|
||||
#define INT32_MAX LONG_MAX
|
||||
#define INT64_MAX (((int64)~0) >> 1)
|
||||
|
||||
#define INT8_MIN SCHAR_MIN
|
||||
#define INT16_MIN SHRT_MIN
|
||||
#define INT32_MIN LONG_MIN
|
||||
#define INT64_MIN (((int64)1) << 63)
|
||||
#define INT8_MIN SCHAR_MIN
|
||||
#define INT16_MIN SHRT_MIN
|
||||
#define INT32_MIN LONG_MIN
|
||||
#define INT64_MIN (((int64)1) << 63)
|
||||
|
||||
#define UINT8_MAX ((uint8)~0)
|
||||
#define UINT16_MAX ((uint16)~0)
|
||||
#define UINT32_MAX ((uint32)~0)
|
||||
#define UINT64_MAX ((uint64)~0)
|
||||
#define UINT8_MAX ((uint8)~0)
|
||||
#define UINT16_MAX ((uint16)~0)
|
||||
#define UINT32_MAX ((uint32)~0)
|
||||
#define UINT64_MAX ((uint64)~0)
|
||||
|
||||
#define UINT8_MIN 0
|
||||
#define UINT16_MIN 0
|
||||
#define UINT32_MIN 0
|
||||
#define UINT64_MIN 0
|
||||
#define UINT8_MIN 0
|
||||
#define UINT16_MIN 0
|
||||
#define UINT32_MIN 0
|
||||
#define UINT64_MIN 0
|
||||
|
||||
#ifndef UINT_MIN
|
||||
#define UINT_MIN UINT32_MIN
|
||||
#define UINT_MIN UINT32_MIN
|
||||
#endif
|
||||
|
||||
#define FLOAT32_MAX FLT_MAX
|
||||
#define FLOAT64_MAX DBL_MAX
|
||||
#define FLOAT32_MAX FLT_MAX
|
||||
#define FLOAT64_MAX DBL_MAX
|
||||
|
||||
#define FLOAT32_MIN FLT_MIN
|
||||
#define FLOAT64_MIN DBL_MIN
|
||||
@ -332,11 +341,35 @@ typedef void * HINSTANCE;
|
||||
#define DECL_ALIGN(x) /* */
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// MSVC has the align at the start of the struct
|
||||
#define ALIGN4 DECL_ALIGN(4)
|
||||
#define ALIGN8 DECL_ALIGN(8)
|
||||
#define ALIGN16 DECL_ALIGN(16)
|
||||
#define ALIGN32 DECL_ALIGN(32)
|
||||
#define ALIGN128 DECL_ALIGN(128)
|
||||
|
||||
#define ALIGN4_POST
|
||||
#define ALIGN8_POST
|
||||
#define ALIGN16_POST
|
||||
#define ALIGN32_POST
|
||||
#define ALIGN128_POST
|
||||
#elif defined( GNUC )
|
||||
// gnuc has the align decoration at the end
|
||||
#define ALIGN4
|
||||
#define ALIGN8
|
||||
#define ALIGN16
|
||||
#define ALIGN32
|
||||
#define ALIGN128
|
||||
|
||||
#define ALIGN4_POST DECL_ALIGN(4)
|
||||
#define ALIGN8_POST DECL_ALIGN(8)
|
||||
#define ALIGN16_POST DECL_ALIGN(16)
|
||||
#define ALIGN32_POST DECL_ALIGN(32)
|
||||
#define ALIGN128_POST DECL_ALIGN(128)
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
// Pull in the /analyze code annotations.
|
||||
#include "annotations.h"
|
||||
@ -829,17 +862,20 @@ inline void StoreLittleDWord( uint32_t *base, unsigned int dwordIndex, uint32_t
|
||||
#ifndef STATIC_TIER0
|
||||
|
||||
#ifdef TIER0_DLL_EXPORT
|
||||
#define PLATFORM_INTERFACE DLL_EXPORT
|
||||
#define PLATFORM_OVERLOAD DLL_GLOBAL_EXPORT
|
||||
#define PLATFORM_INTERFACE DLL_EXPORT
|
||||
#define PLATFORM_OVERLOAD DLL_GLOBAL_EXPORT
|
||||
#define PLATFORM_CLASS DLL_CLASS_EXPORT
|
||||
#else
|
||||
#define PLATFORM_INTERFACE DLL_IMPORT
|
||||
#define PLATFORM_OVERLOAD DLL_GLOBAL_IMPORT
|
||||
#define PLATFORM_INTERFACE DLL_IMPORT
|
||||
#define PLATFORM_OVERLOAD DLL_GLOBAL_IMPORT
|
||||
#define PLATFORM_CLASS DLL_CLASS_IMPORT
|
||||
#endif
|
||||
|
||||
#else // BUILD_AS_DLL
|
||||
|
||||
#define PLATFORM_INTERFACE extern
|
||||
#define PLATFORM_OVERLOAD
|
||||
#define PLATFORM_CLASS
|
||||
|
||||
#endif // BUILD_AS_DLL
|
||||
|
||||
@ -854,6 +890,41 @@ PLATFORM_INTERFACE bool Plat_IsInBenchmarkMode();
|
||||
|
||||
PLATFORM_INTERFACE double Plat_FloatTime(); // Returns time in seconds since the module was loaded.
|
||||
PLATFORM_INTERFACE uint32_t Plat_MSTime(); // Time in milliseconds.
|
||||
PLATFORM_INTERFACE char * Plat_ctime( const time_t *timep, char *buf, size_t bufsize );
|
||||
PLATFORM_INTERFACE struct tm * Plat_gmtime( const time_t *timep, struct tm *result );
|
||||
PLATFORM_INTERFACE time_t Plat_timegm( struct tm *timeptr );
|
||||
PLATFORM_INTERFACE struct tm * Plat_localtime( const time_t *timep, struct tm *result );
|
||||
|
||||
#if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
|
||||
extern "C" unsigned __int64 __rdtsc();
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
|
||||
// Returns the current value of the platform's cycle counter as a 64-bit
// unsigned integer. The implementation is chosen at compile time:
//   - _X360: the __mftb32 intrinsic, widened to 64 bits
//   - Win64, or Win32 with MSVC 2005 or newer: the __rdtsc intrinsic
//   - other Win32 compilers: inline "rdtsc" followed by a raw "ret"
//   - GCC-style i386 / x86_64: inline "rdtsc"
// Any other target fails to compile.
inline uint64 Plat_Rdtsc()
{
#if defined( _X360 )
	return ( uint64 )__mftb32();
#elif defined( _WIN64 ) || ( defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 ) )
	return ( uint64 )__rdtsc();
#elif defined( _WIN32 )
	// No intrinsic available: rdtsc leaves its result in edx:eax and the
	// function returns straight from the asm.
	__asm rdtsc;
	__asm ret;
#elif defined( __i386__ )
	// The "A" constraint binds the 64-bit result to the edx:eax pair.
	uint64 tsc;
	__asm__ __volatile__ ( "rdtsc" : "=A" (tsc) );
	return tsc;
#elif defined( __x86_64__ )
	// Read the two 32-bit halves separately and recombine them.
	uint32 tscLow, tscHigh;
	__asm__ __volatile__ ( "rdtsc" : "=a" (tscLow), "=d" (tscHigh));
	return ( ( ( uint64 )tscHigh ) << 32 ) | tscLow;
#else
#error
#endif
}
|
||||
|
||||
// b/w compatibility
|
||||
#define Sys_FloatTime Plat_FloatTime
|
||||
@ -901,13 +972,10 @@ struct CPUInformation // Size: Win32=64, Win64=72
|
||||
CPUInformation(): m_Size(0){}
|
||||
};
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunknown-pragmas"
|
||||
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
|
||||
#endif
|
||||
|
||||
PLATFORM_INTERFACE const CPUInformation& GetCPUInformation();
|
||||
// Have to return a pointer, not a reference, because references are not compatible with the
|
||||
// extern "C" implied by PLATFORM_INTERFACE.
|
||||
PLATFORM_INTERFACE const CPUInformation* GetCPUInformation();
|
||||
|
||||
|
||||
PLATFORM_INTERFACE void GetCurrentDate( int *pDay, int *pMonth, int *pYear );
|
||||
|
Loading…
Reference in New Issue
Block a user