mirror of
https://github.com/alliedmodders/hl2sdk.git
synced 2024-12-23 01:59:43 +08:00
5779 lines
153 KiB
C++
5779 lines
153 KiB
C++
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
|
|
//
|
|
// Purpose: Math primitives.
|
|
//
|
|
// $Workfile: $
|
|
// $NoKeywords: $
|
|
//=============================================================================//
|
|
|
|
#if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
|
|
|
|
|
|
/// FIXME: As soon as all references to mathlib.c are gone, include it in here
|
|
|
|
#include <math.h>
|
|
#include <float.h> // Needed for FLT_EPSILON
|
|
|
|
#include "basetypes.h"
|
|
#include <memory.h>
|
|
#include "tier0/dbg.h"
|
|
|
|
#include "tier0/vprof.h"
|
|
//#define _VPROF_MATHLIB
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
|
|
#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
|
|
#endif
|
|
|
|
#include "mathlib.h"
|
|
#include "amd3dx.h"
|
|
#include "vector.h"
|
|
|
|
// memdbgon must be the last include file in a .cpp file!!!
|
|
#include "tier0/memdbgon.h"
|
|
|
|
static bool s_bMathlibInitialized = false;
|
|
|
|
|
|
#ifdef PARANOID
|
|
// User must provide an implementation of Sys_Error()
|
|
void Sys_Error (char *error, ...);
|
|
#endif
|
|
#ifdef _XBOX
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
const Vector vec3_origin(0,0,0);
|
|
const QAngle vec3_angle(0,0,0);
|
|
const Vector vec3_invalid( FLT_MAX, FLT_MAX, FLT_MAX );
|
|
const int nanmask = 255<<23;
|
|
|
|
|
|
// Math routines which have alternate versions for MMX/SSE/3DNOW in MATHLIB.C
|
|
// void _VectorMA( const float *start, float scale, const float *direction, float *dest );
|
|
|
|
#ifdef PFN_VECTORMA
|
|
void __cdecl _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest );
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Macros and constants required by some of the SSE assembly:
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#ifdef _WIN32
|
|
#define _PS_EXTERN_CONST(Name, Val) \
|
|
const __declspec(align(16)) float _ps_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
|
|
const __declspec(align(16)) Type _ps_##Name[4] = { Val, Val, Val, Val }; \
|
|
|
|
#define _EPI32_CONST(Name, Val) \
|
|
static const __declspec(align(16)) __int32 _epi32_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#define _PS_CONST(Name, Val) \
|
|
static const __declspec(align(16)) float _ps_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#elif _LINUX
|
|
#define _PS_EXTERN_CONST(Name, Val) \
|
|
const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
|
|
const __attribute__((aligned(16))) Type _ps_##Name[4] = { Val, Val, Val, Val }; \
|
|
|
|
#define _EPI32_CONST(Name, Val) \
|
|
static const __attribute__((aligned(16))) int32 _epi32_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#define _PS_CONST(Name, Val) \
|
|
static const __attribute__((aligned(16))) float _ps_##Name[4] = { Val, Val, Val, Val }
|
|
|
|
#endif
|
|
|
|
_PS_EXTERN_CONST(am_0, 0.0f);
|
|
_PS_EXTERN_CONST(am_1, 1.0f);
|
|
_PS_EXTERN_CONST(am_m1, -1.0f);
|
|
_PS_EXTERN_CONST(am_0p5, 0.5f);
|
|
_PS_EXTERN_CONST(am_1p5, 1.5f);
|
|
_PS_EXTERN_CONST(am_pi, (float)M_PI);
|
|
_PS_EXTERN_CONST(am_pi_o_2, (float)(M_PI / 2.0));
|
|
_PS_EXTERN_CONST(am_2_o_pi, (float)(2.0 / M_PI));
|
|
_PS_EXTERN_CONST(am_pi_o_4, (float)(M_PI / 4.0));
|
|
_PS_EXTERN_CONST(am_4_o_pi, (float)(4.0 / M_PI));
|
|
_PS_EXTERN_CONST_TYPE(am_sign_mask, int32, 0x80000000);
|
|
_PS_EXTERN_CONST_TYPE(am_inv_sign_mask, int32, ~0x80000000);
|
|
_PS_EXTERN_CONST_TYPE(am_min_norm_pos,int32, 0x00800000);
|
|
_PS_EXTERN_CONST_TYPE(am_mant_mask, int32, 0x7f800000);
|
|
_PS_EXTERN_CONST_TYPE(am_inv_mant_mask, int32, ~0x7f800000);
|
|
|
|
_EPI32_CONST(1, 1);
|
|
_EPI32_CONST(2, 2);
|
|
|
|
_PS_CONST(sincos_p0, 0.15707963267948963959e1f);
|
|
_PS_CONST(sincos_p1, -0.64596409750621907082e0f);
|
|
_PS_CONST(sincos_p2, 0.7969262624561800806e-1f);
|
|
_PS_CONST(sincos_p3, -0.468175413106023168e-2f);
|
|
|
|
static const uint32 _sincos_masks[] = { (uint32)0x0, (uint32)~0x0 };
|
|
static const uint32 _sincos_inv_masks[] = { (uint32)~0x0, (uint32)0x0 };
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Standard C implementations of optimized routines:
|
|
//-----------------------------------------------------------------------------
|
|
float _sqrtf(float _X)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return sqrtf(_X);
|
|
}
|
|
|
|
float _rsqrtf(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
return 1.f / _sqrtf( x );
|
|
}
|
|
|
|
float FASTCALL _VectorNormalize (Vector& vec)
|
|
{
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "_VectorNormalize", "Mathlib" );
|
|
#endif
|
|
Assert( s_bMathlibInitialized );
|
|
float radius = sqrtf(vec.x*vec.x + vec.y*vec.y + vec.z*vec.z);
|
|
|
|
// FLT_EPSILON is added to the radius to eliminate the possibility of divide by zero.
|
|
float iradius = 1.f / ( radius + FLT_EPSILON );
|
|
|
|
vec.x *= iradius;
|
|
vec.y *= iradius;
|
|
vec.z *= iradius;
|
|
|
|
return radius;
|
|
}
|
|
|
|
// TODO: Add fast C VectorNormalizeFast.
|
|
// Perhaps use approximate rsqrt trick, if the accuracy isn't too bad.
|
|
void FASTCALL _VectorNormalizeFast (Vector& vec)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
// FLT_EPSILON is added to the radius to eliminate the possibility of divide by zero.
|
|
float iradius = 1.f / ( sqrtf(vec.x*vec.x + vec.y*vec.y + vec.z*vec.z) + FLT_EPSILON );
|
|
|
|
vec.x *= iradius;
|
|
vec.y *= iradius;
|
|
vec.z *= iradius;
|
|
|
|
}
|
|
|
|
float _InvRSquared(const float* v)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float r2 = DotProduct(v, v);
|
|
return r2 < 1.f ? 1.f : 1/r2;
|
|
}
|
|
|
|
// Math routines done in optimized assembly math package routines
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// 3D Now Implementations of optimized routines:
|
|
//-----------------------------------------------------------------------------
|
|
float _3DNow_Sqrt(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float root = 0.f;
|
|
#ifdef _WIN32
|
|
_asm
|
|
{
|
|
femms
|
|
movd mm0, x
|
|
PFRSQRT (mm1,mm0)
|
|
punpckldq mm0, mm0
|
|
PFMUL (mm0, mm1)
|
|
movd root, mm0
|
|
femms
|
|
}
|
|
#elif _LINUX
|
|
__asm __volatile__( "femms" );
|
|
__asm __volatile__
|
|
(
|
|
"pfrsqrt %y0, %y1 \n\t"
|
|
"punpckldq %y1, %y1 \n\t"
|
|
"pfmul %y1, %y0 \n\t"
|
|
: "=y" (root), "=y" (x)
|
|
:"0" (x)
|
|
);
|
|
__asm __volatile__( "femms" );
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
return root;
|
|
}
|
|
|
|
// NJS FIXME: Need to test Recripricol squareroot performance and accuraccy
|
|
// on AMD's before using the specialized instruction.
|
|
float _3DNow_RSqrt(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
return 1.f / _3DNow_Sqrt(x);
|
|
}
|
|
|
|
|
|
float FASTCALL _3DNow_VectorNormalize (Vector& vec)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float *v = &vec[0];
|
|
float radius = 0.f;
|
|
|
|
if ( v[0] || v[1] || v[2] )
|
|
{
|
|
#ifdef _WIN32
|
|
_asm
|
|
{
|
|
mov eax, v
|
|
femms
|
|
movq mm0, QWORD PTR [eax]
|
|
movd mm1, DWORD PTR [eax+8]
|
|
movq mm2, mm0
|
|
movq mm3, mm1
|
|
PFMUL (mm0, mm0)
|
|
PFMUL (mm1, mm1)
|
|
PFACC (mm0, mm0)
|
|
PFADD (mm1, mm0)
|
|
PFRSQRT (mm0, mm1)
|
|
punpckldq mm1, mm1
|
|
PFMUL (mm1, mm0)
|
|
PFMUL (mm2, mm0)
|
|
PFMUL (mm3, mm0)
|
|
movq QWORD PTR [eax], mm2
|
|
movd DWORD PTR [eax+8], mm3
|
|
movd radius, mm1
|
|
femms
|
|
}
|
|
#elif _LINUX
|
|
long long a,c;
|
|
int b,d;
|
|
memcpy(&a,&vec[0],sizeof(a));
|
|
memcpy(&b,&vec[2],sizeof(b));
|
|
memcpy(&c,&vec[0],sizeof(c));
|
|
memcpy(&d,&vec[2],sizeof(d));
|
|
|
|
__asm __volatile__( "femms" );
|
|
__asm __volatile__
|
|
(
|
|
"pfmul %y3, %y3\n\t"
|
|
"pfmul %y0, %y0 \n\t"
|
|
"pfacc %y3, %y3 \n\t"
|
|
"pfadd %y3, %y0 \n\t"
|
|
"pfrsqrt %y0, %y3 \n\t"
|
|
"punpckldq %y0, %y0 \n\t"
|
|
"pfmul %y3, %y0 \n\t"
|
|
"pfmul %y3, %y2 \n\t"
|
|
"pfmul %y3, %y1 \n\t"
|
|
: "=y" (radius), "=y" (c), "=y" (d)
|
|
: "y" (a), "0" (b), "1" (c), "2" (d)
|
|
);
|
|
memcpy(&vec[0],&c,sizeof(c));
|
|
memcpy(&vec[2],&d,sizeof(d));
|
|
__asm __volatile__( "femms" );
|
|
|
|
#else
|
|
#error
|
|
#endif
|
|
}
|
|
return radius;
|
|
}
|
|
|
|
void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
|
|
{
|
|
_3DNow_VectorNormalize( vec );
|
|
}
|
|
|
|
// JAY: This complains with the latest processor pack
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable: 4730)
|
|
#endif
|
|
|
|
float _3DNow_InvRSquared(const float* v)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float r2 = 1.f;
|
|
#ifdef _WIN32
|
|
_asm { // AMD 3DNow only routine
|
|
mov eax, v
|
|
femms
|
|
movq mm0, QWORD PTR [eax]
|
|
movd mm1, DWORD PTR [eax+8]
|
|
movd mm2, [r2]
|
|
PFMUL (mm0, mm0)
|
|
PFMUL (mm1, mm1)
|
|
PFACC (mm0, mm0)
|
|
PFADD (mm1, mm0)
|
|
PFMAX (mm1, mm2)
|
|
PFRCP (mm0, mm1)
|
|
movd [r2], mm0
|
|
femms
|
|
}
|
|
#elif _LINUX
|
|
long long a,c;
|
|
int b;
|
|
memcpy(&a,&v[0],sizeof(a));
|
|
memcpy(&b,&v[2],sizeof(b));
|
|
memcpy(&c,&v[0],sizeof(c));
|
|
|
|
__asm __volatile__( "femms" );
|
|
__asm __volatile__
|
|
(
|
|
"PFMUL %y2, %y2 \n\t"
|
|
"PFMUL %y3, %y3 \n\t"
|
|
"PFACC %y2, %y2 \n\t"
|
|
"PFADD %y2, %y3 \n\t"
|
|
"PFMAX %y3, %y4 \n\t"
|
|
"PFRCP %y3, %y2 \n\t"
|
|
"movq %y2, %y0 \n\t"
|
|
: "=y" (r2)
|
|
: "0" (r2), "y" (a), "y" (b), "y" (c)
|
|
);
|
|
__asm __volatile__( "femms" );
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
return r2;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// SSE implementations of optimized routines:
|
|
//-----------------------------------------------------------------------------
|
|
float _SSE_Sqrt(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float root = 0.f;
|
|
#ifdef _WIN32
|
|
_asm
|
|
{
|
|
sqrtss xmm0, x
|
|
movss root, xmm0
|
|
}
|
|
#elif _LINUX
|
|
__asm__ __volatile__(
|
|
"movss %1,%%xmm2\n"
|
|
"sqrtss %%xmm2,%%xmm1\n"
|
|
"movss %%xmm1,%0"
|
|
: "=m" (root)
|
|
: "m" (x)
|
|
);
|
|
#endif
|
|
return root;
|
|
}
|
|
|
|
// Single iteration NewtonRaphson reciprocal square root:
|
|
// 0.5 * rsqrtps * (3 - x * rsqrtps(x) * rsqrtps(x))
|
|
// Very low error, and fine to use in place of 1.f / sqrtf(x).
|
|
#if 0
|
|
float _SSE_RSqrtAccurate(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float rroot;
|
|
_asm
|
|
{
|
|
rsqrtss xmm0, x
|
|
movss rroot, xmm0
|
|
}
|
|
|
|
return (0.5f * rroot) * (3.f - (x * rroot) * rroot);
|
|
}
|
|
#else
|
|
|
|
// Intel / Kipps SSE RSqrt. Significantly faster than above.
|
|
inline float _SSE_RSqrtAccurate(float a)
|
|
{
|
|
float x;
|
|
float half = 0.5f;
|
|
float three = 3.f;
|
|
|
|
#ifdef _WIN32
|
|
__asm
|
|
{
|
|
movss xmm3, a;
|
|
movss xmm1, half;
|
|
movss xmm2, three;
|
|
rsqrtss xmm0, xmm3;
|
|
|
|
mulss xmm3, xmm0;
|
|
mulss xmm1, xmm0;
|
|
mulss xmm3, xmm0;
|
|
subss xmm2, xmm3;
|
|
mulss xmm1, xmm2;
|
|
|
|
movss x, xmm1;
|
|
}
|
|
#elif _LINUX
|
|
__asm__ __volatile__(
|
|
"movss %1, %%xmm3 \n\t"
|
|
"movss %2, %%xmm1 \n\t"
|
|
"movss %3, %%xmm2 \n\t"
|
|
"rsqrtss %%xmm3, %%xmm0 \n\t"
|
|
"mulss %%xmm0, %%xmm3 \n\t"
|
|
"mulss %%xmm0, %%xmm1 \n\t"
|
|
"mulss %%xmm0, %%xmm3 \n\t"
|
|
"subss %%xmm3, %%xmm2 \n\t"
|
|
"mulss %%xmm2, %%xmm1 \n\t"
|
|
"movss %%xmm1, %0 \n\t"
|
|
: "=m" (x)
|
|
: "m" (a), "m" (half), "m" (three)
|
|
);
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
return x;
|
|
}
|
|
#endif
|
|
|
|
// Simple SSE rsqrt. Usually accurate to around 6 (relative) decimal places
|
|
// or so, so ok for closed transforms. (ie, computing lighting normals)
|
|
inline float _SSE_RSqrtFast(float x)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float rroot;
|
|
#ifdef _WIN32
|
|
_asm
|
|
{
|
|
rsqrtss xmm0, x
|
|
movss rroot, xmm0
|
|
}
|
|
#elif _LINUX
|
|
__asm__ __volatile__(
|
|
"rsqrtss %1, %%xmm0 \n\t"
|
|
"movss %%xmm0, %0 \n\t"
|
|
: "=m" (x)
|
|
: "m" (rroot)
|
|
: "%xmm0"
|
|
);
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
return rroot;
|
|
}
|
|
|
|
float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
// NOTE: This is necessary to prevent an memory overwrite...
|
|
// sice vec only has 3 floats, we can't "movaps" directly into it.
|
|
#ifdef _WIN32
|
|
__declspec(align(16)) float result[4];
|
|
#elif _LINUX
|
|
__attribute__((aligned(16))) float result[4];
|
|
#endif
|
|
|
|
float *v = &vec[0];
|
|
#ifdef _WIN32
|
|
float *r = &result[0];
|
|
#endif
|
|
|
|
float radius = 0.f;
|
|
// Blah, get rid of these comparisons ... in reality, if you have all 3 as zero, it shouldn't
|
|
// be much of a performance win, considering you will very likely miss 3 branch predicts in a row.
|
|
if ( v[0] || v[1] || v[2] )
|
|
{
|
|
#ifdef _WIN32
|
|
_asm
|
|
{
|
|
mov eax, v
|
|
mov edx, r
|
|
#ifdef ALIGNED_VECTOR
|
|
movaps xmm4, [eax] // r4 = vx, vy, vz, X
|
|
movaps xmm1, xmm4 // r1 = r4
|
|
#else
|
|
movups xmm4, [eax] // r4 = vx, vy, vz, X
|
|
movaps xmm1, xmm4 // r1 = r4
|
|
#endif
|
|
mulps xmm1, xmm4 // r1 = vx * vx, vy * vy, vz * vz, X
|
|
movhlps xmm3, xmm1 // r3 = vz * vz, X, X, X
|
|
movaps xmm2, xmm1 // r2 = r1
|
|
shufps xmm2, xmm2, 1 // r2 = vy * vy, X, X, X
|
|
addss xmm1, xmm2 // r1 = (vx * vx) + (vy * vy), X, X, X
|
|
addss xmm1, xmm3 // r1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
|
|
sqrtss xmm1, xmm1 // r1 = sqrt((vx * vx) + (vy * vy) + (vz * vz)), X, X, X
|
|
movss radius, xmm1 // radius = sqrt((vx * vx) + (vy * vy) + (vz * vz))
|
|
rcpss xmm1, xmm1 // r1 = 1/radius, X, X, X
|
|
shufps xmm1, xmm1, 0 // r1 = 1/radius, 1/radius, 1/radius, X
|
|
mulps xmm4, xmm1 // r4 = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
|
|
movaps [edx], xmm4 // v = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
|
|
}
|
|
#elif _LINUX
|
|
__asm__ __volatile__(
|
|
#ifdef ALIGNED_VECTOR
|
|
"movaps %2, %%xmm4 \n\t"
|
|
"movaps %%xmm4, %%xmm1 \n\t"
|
|
#else
|
|
"movups %2, %%xmm4 \n\t"
|
|
"movaps %%xmm4, %%xmm1 \n\t"
|
|
#endif
|
|
"mulps %%xmm4, %%xmm1 \n\t"
|
|
"movhlps %%xmm1, %%xmm3 \n\t"
|
|
"movaps %%xmm1, %%xmm2 \n\t"
|
|
"shufps $1, %%xmm2, %%xmm2 \n\t"
|
|
"addss %%xmm2, %%xmm1 \n\t"
|
|
"addss %%xmm3, %%xmm1 \n\t"
|
|
"sqrtss %%xmm1, %%xmm1 \n\t"
|
|
"movss %%xmm1, %0 \n\t"
|
|
"rcpss %%xmm1, %%xmm1 \n\t"
|
|
"shufps $0, %%xmm1, %%xmm1 \n\t"
|
|
"mulps %%xmm1, %%xmm4 \n\t"
|
|
"movaps %%xmm4, %1 \n\t"
|
|
: "=m" (radius), "=m" (result)
|
|
: "m" (*v)
|
|
);
|
|
#else
|
|
#error
|
|
#endif
|
|
vec.x = result[0];
|
|
vec.y = result[1];
|
|
vec.z = result[2];
|
|
|
|
}
|
|
|
|
return radius;
|
|
}
|
|
|
|
void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
|
|
{
|
|
float ool = _SSE_RSqrtAccurate( FLT_EPSILON + vec.x * vec.x + vec.y * vec.y + vec.z * vec.z );
|
|
|
|
vec.x *= ool;
|
|
vec.y *= ool;
|
|
vec.z *= ool;
|
|
}
|
|
|
|
float _SSE_InvRSquared(const float* v)
|
|
{
|
|
float inv_r2 = 1.f;
|
|
#ifdef _WIN32
|
|
_asm { // Intel SSE only routine
|
|
mov eax, v
|
|
movss xmm5, inv_r2 // x5 = 1.0, 0, 0, 0
|
|
#ifdef ALIGNED_VECTOR
|
|
movaps xmm4, [eax] // x4 = vx, vy, vz, X
|
|
#else
|
|
movups xmm4, [eax] // x4 = vx, vy, vz, X
|
|
#endif
|
|
movaps xmm1, xmm4 // x1 = x4
|
|
mulps xmm1, xmm4 // x1 = vx * vx, vy * vy, vz * vz, X
|
|
movhlps xmm3, xmm1 // x3 = vz * vz, X, X, X
|
|
movaps xmm2, xmm1 // x2 = x1
|
|
shufps xmm2, xmm2, 1 // x2 = vy * vy, X, X, X
|
|
addss xmm1, xmm2 // x1 = (vx * vx) + (vy * vy), X, X, X
|
|
addss xmm1, xmm3 // x1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
|
|
maxss xmm1, xmm5 // x1 = MAX( 1.0, x1 )
|
|
rcpss xmm0, xmm1 // x0 = 1 / MAX( 1.0, x1 )
|
|
movss inv_r2, xmm0 // inv_r2 = x0
|
|
}
|
|
#elif _LINUX
|
|
__asm__ __volatile__(
|
|
#ifdef ALIGNED_VECTOR
|
|
"movaps %1, %%xmm4 \n\t"
|
|
#else
|
|
"movups %1, %%xmm4 \n\t"
|
|
#endif
|
|
"movaps %%xmm4, %%xmm1 \n\t"
|
|
"mulps %%xmm4, %%xmm1 \n\t"
|
|
"movhlps %%xmm1, %%xmm3 \n\t"
|
|
"movaps %%xmm1, %%xmm2 \n\t"
|
|
"shufps $1, %%xmm2, %%xmm2 \n\t"
|
|
"addss %%xmm2, %%xmm1 \n\t"
|
|
"addss %%xmm3, %%xmm1 \n\t"
|
|
"maxss %%xmm5, %%xmm1 \n\t"
|
|
"rcpss %%xmm1, %%xmm0 \n\t"
|
|
"movss %%xmm0, %0 \n\t"
|
|
: "=m" (inv_r2)
|
|
: "m" (*v), "m" (inv_r2)
|
|
);
|
|
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
return inv_r2;
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
void _SSE_SinCos(float x, float* s, float* c)
|
|
{
|
|
|
|
float t4, t8, t12;
|
|
|
|
__asm
|
|
{
|
|
movss xmm0, x
|
|
movss t12, xmm0
|
|
movss xmm1, _ps_am_inv_sign_mask
|
|
mov eax, t12
|
|
mulss xmm0, _ps_am_2_o_pi
|
|
andps xmm0, xmm1
|
|
and eax, 0x80000000
|
|
|
|
cvttss2si edx, xmm0
|
|
mov ecx, edx
|
|
mov t12, esi
|
|
mov esi, edx
|
|
add edx, 0x1
|
|
shl ecx, (31 - 1)
|
|
shl edx, (31 - 1)
|
|
|
|
movss xmm4, _ps_am_1
|
|
cvtsi2ss xmm3, esi
|
|
mov t8, eax
|
|
and esi, 0x1
|
|
|
|
subss xmm0, xmm3
|
|
movss xmm3, _sincos_inv_masks[esi * 4]
|
|
minss xmm0, xmm4
|
|
|
|
subss xmm4, xmm0
|
|
|
|
movss xmm6, xmm4
|
|
andps xmm4, xmm3
|
|
and ecx, 0x80000000
|
|
movss xmm2, xmm3
|
|
andnps xmm3, xmm0
|
|
and edx, 0x80000000
|
|
movss xmm7, t8
|
|
andps xmm0, xmm2
|
|
mov t8, ecx
|
|
mov t4, edx
|
|
orps xmm4, xmm3
|
|
|
|
mov eax, s //mov eax, [esp + 4 + 16]
|
|
mov edx, c //mov edx, [esp + 4 + 16 + 4]
|
|
|
|
andnps xmm2, xmm6
|
|
orps xmm0, xmm2
|
|
|
|
movss xmm2, t8
|
|
movss xmm1, xmm0
|
|
movss xmm5, xmm4
|
|
xorps xmm7, xmm2
|
|
movss xmm3, _ps_sincos_p3
|
|
mulss xmm0, xmm0
|
|
mulss xmm4, xmm4
|
|
movss xmm2, xmm0
|
|
movss xmm6, xmm4
|
|
orps xmm1, xmm7
|
|
movss xmm7, _ps_sincos_p2
|
|
mulss xmm0, xmm3
|
|
mulss xmm4, xmm3
|
|
movss xmm3, _ps_sincos_p1
|
|
addss xmm0, xmm7
|
|
addss xmm4, xmm7
|
|
movss xmm7, _ps_sincos_p0
|
|
mulss xmm0, xmm2
|
|
mulss xmm4, xmm6
|
|
addss xmm0, xmm3
|
|
addss xmm4, xmm3
|
|
movss xmm3, t4
|
|
mulss xmm0, xmm2
|
|
mulss xmm4, xmm6
|
|
orps xmm5, xmm3
|
|
mov esi, t12
|
|
addss xmm0, xmm7
|
|
addss xmm4, xmm7
|
|
mulss xmm0, xmm1
|
|
mulss xmm4, xmm5
|
|
|
|
// use full stores since caller might reload with full loads
|
|
movss [eax], xmm0
|
|
movss [edx], xmm4
|
|
|
|
}
|
|
}
|
|
|
|
|
|
float _SSE_cos( float x)
|
|
{
|
|
float temp;
|
|
__asm
|
|
{
|
|
movss xmm0, x
|
|
movss xmm1, _ps_am_inv_sign_mask
|
|
andps xmm0, xmm1
|
|
addss xmm0, _ps_am_pi_o_2
|
|
mulss xmm0, _ps_am_2_o_pi
|
|
|
|
cvttss2si ecx, xmm0
|
|
movss xmm5, _ps_am_1
|
|
mov edx, ecx
|
|
shl edx, (31 - 1)
|
|
cvtsi2ss xmm1, ecx
|
|
and edx, 0x80000000
|
|
and ecx, 0x1
|
|
|
|
subss xmm0, xmm1
|
|
movss xmm6, _sincos_masks[ecx * 4]
|
|
minss xmm0, xmm5
|
|
|
|
movss xmm1, _ps_sincos_p3
|
|
subss xmm5, xmm0
|
|
|
|
andps xmm5, xmm6
|
|
movss xmm7, _ps_sincos_p2
|
|
andnps xmm6, xmm0
|
|
mov temp, edx
|
|
orps xmm5, xmm6
|
|
movss xmm0, xmm5
|
|
|
|
mulss xmm5, xmm5
|
|
movss xmm4, _ps_sincos_p1
|
|
movss xmm2, xmm5
|
|
mulss xmm5, xmm1
|
|
movss xmm1, _ps_sincos_p0
|
|
addss xmm5, xmm7
|
|
mulss xmm5, xmm2
|
|
movss xmm3, temp
|
|
addss xmm5, xmm4
|
|
mulss xmm5, xmm2
|
|
orps xmm0, xmm3
|
|
addss xmm5, xmm1
|
|
mulss xmm0, xmm5
|
|
|
|
movss x, xmm0
|
|
|
|
}
|
|
|
|
return x;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// SSE2 implementations of optimized routines:
|
|
//-----------------------------------------------------------------------------
|
|
#ifndef _XBOX
|
|
void _SSE2_SinCos(float x, float* s, float* c) // any x
|
|
{
|
|
__asm
|
|
{
|
|
movss xmm0, x
|
|
movaps xmm7, xmm0
|
|
movss xmm1, _ps_am_inv_sign_mask
|
|
movss xmm2, _ps_am_sign_mask
|
|
movss xmm3, _ps_am_2_o_pi
|
|
andps xmm0, xmm1
|
|
andps xmm7, xmm2
|
|
mulss xmm0, xmm3
|
|
|
|
pxor xmm3, xmm3
|
|
movd xmm5, _epi32_1
|
|
movss xmm4, _ps_am_1
|
|
|
|
cvttps2dq xmm2, xmm0
|
|
pand xmm5, xmm2
|
|
movd xmm1, _epi32_2
|
|
pcmpeqd xmm5, xmm3
|
|
movd xmm3, _epi32_1
|
|
cvtdq2ps xmm6, xmm2
|
|
paddd xmm3, xmm2
|
|
pand xmm2, xmm1
|
|
pand xmm3, xmm1
|
|
subss xmm0, xmm6
|
|
pslld xmm2, (31 - 1)
|
|
minss xmm0, xmm4
|
|
|
|
mov eax, s // mov eax, [esp + 4 + 16]
|
|
mov edx, c // mov edx, [esp + 4 + 16 + 4]
|
|
|
|
subss xmm4, xmm0
|
|
pslld xmm3, (31 - 1)
|
|
|
|
movaps xmm6, xmm4
|
|
xorps xmm2, xmm7
|
|
movaps xmm7, xmm5
|
|
andps xmm6, xmm7
|
|
andnps xmm7, xmm0
|
|
andps xmm0, xmm5
|
|
andnps xmm5, xmm4
|
|
movss xmm4, _ps_sincos_p3
|
|
orps xmm6, xmm7
|
|
orps xmm0, xmm5
|
|
movss xmm5, _ps_sincos_p2
|
|
|
|
movaps xmm1, xmm0
|
|
movaps xmm7, xmm6
|
|
mulss xmm0, xmm0
|
|
mulss xmm6, xmm6
|
|
orps xmm1, xmm2
|
|
orps xmm7, xmm3
|
|
movaps xmm2, xmm0
|
|
movaps xmm3, xmm6
|
|
mulss xmm0, xmm4
|
|
mulss xmm6, xmm4
|
|
movss xmm4, _ps_sincos_p1
|
|
addss xmm0, xmm5
|
|
addss xmm6, xmm5
|
|
movss xmm5, _ps_sincos_p0
|
|
mulss xmm0, xmm2
|
|
mulss xmm6, xmm3
|
|
addss xmm0, xmm4
|
|
addss xmm6, xmm4
|
|
mulss xmm0, xmm2
|
|
mulss xmm6, xmm3
|
|
addss xmm0, xmm5
|
|
addss xmm6, xmm5
|
|
mulss xmm0, xmm1
|
|
mulss xmm6, xmm7
|
|
|
|
// use full stores since caller might reload with full loads
|
|
movss [eax], xmm0
|
|
movss [edx], xmm6
|
|
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifndef _XBOX
|
|
float _SSE2_cos(float x)
|
|
{
|
|
__asm
|
|
{
|
|
movss xmm0, x
|
|
movss xmm1, _ps_am_inv_sign_mask
|
|
movss xmm2, _ps_am_pi_o_2
|
|
movss xmm3, _ps_am_2_o_pi
|
|
andps xmm0, xmm1
|
|
addss xmm0, xmm2
|
|
mulss xmm0, xmm3
|
|
|
|
pxor xmm3, xmm3
|
|
movd xmm5, _epi32_1
|
|
movss xmm4, _ps_am_1
|
|
cvttps2dq xmm2, xmm0
|
|
pand xmm5, xmm2
|
|
movd xmm1, _epi32_2
|
|
pcmpeqd xmm5, xmm3
|
|
cvtdq2ps xmm6, xmm2
|
|
pand xmm2, xmm1
|
|
pslld xmm2, (31 - 1)
|
|
|
|
subss xmm0, xmm6
|
|
movss xmm3, _ps_sincos_p3
|
|
minss xmm0, xmm4
|
|
subss xmm4, xmm0
|
|
andps xmm0, xmm5
|
|
andnps xmm5, xmm4
|
|
orps xmm0, xmm5
|
|
|
|
movaps xmm1, xmm0
|
|
movss xmm4, _ps_sincos_p2
|
|
mulss xmm0, xmm0
|
|
movss xmm5, _ps_sincos_p1
|
|
orps xmm1, xmm2
|
|
movaps xmm7, xmm0
|
|
mulss xmm0, xmm3
|
|
movss xmm6, _ps_sincos_p0
|
|
addss xmm0, xmm4
|
|
mulss xmm0, xmm7
|
|
addss xmm0, xmm5
|
|
mulss xmm0, xmm7
|
|
addss xmm0, xmm6
|
|
mulss xmm0, xmm1
|
|
movss x, xmm0
|
|
}
|
|
|
|
return x;
|
|
}
|
|
#endif
|
|
#elif _LINUX
|
|
#warning "_SSE_sincos,_SSE_cos,_SSE2_cos,_SSE_sincos NOT implemented!"
|
|
#else
|
|
#error
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Function pointers selecting the appropriate implementation from above:
|
|
//-----------------------------------------------------------------------------
|
|
float (*pfSqrt)(float x) = _sqrtf;
|
|
float (*pfRSqrt)(float x) = _rsqrtf;
|
|
float (*pfRSqrtFast)(float x) = _rsqrtf;
|
|
float (FASTCALL *pfVectorNormalize)(Vector& v) = _VectorNormalize;
|
|
void (FASTCALL *pfVectorNormalizeFast)(Vector& v) = _VectorNormalizeFast;
|
|
float (*pfInvRSquared)(const float* v) = _InvRSquared;
|
|
void (*pfFastSinCos)(float x, float* s, float* c) = SinCos;
|
|
float (*pfFastCos)(float x) = cosf;
|
|
|
|
|
|
float SinCosTable[SIN_TABLE_SIZE];
|
|
|
|
void InitSinCosTable()
|
|
{
|
|
for( int i = 0; i < SIN_TABLE_SIZE; i++ )
|
|
{
|
|
SinCosTable[i] = sin(i * 2.0 * M_PI / SIN_TABLE_SIZE);
|
|
}
|
|
}
|
|
|
|
#ifdef PARANOID
|
|
/*
|
|
==================
|
|
BOPS_Error
|
|
|
|
Split out like this for ASM to call.
|
|
==================
|
|
*/
|
|
void BOPS_Error (void)
|
|
{
|
|
Sys_Error ("BoxOnPlaneSide: Bad signbits");
|
|
}
|
|
#endif
|
|
|
|
|
|
#if !id386 && !_LINUX
|
|
|
|
/*
|
|
==================
|
|
BoxOnPlaneSide
|
|
|
|
Returns 1, 2, or 1 + 2
|
|
==================
|
|
*/
|
|
int BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *p)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float dist1, dist2;
|
|
int sides;
|
|
|
|
#if 0 // this is done by the BOX_ON_PLANE_SIDE macro before calling this
|
|
// function
|
|
// fast axial cases
|
|
if (p->type < 3)
|
|
{
|
|
if (p->dist <= emins[p->type])
|
|
return 1;
|
|
if (p->dist >= emaxs[p->type])
|
|
return 2;
|
|
return 3;
|
|
}
|
|
#endif
|
|
|
|
// general case
|
|
switch (p->signbits)
|
|
{
|
|
case 0:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 1:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 2:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 3:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 4:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 5:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 6:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 7:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
default:
|
|
dist1 = dist2 = 0; // shut up compiler
|
|
#ifdef PARANOID
|
|
BOPS_Error ();
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
#if 0
|
|
int i;
|
|
float corners[2][3];
|
|
|
|
for (i=0 ; i<3 ; i++)
|
|
{
|
|
if (plane->normal[i] < 0)
|
|
{
|
|
corners[0][i] = emins[i];
|
|
corners[1][i] = emaxs[i];
|
|
}
|
|
else
|
|
{
|
|
corners[1][i] = emins[i];
|
|
corners[0][i] = emaxs[i];
|
|
}
|
|
}
|
|
dist = DotProduct (plane->normal, corners[0]) - plane->dist;
|
|
dist2 = DotProduct (plane->normal, corners[1]) - plane->dist;
|
|
sides = 0;
|
|
if (dist1 >= 0)
|
|
sides = 1;
|
|
if (dist2 < 0)
|
|
sides |= 2;
|
|
|
|
#endif
|
|
|
|
sides = 0;
|
|
if (dist1 >= p->dist)
|
|
sides = 1;
|
|
if (dist2 < p->dist)
|
|
sides |= 2;
|
|
|
|
#ifdef PARANOID
|
|
if (sides == 0)
|
|
Sys_Error ("BoxOnPlaneSide: sides==0");
|
|
#endif
|
|
|
|
return sides;
|
|
}
|
|
|
|
#endif
|
|
//#endif
|
|
|
|
qboolean VectorsEqual( const float *v1, const float *v2 )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return ( ( v1[0] == v2[0] ) &&
|
|
( v1[1] == v2[1] ) &&
|
|
( v1[2] == v2[2] ) );
|
|
}
|
|
|
|
|
|
|
|
int LineSphereIntersection(
|
|
const Vector &vSphereCenter,
|
|
const float fSphereRadius,
|
|
const Vector &vLinePt,
|
|
const Vector &vLineDir,
|
|
float *fIntersection1,
|
|
float *fIntersection2)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
// Line = P + Vt
|
|
// Sphere = r (assume we've translated to origin)
|
|
// (P + Vt)^2 = r^2
|
|
// VVt^2 + 2PVt + (PP - r^2)
|
|
// Solve as quadratic: (-b +/- sqrt(b^2 - 4ac)) / 2a
|
|
// If (b^2 - 4ac) is < 0 there is no solution.
|
|
// If (b^2 - 4ac) is = 0 there is one solution (a case this function doesn't support).
|
|
// If (b^2 - 4ac) is > 0 there are two solutions.
|
|
Vector P;
|
|
float a, b, c, sqr, insideSqr;
|
|
|
|
|
|
// Translate sphere to origin.
|
|
P[0] = vLinePt[0] - vSphereCenter[0];
|
|
P[1] = vLinePt[1] - vSphereCenter[1];
|
|
P[2] = vLinePt[2] - vSphereCenter[2];
|
|
|
|
a = vLineDir.Dot(vLineDir);
|
|
b = 2.0f * P.Dot(vLineDir);
|
|
c = P.Dot(P) - (fSphereRadius * fSphereRadius);
|
|
|
|
insideSqr = b*b - 4*a*c;
|
|
if(insideSqr <= 0.000001f)
|
|
return 0;
|
|
|
|
// Ok, two solutions.
|
|
sqr = (float)sqrt(insideSqr);
|
|
|
|
*fIntersection1 = (-b + sqr) / (2.0f * a);
|
|
*fIntersection2 = (-b - sqr) / (2.0f * a);
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Generates Euler angles given a left-handed orientation matrix. The
|
|
// columns of the matrix contain the forward, left, and up vectors.
|
|
// Input : matrix - Left-handed orientation matrix.
|
|
// angles[PITCH, YAW, ROLL]. Receives right-handed counterclockwise
|
|
// rotations in degrees around Y, Z, and X respectively.
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void MatrixAngles( const matrix3x4_t& matrix, RadianEuler &angles, Vector &position )
|
|
{
|
|
MatrixGetColumn( matrix, 3, position );
|
|
MatrixAngles( matrix, angles );
|
|
}
|
|
|
|
void MatrixAngles( const matrix3x4_t &matrix, Quaternion &q, Vector &pos )
|
|
{
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "MatrixQuaternion", "Mathlib" );
|
|
#endif
|
|
float trace;
|
|
trace = matrix[0][0] + matrix[1][1] + matrix[2][2] + 1.0f;
|
|
if( trace > 1.0f + FLT_EPSILON )
|
|
{
|
|
// VPROF_INCREMENT_COUNTER("MatrixQuaternion A",1);
|
|
q.x = ( matrix[2][1] - matrix[1][2] );
|
|
q.y = ( matrix[0][2] - matrix[2][0] );
|
|
q.z = ( matrix[1][0] - matrix[0][1] );
|
|
q.w = trace;
|
|
}
|
|
else if ( matrix[0][0] > matrix[1][1] && matrix[0][0] > matrix[2][2] )
|
|
{
|
|
// VPROF_INCREMENT_COUNTER("MatrixQuaternion B",1);
|
|
trace = 1.0f + matrix[0][0] - matrix[1][1] - matrix[2][2];
|
|
q.x = trace;
|
|
q.y = (matrix[1][0] + matrix[0][1] );
|
|
q.z = (matrix[0][2] + matrix[2][0] );
|
|
q.w = (matrix[2][1] - matrix[1][2] );
|
|
}
|
|
else if (matrix[1][1] > matrix[2][2])
|
|
{
|
|
// VPROF_INCREMENT_COUNTER("MatrixQuaternion C",1);
|
|
trace = 1.0f + matrix[1][1] - matrix[0][0] - matrix[2][2];
|
|
q.x = (matrix[0][1] + matrix[1][0] );
|
|
q.y = trace;
|
|
q.z = (matrix[2][1] + matrix[1][2] );
|
|
q.w = (matrix[0][2] - matrix[2][0] );
|
|
}
|
|
else
|
|
{
|
|
// VPROF_INCREMENT_COUNTER("MatrixQuaternion D",1);
|
|
trace = 1.0f + matrix[2][2] - matrix[0][0] - matrix[1][1];
|
|
q.x = (matrix[0][2] + matrix[2][0] );
|
|
q.y = (matrix[2][1] + matrix[1][2] );
|
|
q.z = trace;
|
|
q.w = (matrix[1][0] - matrix[0][1] );
|
|
}
|
|
|
|
QuaternionNormalize( q );
|
|
|
|
#if 0
|
|
// check against the angle version
|
|
RadianEuler ang;
|
|
MatrixAngles( matrix, ang );
|
|
Quaternion test;
|
|
AngleQuaternion( ang, test );
|
|
float d = QuaternionDotProduct( q, test );
|
|
Assert( fabs(d) > 0.99 && fabs(d) < 1.01 );
|
|
#endif
|
|
|
|
MatrixGetColumn( matrix, 3, pos );
|
|
}
|
|
|
|
void MatrixAngles( const matrix3x4_t& matrix, float *angles )
|
|
{
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "MatrixAngles", "Mathlib" );
|
|
#endif
|
|
Assert( s_bMathlibInitialized );
|
|
float forward[3];
|
|
float left[3];
|
|
float up[3];
|
|
|
|
//
|
|
// Extract the basis vectors from the matrix. Since we only need the Z
|
|
// component of the up vector, we don't get X and Y.
|
|
//
|
|
forward[0] = matrix[0][0];
|
|
forward[1] = matrix[1][0];
|
|
forward[2] = matrix[2][0];
|
|
left[0] = matrix[0][1];
|
|
left[1] = matrix[1][1];
|
|
left[2] = matrix[2][1];
|
|
up[2] = matrix[2][2];
|
|
|
|
float xyDist = sqrtf( forward[0] * forward[0] + forward[1] * forward[1] );
|
|
|
|
// enough here to get angles?
|
|
if ( xyDist > 0.001f )
|
|
{
|
|
// (yaw) y = ATAN( forward.y, forward.x ); -- in our space, forward is the X axis
|
|
angles[1] = RAD2DEG( atan2f( forward[1], forward[0] ) );
|
|
|
|
// The engine does pitch inverted from this, but we always end up negating it in the DLL
|
|
// UNDONE: Fix the engine to make it consistent
|
|
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
|
|
angles[0] = RAD2DEG( atan2f( -forward[2], xyDist ) );
|
|
|
|
// (roll) z = ATAN( left.z, up.z );
|
|
angles[2] = RAD2DEG( atan2f( left[2], up[2] ) );
|
|
}
|
|
else // forward is mostly Z, gimbal lock-
|
|
{
|
|
// (yaw) y = ATAN( -left.x, left.y ); -- forward is mostly z, so use right for yaw
|
|
angles[1] = RAD2DEG( atan2f( -left[0], left[1] ) );
|
|
|
|
// The engine does pitch inverted from this, but we always end up negating it in the DLL
|
|
// UNDONE: Fix the engine to make it consistent
|
|
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
|
|
angles[0] = RAD2DEG( atan2f( -forward[2], xyDist ) );
|
|
|
|
// Assume no roll in this case as one degree of freedom has been lost (i.e. yaw == roll)
|
|
angles[2] = 0;
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
/*
|
|
=============
|
|
DecomposeRotation
|
|
=============
|
|
*/
|
|
// What is this for and why is this different from MatrixAngle?
|
|
void DecomposeRotation( const matrix3x4_t& mat, float* out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float cp;
|
|
qboolean fixYaw = 0;
|
|
qboolean fixRoll = 0;
|
|
|
|
// Start off in radians
|
|
out[ PITCH ] = asin( -mat[ 0 ][ 2 ] );
|
|
|
|
cp = cos( out[ PITCH ] );
|
|
if( cp )
|
|
{
|
|
float cy = mat[ 0 ][ 0 ] / cp;
|
|
float cr = mat[ 2 ][ 2 ] / cp;
|
|
|
|
// fix xome rounding error
|
|
if( cy > 1.0 ) cy = 1.0;
|
|
else if( cy < -1.0 ) cy = -1.0;
|
|
if( cr > 1.0 ) cr = 1.0;
|
|
else if( cr < -1.0 ) cr = -1.0;
|
|
|
|
out[ YAW ] = acos( cy );
|
|
out[ ROLL ] = acos( cr );
|
|
}
|
|
else
|
|
{
|
|
out[ YAW ] = 0;
|
|
out[ ROLL ] = acos( mat[ 1 ][ 1 ] );
|
|
}
|
|
|
|
// convert angles to degrees
|
|
out[ PITCH ] *= 180.0 / M_PI;
|
|
out[ YAW ] *= 180.0 / M_PI;
|
|
out[ ROLL ] *= 180.0 / M_PI;
|
|
|
|
// correct for quadrants
|
|
fixYaw = mat[ 0 ][ 1 ] / cp > 0;
|
|
fixRoll = ( mat[ 1 ][ 2 ] / -cp < 0 );
|
|
|
|
if( fixYaw )
|
|
out[ YAW ] = 360 - out[ YAW ];
|
|
if( fixRoll )
|
|
out[ ROLL ] = 360 - out[ ROLL ];
|
|
|
|
// normalize
|
|
#if 0
|
|
if( out[ PITCH ] < 0 )
|
|
out[ PITCH ] += 360;
|
|
if( out[ YAW ] < 0 )
|
|
out[ YAW ] += 360;
|
|
if( out[ ROLL ] < 0 )
|
|
out[ ROLL ] += 360;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
|
|
void VectorTransform (const float *in1, const matrix3x4_t& in2, float *out)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out );
|
|
out[0] = DotProduct(in1, in2[0]) + in2[0][3];
|
|
out[1] = DotProduct(in1, in2[1]) + in2[1][3];
|
|
out[2] = DotProduct(in1, in2[2]) + in2[2][3];
|
|
}
|
|
|
|
// SSE Version of VectorTransform
|
|
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out1 );
|
|
|
|
#ifdef _WIN32
|
|
__asm
|
|
{
|
|
mov eax, in1;
|
|
mov ecx, in2;
|
|
mov edx, out1;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
addss xmm0, [ecx+12]
|
|
movss [edx], xmm0;
|
|
add ecx, 16;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
addss xmm0, [ecx+12]
|
|
movss [edx+4], xmm0;
|
|
add ecx, 16;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
addss xmm0, [ecx+12]
|
|
movss [edx+8], xmm0;
|
|
}
|
|
#elif _LINUX
|
|
#warning "VectorTransformSSE C implementation only"
|
|
out1[0] = DotProduct(in1, in2[0]) + in2[0][3];
|
|
out1[1] = DotProduct(in1, in2[1]) + in2[1][3];
|
|
out1[2] = DotProduct(in1, in2[2]) + in2[2][3];
|
|
#else
|
|
#error
|
|
#endif
|
|
}
|
|
|
|
void VectorITransform (const float *in1, const matrix3x4_t& in2, float *out)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float in1t[3];
|
|
|
|
in1t[0] = in1[0] - in2[0][3];
|
|
in1t[1] = in1[1] - in2[1][3];
|
|
in1t[2] = in1[2] - in2[2][3];
|
|
|
|
out[0] = in1t[0] * in2[0][0] + in1t[1] * in2[1][0] + in1t[2] * in2[2][0];
|
|
out[1] = in1t[0] * in2[0][1] + in1t[1] * in2[1][1] + in1t[2] * in2[2][1];
|
|
out[2] = in1t[0] * in2[0][2] + in1t[1] * in2[1][2] + in1t[2] * in2[2][2];
|
|
}
|
|
|
|
|
|
void VectorRotate( const float *in1, const matrix3x4_t& in2, float *out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out );
|
|
out[0] = DotProduct( in1, in2[0] );
|
|
out[1] = DotProduct( in1, in2[1] );
|
|
out[2] = DotProduct( in1, in2[2] );
|
|
}
|
|
|
|
void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out )
|
|
{
|
|
matrix3x4_t matRotate;
|
|
AngleMatrix( in2, matRotate );
|
|
VectorRotate( in1, matRotate, out );
|
|
}
|
|
|
|
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out1 );
|
|
|
|
#ifdef _WIN32
|
|
__asm
|
|
{
|
|
mov eax, in1;
|
|
mov ecx, in2;
|
|
mov edx, out1;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
movss [edx], xmm0;
|
|
add ecx, 16;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
movss [edx+4], xmm0;
|
|
add ecx, 16;
|
|
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
movss [edx+8], xmm0;
|
|
}
|
|
#elif _LINUX
|
|
#warning "VectorRotateSSE C implementation only"
|
|
out1[0] = DotProduct( in1, in2[0] );
|
|
out1[1] = DotProduct( in1, in2[1] );
|
|
out1[2] = DotProduct( in1, in2[2] );
|
|
|
|
#else
|
|
#error
|
|
#endif
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// NOTE: This actually works, but we need to be sure it actually optimizes anything
|
|
// I've actually really only added it to make sure it's in source control
|
|
//-----------------------------------------------------------------------------
|
|
void TransformAndRotate( const Vector &srcPos, const Vector &srcNorm,
|
|
const matrix3x4_t& mat, Vector &pos, Vector &norm )
|
|
{
|
|
#ifdef _WIN32
|
|
const float *pMat = &mat[0][0];
|
|
const float *pNormal = &srcNorm.x;
|
|
float *pNormalOut = &norm.x;
|
|
#endif
|
|
const float *pPos = &srcPos.x;
|
|
float *pPosOut = &pos.x;
|
|
|
|
// Vector pt, nt;
|
|
// pt[0] = DotProduct(vert.m_vecPosition.Base(), mat[0]) + mat[0][3];
|
|
// pt[1] = DotProduct(vert.m_vecPosition.Base(), mat[1]) + mat[1][3];
|
|
// pt[2] = DotProduct(vert.m_vecPosition.Base(), mat[2]) + mat[2][3];
|
|
// nt[0] = DotProduct( vert.m_vecNormal.Base(), mat[0] );
|
|
// nt[1] = DotProduct( vert.m_vecNormal.Base(), mat[1] );
|
|
// nt[2] = DotProduct( vert.m_vecNormal.Base(), mat[2] );
|
|
|
|
#ifdef _WIN32
|
|
__asm
|
|
{
|
|
mov eax, DWORD PTR [pMat]
|
|
mov edx, DWORD PTR [pPos]
|
|
mov ecx, DWORD PTR [pNormal]
|
|
mov esi, DWORD PTR [pNormalOut]
|
|
mov edi, DWORD PTR [pPosOut]
|
|
|
|
fld DWORD PTR[eax + 3*4] ; m03
|
|
fld DWORD PTR[eax + 0] ; m00 | m03
|
|
fld DWORD PTR[edx + 0] ; pos.x | m00 | m03
|
|
fld DWORD PTR[ecx + 0] ; norm.x | pos.x | m00 | m03
|
|
fld DWORD PTR[edx + 4] ; pos.y | norm.x | pos.x | m00 | m03
|
|
fld DWORD PTR[eax + 1*4] ; m01 | pos.y | norm.x | pos.x | m00 | m03
|
|
fld DWORD PTR[ecx + 4] ; norm.y | m01 | pos.y | norm.x | pos.x | m00 | m03
|
|
|
|
fxch st(5) ; m00 | m01 | pos.y | norm.x | pos.x | norm.y | m03
|
|
fmul st(4), st(0) ; m00 | m01 | pos.y | norm.x | pos.x * m00 | norm.y | m03
|
|
fmulp st(3), st(0) ; m01 | pos.y | norm.x * m00 | pos.x * m00 | norm.y | m03
|
|
fmul st(1), st(0) ; m01 | pos.y * m01 | norm.x * m00 | pos.x * m00 | norm.y | m03
|
|
fmulp st(4), st(0) ; pos.y * m01 | norm.x * m00 | pos.x * m00 | norm.y * m01 | m03
|
|
|
|
fld DWORD PTR[eax + 2*4] ; m02 | pos.y * m01 | norm.x * m00 | pos.x * m00 | norm.y * m01 | m03
|
|
fld DWORD PTR[edx + 8] ; pos.z | m02 | pos.y * m01 | norm.x * m00 | pos.x * m00 | norm.y * m01 | m03
|
|
fld DWORD PTR[ecx + 8] ; norm.z | pos.z | m02 | pos.y * m01 | norm.x * m00 | pos.x * m00 | norm.y * m01 | m03
|
|
|
|
fxch st(5) ; pos.x * m00 | pos.z | m02 | pos.y * m01 | norm.x * m00 | norm.z | norm.y * m01 | m03
|
|
faddp st(3), st(0) ; pos.z | m02 | pos.x * m00 + pos.y * m01 | norm.x * m00 | norm.z | norm.y * m01 | m03
|
|
fxch st(3) ; norm.x * m00 | m02 | pos.x * m00 + pos.y * m01 | pos.z | norm.z | norm.y * m01 | m03
|
|
faddp st(5), st(0) ; m02 | pos.x * m00 + pos.y * m01 | pos.z | norm.z | norm.y * m01 + norm.x * m00 | m03
|
|
fmul st(2), st(0) ; m02 | pos.x * m00 + pos.y * m01 | pos.z * m02 | norm.z | norm.y * m01 + norm.x * m00 | m03
|
|
fmulp st(3), st(0) ; pos.x * m00 + pos.y * m01 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | m03
|
|
faddp st(4), st(0) ; pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | pos.x * m00 + pos.y * m01 + m03
|
|
|
|
; NOTE: Need two more instructions before we can start using the .z stuff
|
|
|
|
fld DWORD PTR[eax + 1*16 + 3*4] ; m13 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | pos.x * m00 + pos.y * m01 + m03
|
|
fld DWORD PTR[eax + 1*16 + 0] ; m10 | m13 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | pos.x * m00 + pos.y * m01 + m03
|
|
fld DWORD PTR[edx + 0] ; pos.x | m10 | m13 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | pos.x * m00 + pos.y * m01 + m03
|
|
fld DWORD PTR[ecx + 0] ; norm.x | pos.x | m10 | m13 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | pos.x * m00 + pos.y * m01 + m03
|
|
|
|
fxch st(7) ; pos.x * m00 + pos.y * m01 + m03 | pos.x | m10 | m13 | pos.z * m02 | norm.z * m02 | norm.y * m01 + norm.x * m00 | norm.x
|
|
faddp st(4), st(0) ; pos.x | m10 | m13 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | norm.z * m02 | norm.y * m01 + norm.x * m00 | norm.x
|
|
fxch st(5) ; norm.y * m01 + norm.x * m00 | m10 | m13 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | norm.z * m02 | pos.x | norm.x
|
|
faddp st(4), st(0) ; m10 | m13 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | norm.x * m00 + norm.y * m01 + norm.z * m02 | pos.x | norm.x
|
|
fmul st(4), st(0) ; m10 | m13 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | norm.x * m00 + norm.y * m01 + norm.z * m02 | pos.x * m10 | norm.x
|
|
fmulp st(5), st(0) ; m13 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | norm.x * m00 + norm.y * m01 + norm.z * m02 | pos.x * m10 | norm.x * m10
|
|
fxch st(2) ; norm.x * m00 + norm.y * m01 + norm.z * m02 | pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fstp DWORD PTR[esi] ; pos.x * m00 + pos.y * m01 + pos.z * m02 + m03 | m13 | pos.x * m10 | norm.x * m10
|
|
fstp DWORD PTR[edi] ; m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fld DWORD PTR[eax + 1*16 + 1*4] ; m11 | m13 | pos.x * m10 | norm.x * m10
|
|
fld DWORD PTR[edx + 4] ; pos.y | m11 | m13 | pos.x * m10 | norm.x * m10
|
|
fld DWORD PTR[ecx + 4] ; norm.y| pos.y | m11 | m13 | pos.x * m10 | norm.x * m10
|
|
fld DWORD PTR[eax + 1*16 + 2*4] ; m12 | norm.y| pos.y | m11 | m13 | pos.x * m10 | norm.x * m10
|
|
fld DWORD PTR[edx + 8] ; pos.z | m12 | norm.y | pos.y | m11 | m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fxch st(4) ; m11 | m12 | norm.y | pos.y | pos.z | m13 | pos.x * m10 | norm.x * m10
|
|
fmul st(3), st(0) ; m11 | m12 | norm.y | pos.y * m11 | pos.z | m13 | pos.x * m10 | norm.x * m10
|
|
fmulp st(2), st(0) ; m12 | norm.y * m11 | pos.y * m11 | pos.z | m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fld DWORD PTR[ecx + 8] ; norm.z | m12 | norm.y * m11 | pos.y * m11 | pos.z | m13 | pos.x * m10 | norm.x * m10
|
|
fxch st(1) ; m12 | norm.z | norm.y * m11 | pos.y * m11 | pos.z | m13 | pos.x * m10 | norm.x * m10
|
|
fmul st(4), st(0) ; m12 | norm.z | norm.y * m11 | pos.y * m11 | pos.z * m12 | m13 | pos.x * m10 | norm.x * m10
|
|
fmulp st(1), st(0) ; norm.z * m12 | norm.y * m11 | pos.y * m11 | pos.z * m12 | m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fld DWORD PTR[eax + 2*16 + 3*4] ; m23 | norm.z * m12 | norm.y * m11 | pos.y * m11 | pos.z * m12 | m13 | pos.x * m10 | norm.x * m10
|
|
|
|
fxch st(6) ; pos.x * m10 | norm.z * m12 | norm.y * m11 | pos.y * m11 | pos.z * m12 | m13 | m23 | norm.x * m10
|
|
faddp st(3), st(0) ; norm.z * m12 | norm.y * m11 | pos.x * m10 + pos.y * m11 | pos.z * m12 | m13 | m23 | norm.x * m10
|
|
fxch st(4) ; m13 | norm.y * m11 | pos.x * m10 + pos.y * m11 | pos.z * m12 | norm.z * m12 | m23 | norm.x * m10
|
|
faddp st(3), st(0) ; norm.y * m11 | pos.x * m10 + pos.y * m11 | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10
|
|
faddp st(5), st(0) ; pos.x * m10 + pos.y * m11 | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
|
|
fld DWORD PTR[eax + 2*16 + 0] ; m20 | pos.x * m10 + pos.y * m11 | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
fld DWORD PTR[edx + 0] ; pos.x | m20 | pos.x * m10 + pos.y * m11 | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
fld DWORD PTR[ecx + 0] ; norm.x | pos.x | m20 | pos.x * m10 + pos.y * m11 | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
|
|
fxch st(3) ; pos.x * m10 + pos.y * m11 | pos.x | m20 | norm.x | pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
faddp st(4), st(0) ; pos.x | m20 | norm.x | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | norm.z * m12 | m23 | norm.x * m10 + norm.y * m11
|
|
fxch st(4) ; norm.z * m12 | m20 | norm.x | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x | m23 | norm.x * m10 + norm.y * m11
|
|
faddp st(6), st(0) ; m20 | norm.x | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fmul st(3), st(0) ; m20 | norm.x | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fmulp st(1), st(0) ; norm.x * m20 | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
|
|
fld DWORD PTR[eax + 2*16 + 1*4] ; m21 | norm.x * m20 | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fld DWORD PTR[edx + 4] ; pos.y | m21 | norm.x * m20 | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fld DWORD PTR[ecx + 4] ; norm.y | pos.y | m21 | norm.x * m20 | pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
|
|
fxch st(4) ; pos.x * m10 + pos.y * m11 + pos.z * m12 + m13 | pos.y | m21 | norm.x * m20 | norm.y | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fstp DWORD PTR[edi + 4] ; pos.y | m21 | norm.x * m20 | norm.y | pos.x * m20 | m23 | norm.x * m10 + norm.y * m11 + norm.z * m12
|
|
fxch st(6) ; norm.x * m10 + norm.y * m11 + norm.z * m12 | m21 | norm.x * m20 | norm.y | pos.x * m20 | m23 | pos.y
|
|
fstp DWORD PTR[esi + 4] ; m21 | norm.x * m20 | norm.y | pos.x * m20 | m23 | pos.y
|
|
|
|
fmul st(5), st(0) ; m21 | norm.x * m20 | norm.y | pos.x * m20 | m23 | pos.y * m21
|
|
fmulp st(2), st(0) ; norm.x * m20 | norm.y * m21 | pos.x * m20 | m23 | pos.y * m21
|
|
|
|
fld DWORD PTR[eax + 2*16 + 2*4] ; m22 | norm.x * m20 | norm.y * m21 | pos.x * m20 | m23 | pos.y * m21
|
|
fld DWORD PTR[edx + 8] ; pos.z | m22 | norm.x * m20 | norm.y * m21 | pos.x * m20 | m23 | pos.y * m21
|
|
fld DWORD PTR[ecx + 8] ; norm.z | pos.z | m22 | norm.x * m20 | norm.y * m21 | pos.x * m20 | m23 | pos.y * m21
|
|
|
|
fxch st(5) ; pos.x * m20 | pos.z | m22 | norm.x * m20 | norm.y * m21 | norm.z | m23 | pos.y * m21
|
|
faddp st(6), st(0) ; pos.z | m22 | norm.x * m20 | norm.y * m21 | norm.z | pos.x * m20 + m23 | pos.y * m21
|
|
fxch st(2) ; norm.x * m20 | m22 | pos.z | norm.y * m21 | norm.z | pos.x * m20 + m23 | pos.y * m21
|
|
faddp st(3), st(0) ; m22 | pos.z | norm.x * m20 + norm.y * m21 | norm.z | pos.x * m20 + m23 | pos.y * m21
|
|
fmul st(1), st(0) ; m22 | pos.z * m22 | norm.x * m20 + norm.y * m21 | norm.z | pos.x * m20 + m23 | pos.y * m21
|
|
fmulp st(3), st(0) ; pos.z * m22 | norm.x * m20 + norm.y * m21 | norm.z * m22 | pos.x * m20 + m23 | pos.y * m21
|
|
fxch st(4) ; pos.y * m21 | norm.x * m20 + norm.y * m21 | norm.z * m22 | pos.x * m20 + m23 | pos.z * m22
|
|
faddp st(3), st(0) ; norm.x * m20 + norm.y * m21 | norm.z * m22 | pos.x * m20 + pos.y * m21 + m23 | pos.z * m22
|
|
|
|
; STALLS AHOY!!!
|
|
|
|
faddp st(1), st(0) ; norm.x * m20 + norm.y * m21 + norm.z * m22 | pos.x * m20 + pos.y * m21 + m23 | pos.z * m22
|
|
fxch st(2) ; pos.z * m22 | pos.x * m20 + pos.y * m21 + m23 | norm.x * m20 + norm.y * m21 + norm.z * m22
|
|
faddp st(1), st(0) ; pos.x * m20 + pos.y * m21 + pos.z * m22 + m23 | norm.x * m20 + norm.y * m21 + norm.z * m22
|
|
fxch st(1) ; norm.x * m20 + norm.y * m21 + norm.z * m22 | pos.x * m20 + pos.y * m21 + pos.z * m22 + m23
|
|
fstp DWORD PTR[esi + 8] ; pos.x * m20 + pos.y * m21 + pos.z * m22 + m23
|
|
fstp DWORD PTR[edi + 8]
|
|
}
|
|
#elif _LINUX
|
|
#warning "TransformAndRotate C implementation only"
|
|
VectorTransform( pPos, mat, pPosOut);
|
|
VectorRotate( pPosOut, mat, pPosOut);
|
|
norm=pos;
|
|
VectorNormalize(norm);
|
|
#else
|
|
#error
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
// rotate by the inverse of the matrix
|
|
void VectorIRotate( const float *in1, const matrix3x4_t& in2, float *out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out );
|
|
out[0] = in1[0]*in2[0][0] + in1[1]*in2[1][0] + in1[2]*in2[2][0];
|
|
out[1] = in1[0]*in2[0][1] + in1[1]*in2[1][1] + in1[2]*in2[2][1];
|
|
out[2] = in1[0]*in2[0][2] + in1[1]*in2[1][2] + in1[2]*in2[2][2];
|
|
}
|
|
|
|
#ifndef VECTOR_NO_SLOW_OPERATIONS
|
|
|
|
QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix )
|
|
{
|
|
matrix3x4_t angToWorld, worldToParent, localMatrix;
|
|
MatrixInvert( parentMatrix, worldToParent );
|
|
AngleMatrix( angles, angToWorld );
|
|
ConcatTransforms( worldToParent, angToWorld, localMatrix );
|
|
|
|
QAngle out;
|
|
MatrixAngles( localMatrix, out );
|
|
return out;
|
|
}
|
|
|
|
QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix )
|
|
{
|
|
matrix3x4_t angToParent, angToWorld;
|
|
AngleMatrix( angles, angToParent );
|
|
ConcatTransforms( parentMatrix, angToParent, angToWorld );
|
|
QAngle out;
|
|
MatrixAngles( angToWorld, out );
|
|
return out;
|
|
}
|
|
|
|
#endif
|
|
|
|
void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis )
|
|
{
|
|
MatrixSetColumn( vecXAxis, 0, mat );
|
|
MatrixSetColumn( vecYAxis, 1, mat );
|
|
MatrixSetColumn( vecZAxis, 2, mat );
|
|
MatrixSetColumn( vecOrigin, 3, mat );
|
|
}
|
|
|
|
void MatrixCopy( const matrix3x4_t& in, matrix3x4_t& out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
memcpy( out.Base(), in.Base(), sizeof( float ) * 3 * 4 );
|
|
}
|
|
|
|
void MatrixInvert( const matrix3x4_t& in, matrix3x4_t& out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if ( &in == &out )
|
|
{
|
|
V_swap(out[0][1],out[1][0]);
|
|
V_swap(out[0][2],out[2][0]);
|
|
V_swap(out[1][2],out[2][1]);
|
|
}
|
|
else
|
|
{
|
|
// I'm guessing this only works on a 3x4 orthonormal matrix
|
|
out[0][0] = in[0][0];
|
|
out[0][1] = in[1][0];
|
|
out[0][2] = in[2][0];
|
|
|
|
out[1][0] = in[0][1];
|
|
out[1][1] = in[1][1];
|
|
out[1][2] = in[2][1];
|
|
|
|
out[2][0] = in[0][2];
|
|
out[2][1] = in[1][2];
|
|
out[2][2] = in[2][2];
|
|
}
|
|
|
|
float tmp[3];
|
|
tmp[0] = in[0][3];
|
|
tmp[1] = in[1][3];
|
|
tmp[2] = in[2][3];
|
|
|
|
out[0][3] = -DotProduct( tmp, out[0] );
|
|
out[1][3] = -DotProduct( tmp, out[1] );
|
|
out[2][3] = -DotProduct( tmp, out[2] );
|
|
}
|
|
|
|
void MatrixGetColumn( const matrix3x4_t& in, int column, Vector &out )
|
|
{
|
|
out.x = in[0][column];
|
|
out.y = in[1][column];
|
|
out.z = in[2][column];
|
|
}
|
|
|
|
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
|
|
{
|
|
out[0][column] = in.x;
|
|
out[1][column] = in.y;
|
|
out[2][column] = in.z;
|
|
}
|
|
|
|
|
|
int VectorCompare (const float *v1, const float *v2)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int i;
|
|
|
|
for (i=0 ; i<3 ; i++)
|
|
if (v1[i] != v2[i])
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
#if _WIN32
|
|
_declspec(naked)
|
|
#ifndef PFN_VECTORMA
|
|
void VectorMA( const float *start, float scale, const float *direction, float *dest )
|
|
#else
|
|
void _VectorMA( const float *start, float scale, const float *direction, float *dest )
|
|
#endif
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
_asm {
|
|
mov eax, DWORD PTR [esp+0x04] ; *start, s0..s2
|
|
mov ecx, DWORD PTR [esp+0x0c] ; *direction, d0..d2
|
|
mov edx, DWORD PTR [esp+0x10] ; *dest
|
|
fld DWORD PTR [esp+0x08] ; sc
|
|
fld DWORD PTR [eax] ; s0 | sc
|
|
fld DWORD PTR [ecx] ; d0 | s0 | sc
|
|
fld DWORD PTR [eax+4] ; s1 | d0 | s0 | sc
|
|
fld DWORD PTR [ecx+4] ; d1 | s1 | d0 | s0 | sc
|
|
fld DWORD PTR [eax+8] ; s2 | d1 | s1 | d0 | s0 | sc
|
|
fxch st(4) ; s0 | d1 | s1 | d0 | s2 | sc
|
|
fld DWORD PTR [ecx+8] ; d2 | s0 | d1 | s1 | d0 | s2 | sc
|
|
fxch st(4) ; d0 | s0 | d1 | s1 | d2 | s2 | sc
|
|
fmul st,st(6) ; d0*sc | s0 | d1 | s1 | d2 | s2 | sc
|
|
fxch st(2) ; d1 | s0 | d0*sc | s1 | d2 | s2 | sc
|
|
fmul st,st(6) ; d1*sc | s0 | d0*sc | s1 | d2 | s2 | sc
|
|
fxch st(4) ; d2 | s0 | d0*sc | s1 | d1*sc | s2 | sc
|
|
fmulp st(6),st ; s0 | d0*sc | s1 | d1*sc | s2 | d2*sc
|
|
faddp st(1),st ; s0+d0*sc | s1 | d1*sc | s2 | d2*sc
|
|
fstp DWORD PTR [edx] ; s1 | d1*sc | s2 | d2*sc
|
|
faddp st(1),st ; s1+d1*sc | s2 | d2*sc
|
|
fstp DWORD PTR [edx+4] ; s2 | d2*sc
|
|
faddp st(1),st ; s2+d2*sc
|
|
fstp DWORD PTR [edx+8] ; [Empty stack]
|
|
ret
|
|
}
|
|
}
|
|
#else
|
|
#ifndef PFN_VECTORMA
|
|
void VectorMA( const float *start, float scale, const float *direction, float *dest )
|
|
#else
|
|
void _VectorMA( const float *start, float scale, const float *direction, float *dest )
|
|
#endif
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
dest[0] = start[0] + scale*direction[0];
|
|
dest[1] = start[1] + scale*direction[1];
|
|
dest[2] = start[2] + scale*direction[2];
|
|
}
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
void
|
|
_declspec(naked)
|
|
_SSE_VectorMA( const float *start, float scale, const float *direction, float *dest )
|
|
{
|
|
// FIXME: This don't work!! It will overwrite memory in the write to dest
|
|
Assert(0);
|
|
|
|
Assert( s_bMathlibInitialized );
|
|
_asm { // Intel SSE only routine
|
|
mov eax, DWORD PTR [esp+0x04] ; *start, s0..s2
|
|
mov ecx, DWORD PTR [esp+0x0c] ; *direction, d0..d2
|
|
mov edx, DWORD PTR [esp+0x10] ; *dest
|
|
movss xmm2, [esp+0x08] ; x2 = scale, 0, 0, 0
|
|
#ifdef ALIGNED_VECTOR
|
|
movaps xmm3, [ecx] ; x3 = dir0,dir1,dir2,X
|
|
pshufd xmm2, xmm2, 0 ; x2 = scale, scale, scale, scale
|
|
movaps xmm1, [eax] ; x1 = start1, start2, start3, X
|
|
mulps xmm3, xmm2 ; x3 *= x2
|
|
addps xmm3, xmm1 ; x3 += x1
|
|
movaps [edx], xmm3 ; *dest = x3
|
|
#else
|
|
movups xmm3, [ecx] ; x3 = dir0,dir1,dir2,X
|
|
pshufd xmm2, xmm2, 0 ; x2 = scale, scale, scale, scale
|
|
movups xmm1, [eax] ; x1 = start1, start2, start3, X
|
|
mulps xmm3, xmm2 ; x3 *= x2
|
|
addps xmm3, xmm1 ; x3 += x1
|
|
movups [edx], xmm3 ; *dest = x3
|
|
#endif
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef OLD_DOTPRODUCT
|
|
|
|
_declspec(naked)
|
|
vec_t __cdecl DotProduct (const vec_t *a, const vec_t *c)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
_asm {
|
|
mov eax, DWORD PTR [esp+4] ; *a
|
|
mov ecx, DWORD PTR [esp+8] ; *c
|
|
fld DWORD PTR [eax] ; a0
|
|
fmul DWORD PTR [ecx] ; a0*c0
|
|
fld DWORD PTR [eax+4] ; a1 | a0*c0
|
|
fmul DWORD PTR [ecx+4] ; a1*c1 | a0*c0
|
|
fld DWORD PTR [eax+8] ; a2 | a1*c1 | a0*c0
|
|
fmul DWORD PTR [ecx+8] ; a2*c2 | a1*c1 | a0*c0
|
|
fxch st(2) ; a0*c0 | a1*c1 | a2*c2
|
|
faddp st(1), st ; a1*c1+a0*c0 | a2*c2
|
|
faddp st(1), st ; a2*c2+a0*c0+a1*c1
|
|
ret
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
// SSE DotProduct -- it's a smidgen faster than the asm DotProduct...
|
|
// Should be validated too! :)
|
|
// NJS: (Nov 1 2002) -NOT- faster. may time a couple cycles faster in a single function like
|
|
// this, but when inlined, and instruction scheduled, the C version is faster.
|
|
// Verified this via VTune
|
|
|
|
/*
|
|
vec_t DotProduct (const vec_t *a, const vec_t *c)
|
|
{
|
|
vec_t temp;
|
|
|
|
__asm
|
|
{
|
|
mov eax, a;
|
|
mov ecx, c;
|
|
mov edx, DWORD PTR [temp]
|
|
movss xmm0, [eax];
|
|
mulss xmm0, [ecx];
|
|
movss xmm1, [eax+4];
|
|
mulss xmm1, [ecx+4];
|
|
movss xmm2, [eax+8];
|
|
mulss xmm2, [ecx+8];
|
|
addss xmm0, xmm1;
|
|
addss xmm0, xmm2;
|
|
movss [edx], xmm0;
|
|
fld DWORD PTR [edx];
|
|
ret
|
|
}
|
|
}
|
|
*/
|
|
|
|
void CrossProduct (const float* v1, const float* v2, float* cross)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( v1 != cross );
|
|
Assert( v2 != cross );
|
|
cross[0] = v1[1]*v2[2] - v1[2]*v2[1];
|
|
cross[1] = v1[2]*v2[0] - v1[0]*v2[2];
|
|
cross[2] = v1[0]*v2[1] - v1[1]*v2[0];
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int Q_log2(int val)
|
|
{
|
|
int answer=0;
|
|
while (val>>=1)
|
|
answer++;
|
|
return answer;
|
|
}
|
|
|
|
void VectorMatrix( const Vector &forward, matrix3x4_t& matrix)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector right, left, up, tmp;
|
|
|
|
if (forward[0] == 0 && forward[1] == 0)
|
|
{
|
|
// pitch 90 degrees up/down from identity
|
|
left[0] = 0;
|
|
left[1] = 1;
|
|
left[2] = 0;
|
|
up[0] = -forward[2]; // NOTE: This should be 1.0f or -1.0f, Assert?
|
|
up[1] = 0;
|
|
up[2] = 0;
|
|
}
|
|
else
|
|
{
|
|
tmp[0] = 0; tmp[1] = 0; tmp[2] = 1.0;
|
|
CrossProduct( tmp, forward, left );
|
|
VectorNormalize( left );
|
|
CrossProduct( forward, left, up );
|
|
VectorNormalize( up );
|
|
}
|
|
MatrixSetColumn( forward, 0, matrix );
|
|
MatrixSetColumn( left, 1, matrix );
|
|
MatrixSetColumn( up, 2, matrix );
|
|
}
|
|
|
|
|
|
void VectorVectors( const Vector &forward, Vector &right, Vector &up )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector tmp;
|
|
|
|
if (forward[0] == 0 && forward[1] == 0)
|
|
{
|
|
// pitch 90 degrees up/down from identity
|
|
right[0] = 0;
|
|
right[1] = -1;
|
|
right[2] = 0;
|
|
up[0] = -forward[2];
|
|
up[1] = 0;
|
|
up[2] = 0;
|
|
}
|
|
else
|
|
{
|
|
tmp[0] = 0; tmp[1] = 0; tmp[2] = 1.0;
|
|
CrossProduct( forward, tmp, right );
|
|
VectorNormalize( right );
|
|
CrossProduct( right, forward, up );
|
|
VectorNormalize( up );
|
|
}
|
|
}
|
|
|
|
void VectorAngles( const float *forward, float *angles )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float tmp, yaw, pitch;
|
|
|
|
if (forward[1] == 0 && forward[0] == 0)
|
|
{
|
|
yaw = 0;
|
|
if (forward[2] > 0)
|
|
pitch = 270;
|
|
else
|
|
pitch = 90;
|
|
}
|
|
else
|
|
{
|
|
yaw = (atan2(forward[1], forward[0]) * 180 / M_PI);
|
|
if (yaw < 0)
|
|
yaw += 360;
|
|
|
|
tmp = sqrt (forward[0]*forward[0] + forward[1]*forward[1]);
|
|
pitch = (atan2(-forward[2], tmp) * 180 / M_PI);
|
|
if (pitch < 0)
|
|
pitch += 360;
|
|
}
|
|
|
|
angles[0] = pitch;
|
|
angles[1] = yaw;
|
|
angles[2] = 0;
|
|
}
|
|
|
|
|
|
/*
|
|
================
|
|
R_ConcatRotations
|
|
================
|
|
*/
|
|
void ConcatRotations (const float in1[3][3], const float in2[3][3], float out[3][3])
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( in1 != out );
|
|
Assert( in2 != out );
|
|
out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
|
|
in1[0][2] * in2[2][0];
|
|
out[0][1] = in1[0][0] * in2[0][1] + in1[0][1] * in2[1][1] +
|
|
in1[0][2] * in2[2][1];
|
|
out[0][2] = in1[0][0] * in2[0][2] + in1[0][1] * in2[1][2] +
|
|
in1[0][2] * in2[2][2];
|
|
out[1][0] = in1[1][0] * in2[0][0] + in1[1][1] * in2[1][0] +
|
|
in1[1][2] * in2[2][0];
|
|
out[1][1] = in1[1][0] * in2[0][1] + in1[1][1] * in2[1][1] +
|
|
in1[1][2] * in2[2][1];
|
|
out[1][2] = in1[1][0] * in2[0][2] + in1[1][1] * in2[1][2] +
|
|
in1[1][2] * in2[2][2];
|
|
out[2][0] = in1[2][0] * in2[0][0] + in1[2][1] * in2[1][0] +
|
|
in1[2][2] * in2[2][0];
|
|
out[2][1] = in1[2][0] * in2[0][1] + in1[2][1] * in2[1][1] +
|
|
in1[2][2] * in2[2][1];
|
|
out[2][2] = in1[2][0] * in2[0][2] + in1[2][1] * in2[1][2] +
|
|
in1[2][2] * in2[2][2];
|
|
}
|
|
|
|
|
|
/*
|
|
================
|
|
R_ConcatTransforms
|
|
================
|
|
*/
|
|
void ConcatTransforms (const matrix3x4_t& in1, const matrix3x4_t& in2, matrix3x4_t& out)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if ( &in1 == &out )
|
|
{
|
|
matrix3x4_t in1b;
|
|
MatrixCopy( in1, in1b );
|
|
ConcatTransforms( in1b, in2, out );
|
|
return;
|
|
}
|
|
if ( &in2 == &out )
|
|
{
|
|
matrix3x4_t in2b;
|
|
MatrixCopy( in2, in2b );
|
|
ConcatTransforms( in1, in2b, out );
|
|
return;
|
|
}
|
|
out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
|
|
in1[0][2] * in2[2][0];
|
|
out[0][1] = in1[0][0] * in2[0][1] + in1[0][1] * in2[1][1] +
|
|
in1[0][2] * in2[2][1];
|
|
out[0][2] = in1[0][0] * in2[0][2] + in1[0][1] * in2[1][2] +
|
|
in1[0][2] * in2[2][2];
|
|
out[0][3] = in1[0][0] * in2[0][3] + in1[0][1] * in2[1][3] +
|
|
in1[0][2] * in2[2][3] + in1[0][3];
|
|
out[1][0] = in1[1][0] * in2[0][0] + in1[1][1] * in2[1][0] +
|
|
in1[1][2] * in2[2][0];
|
|
out[1][1] = in1[1][0] * in2[0][1] + in1[1][1] * in2[1][1] +
|
|
in1[1][2] * in2[2][1];
|
|
out[1][2] = in1[1][0] * in2[0][2] + in1[1][1] * in2[1][2] +
|
|
in1[1][2] * in2[2][2];
|
|
out[1][3] = in1[1][0] * in2[0][3] + in1[1][1] * in2[1][3] +
|
|
in1[1][2] * in2[2][3] + in1[1][3];
|
|
out[2][0] = in1[2][0] * in2[0][0] + in1[2][1] * in2[1][0] +
|
|
in1[2][2] * in2[2][0];
|
|
out[2][1] = in1[2][0] * in2[0][1] + in1[2][1] * in2[1][1] +
|
|
in1[2][2] * in2[2][1];
|
|
out[2][2] = in1[2][0] * in2[0][2] + in1[2][1] * in2[1][2] +
|
|
in1[2][2] * in2[2][2];
|
|
out[2][3] = in1[2][0] * in2[0][3] + in1[2][1] * in2[1][3] +
|
|
in1[2][2] * in2[2][3] + in1[2][3];
|
|
}
|
|
|
|
|
|
/*
|
|
===================
|
|
FloorDivMod
|
|
|
|
Returns mathematically correct (floor-based) quotient and remainder for
|
|
numer and denom, both of which should contain no fractional part. The
|
|
quotient must fit in 32 bits.
|
|
====================
|
|
*/
|
|
|
|
void FloorDivMod (double numer, double denom, int *quotient,
|
|
int *rem)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int q, r;
|
|
double x;
|
|
|
|
#ifdef PARANOID
|
|
if (denom <= 0.0)
|
|
Sys_Error ("FloorDivMod: bad denominator %d\n", denom);
|
|
|
|
// if ((floor(numer) != numer) || (floor(denom) != denom))
|
|
// Sys_Error ("FloorDivMod: non-integer numer or denom %f %f\n",
|
|
// numer, denom);
|
|
#endif
|
|
|
|
if (numer >= 0.0)
|
|
{
|
|
|
|
x = floor(numer / denom);
|
|
q = (int)x;
|
|
r = Floor2Int(numer - (x * denom));
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// perform operations with positive values, and fix mod to make floor-based
|
|
//
|
|
x = floor(-numer / denom);
|
|
q = -(int)x;
|
|
r = Floor2Int(-numer - (x * denom));
|
|
if (r != 0)
|
|
{
|
|
q--;
|
|
r = (int)denom - r;
|
|
}
|
|
}
|
|
|
|
*quotient = q;
|
|
*rem = r;
|
|
}
|
|
|
|
|
|
/*
|
|
===================
|
|
GreatestCommonDivisor
|
|
====================
|
|
*/
|
|
int GreatestCommonDivisor (int i1, int i2)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if (i1 > i2)
|
|
{
|
|
if (i2 == 0)
|
|
return (i1);
|
|
return GreatestCommonDivisor (i2, i1 % i2);
|
|
}
|
|
else
|
|
{
|
|
if (i1 == 0)
|
|
return (i2);
|
|
return GreatestCommonDivisor (i1, i2 % i1);
|
|
}
|
|
}
|
|
|
|
|
|
bool IsDenormal( const float &val )
|
|
{
|
|
const int x = *reinterpret_cast <const int *> (&val); // needs 32-bit int
|
|
const int abs_mantissa = x & 0x007FFFFF;
|
|
const int biased_exponent = x & 0x7F800000;
|
|
|
|
return ( biased_exponent == 0 && abs_mantissa != 0 );
|
|
}
|
|
|
|
|
|
// ************************************** SPECIAL NOTE *******************************************************
|
|
// IFF you find this code to be PROBLEMATIC, PLEASE "#if 0" this and send email to "tkback@valvesoftware.com
|
|
// Note what machine this happened on and BUILD a debug version of the binary and see if it catches the problem.
|
|
// ***********************************************************************************************************
|
|
|
|
#if 0
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// This code is an TEST OPTIMIZATION for float -> long. DISABLE this (use MSVCRT ver) if it causes problems.
|
|
//
|
|
|
|
// WARNING!!! This is an experimental replacement for the compiler intrinsic "_ftol()'
|
|
// This code is called whenever there is a float to int conversion in c code.
|
|
// Try this 80x87 specific FPU conversion trick! (This normally does a ROUND during FISTP by default)
|
|
// If positive, we subtract 0.5 before doing a round() which equals floor() for positive values
|
|
// If negative, we add 0.5 before doing a round() which equals ceil() for negative values.
|
|
// Together, this simulates "chop to zero" even when the FPU is in normal rounding mode.
|
|
|
|
// The following two values should be "cache line aligned" ie. align 16.
|
|
const unsigned long FPU_HALF = 0x3EFFFFFFL;
|
|
const unsigned long FPU_MAGIC = 0x59C00000L;
|
|
|
|
long int
|
|
__declspec( naked )
|
|
__cdecl _ftol()
|
|
{
|
|
#ifdef _DEBUG
|
|
|
|
// Turn off "Debug Multitreaded C Runtime Lib use in release mode" to avoid this.(why debug?)
|
|
//#ifdef NDEBUG
|
|
//#error Both _DEBUG and NDEBUG set!
|
|
//#endif
|
|
|
|
Assert( s_bMathlibInitialized );
|
|
// Make sure that we are in 64-bit precision(53-bit mantissa) FPU mode. _ftol() needs 64bit.
|
|
_asm {
|
|
fnstcw word ptr [esp-4]
|
|
and word ptr [esp-4], 0x0300
|
|
cmp word ptr [esp-4], 0x0200 ; Precision bits. Must be 0x0200
|
|
je short good_precision
|
|
int 3 ; WRONG Precision set on FPU!!! s/b >32bit Internally.
|
|
good_precision:
|
|
}
|
|
#endif
|
|
|
|
// Start the float to long conversion
|
|
_asm { ; farg
|
|
ftst
|
|
fnstsw ax
|
|
sahf
|
|
jbe short negative
|
|
|
|
fsub DWORD PTR [FPU_HALF] ; Subtract 0.5 from positive float, this changes round() to floor()!
|
|
fadd DWORD PTR [FPU_MAGIC] ; Adjust the mantissa to have the "int" in the low bits.
|
|
fstp QWORD PTR [esp-0Ch] ; Write out the floating point value in 64 bit double format
|
|
mov eax, DWORD PTR [esp-0Ch] ; Grab the low 32 bits of the mantissa which have our "int".
|
|
ret
|
|
|
|
negative:
|
|
fadd DWORD PTR [FPU_HALF] ; Add 0.5 to Negative float, this changes round() to floor()!
|
|
fadd DWORD PTR [FPU_MAGIC] ; Adjust the mantissa to have the "int" in the low bits.
|
|
fstp QWORD PTR [esp-0Ch] ; Write out the floating point value in 64 bit double format
|
|
mov eax, DWORD PTR [esp-0Ch] ; Grab the low 32 bits of the mantissa which have our "int".
|
|
ret
|
|
|
|
}
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|
|
|
|
int SignbitsForPlane (cplane_t *out)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int bits, j;
|
|
|
|
// for fast box on planeside test
|
|
|
|
bits = 0;
|
|
for (j=0 ; j<3 ; j++)
|
|
{
|
|
if (out->normal[j] < 0)
|
|
bits |= 1<<j;
|
|
}
|
|
return bits;
|
|
}
|
|
|
|
|
|
/*
|
|
==================
|
|
BoxOnPlaneSide
|
|
|
|
Returns 1, 2, or 1 + 2
|
|
==================
|
|
*/
|
|
#if 1 || !id386 || defined _LINUX
|
|
int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *p)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float dist1, dist2;
|
|
int sides;
|
|
|
|
// fast axial cases
|
|
if (p->type < 3)
|
|
{
|
|
if (p->dist <= emins[p->type])
|
|
return 1;
|
|
if (p->dist >= emaxs[p->type])
|
|
return 2;
|
|
return 3;
|
|
}
|
|
|
|
// general case
|
|
switch (p->signbits)
|
|
{
|
|
case 0:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 1:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 2:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 3:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
break;
|
|
case 4:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 5:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 6:
|
|
dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
case 7:
|
|
dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
|
|
dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
|
break;
|
|
default:
|
|
dist1 = dist2 = 0; // shut up compiler
|
|
Assert( 0 );
|
|
break;
|
|
}
|
|
|
|
sides = 0;
|
|
if (dist1 >= p->dist)
|
|
sides = 1;
|
|
if (dist2 < p->dist)
|
|
sides |= 2;
|
|
|
|
Assert( sides != 0 );
|
|
|
|
return sides;
|
|
}
|
|
#else
|
|
#pragma warning( disable: 4035 )
|
|
|
|
__declspec( naked ) int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *p)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
static int bops_initialized;
|
|
static int Ljmptab[8];
|
|
|
|
__asm {
|
|
|
|
push ebx
|
|
|
|
cmp bops_initialized, 1
|
|
je initialized
|
|
mov bops_initialized, 1
|
|
|
|
mov Ljmptab[0*4], offset Lcase0
|
|
mov Ljmptab[1*4], offset Lcase1
|
|
mov Ljmptab[2*4], offset Lcase2
|
|
mov Ljmptab[3*4], offset Lcase3
|
|
mov Ljmptab[4*4], offset Lcase4
|
|
mov Ljmptab[5*4], offset Lcase5
|
|
mov Ljmptab[6*4], offset Lcase6
|
|
mov Ljmptab[7*4], offset Lcase7
|
|
|
|
initialized:
|
|
|
|
mov edx,ds:dword ptr[4+12+esp]
|
|
mov ecx,ds:dword ptr[4+4+esp]
|
|
xor eax,eax
|
|
mov ebx,ds:dword ptr[4+8+esp]
|
|
mov al,ds:byte ptr[17+edx]
|
|
cmp al,8
|
|
jge Lerror
|
|
fld ds:dword ptr[0+edx]
|
|
fld st(0)
|
|
jmp dword ptr[Ljmptab+eax*4]
|
|
Lcase0:
|
|
fmul ds:dword ptr[ebx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase1:
|
|
fmul ds:dword ptr[ecx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase2:
|
|
fmul ds:dword ptr[ebx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase3:
|
|
fmul ds:dword ptr[ecx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase4:
|
|
fmul ds:dword ptr[ebx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase5:
|
|
fmul ds:dword ptr[ecx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase6:
|
|
fmul ds:dword ptr[ebx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ecx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
jmp LSetSides
|
|
Lcase7:
|
|
fmul ds:dword ptr[ecx]
|
|
fld ds:dword ptr[0+4+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[4+ecx]
|
|
fld ds:dword ptr[0+8+edx]
|
|
fxch st(2)
|
|
fmul ds:dword ptr[4+ebx]
|
|
fxch st(2)
|
|
fld st(0)
|
|
fmul ds:dword ptr[8+ecx]
|
|
fxch st(5)
|
|
faddp st(3),st(0)
|
|
fmul ds:dword ptr[8+ebx]
|
|
fxch st(1)
|
|
faddp st(3),st(0)
|
|
fxch st(3)
|
|
faddp st(2),st(0)
|
|
LSetSides:
|
|
faddp st(2),st(0)
|
|
fcomp ds:dword ptr[12+edx]
|
|
xor ecx,ecx
|
|
fnstsw ax
|
|
fcomp ds:dword ptr[12+edx]
|
|
and ah,1
|
|
xor ah,1
|
|
add cl,ah
|
|
fnstsw ax
|
|
and ah,1
|
|
add ah,ah
|
|
add cl,ah
|
|
pop ebx
|
|
mov eax,ecx
|
|
ret
|
|
Lerror:
|
|
int 3
|
|
}
|
|
}
|
|
#pragma warning( default: 4035 )
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// VectorMA routines
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#if _WIN32
|
|
_declspec(naked)
|
|
#if PFN_VECTORMA
|
|
void __cdecl _VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
|
#else
|
|
void __cdecl VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
|
#endif
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
_asm {
|
|
mov eax, DWORD PTR [esp+0x04] ; *start, s0..s2
|
|
mov ecx, DWORD PTR [esp+0x0c] ; *direction, d0..d2
|
|
mov edx, DWORD PTR [esp+0x10] ; *dest
|
|
fld DWORD PTR [esp+0x08] ; sc
|
|
fld DWORD PTR [eax] ; s0 | sc
|
|
fld DWORD PTR [ecx] ; d0 | s0 | sc
|
|
fld DWORD PTR [eax+4] ; s1 | d0 | s0 | sc
|
|
fld DWORD PTR [ecx+4] ; d1 | s1 | d0 | s0 | sc
|
|
fld DWORD PTR [eax+8] ; s2 | d1 | s1 | d0 | s0 | sc
|
|
fxch st(4) ; s0 | d1 | s1 | d0 | s2 | sc
|
|
fld DWORD PTR [ecx+8] ; d2 | s0 | d1 | s1 | d0 | s2 | sc
|
|
fxch st(4) ; d0 | s0 | d1 | s1 | d2 | s2 | sc
|
|
fmul st,st(6) ; d0*sc | s0 | d1 | s1 | d2 | s2 | sc
|
|
fxch st(2) ; d1 | s0 | d0*sc | s1 | d2 | s2 | sc
|
|
fmul st,st(6) ; d1*sc | s0 | d0*sc | s1 | d2 | s2 | sc
|
|
fxch st(4) ; d2 | s0 | d0*sc | s1 | d1*sc | s2 | sc
|
|
fmulp st(6),st ; s0 | d0*sc | s1 | d1*sc | s2 | d2*sc
|
|
faddp st(1),st ; s0+d0*sc | s1 | d1*sc | s2 | d2*sc
|
|
fstp DWORD PTR [edx] ; s1 | d1*sc | s2 | d2*sc
|
|
faddp st(1),st ; s1+d1*sc | s2 | d2*sc
|
|
fstp DWORD PTR [edx+4] ; s2 | d2*sc
|
|
faddp st(1),st ; s2+d2*sc
|
|
fstp DWORD PTR [edx+8] ; [Empty stack]
|
|
ret
|
|
}
|
|
}
|
|
#else
|
|
#if PFN_VECTORMA
|
|
void __cdecl _VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
|
#else
|
|
void __cdecl VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
|
#endif
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
dest[0] = start[0] + scale*direction[0];
|
|
dest[1] = start[1] + scale*direction[1];
|
|
dest[2] = start[2] + scale*direction[2];
|
|
}
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#if PFN_VECTORMA
|
|
void
|
|
_declspec(naked)
|
|
__cdecl _SSE_VectorMA( const Vector &start, float scale, const Vector &direction, Vector &dest )
|
|
{
|
|
// FIXME: This don't work!! It will overwrite memory in the write to dest
|
|
Assert(0);
|
|
Assert( s_bMathlibInitialized );
|
|
_asm
|
|
{
|
|
// Intel SSE only routine
|
|
mov eax, DWORD PTR [esp+0x04] ; *start, s0..s2
|
|
mov ecx, DWORD PTR [esp+0x0c] ; *direction, d0..d2
|
|
mov edx, DWORD PTR [esp+0x10] ; *dest
|
|
movss xmm2, [esp+0x08] ; x2 = scale, 0, 0, 0
|
|
#ifdef ALIGNED_VECTOR
|
|
movaps xmm3, [ecx] ; x3 = dir0,dir1,dir2,X
|
|
pshufd xmm2, xmm2, 0 ; x2 = scale, scale, scale, scale
|
|
movaps xmm1, [eax] ; x1 = start1, start2, start3, X
|
|
mulps xmm3, xmm2 ; x3 *= x2
|
|
addps xmm3, xmm1 ; x3 += x1
|
|
movaps [edx], xmm3 ; *dest = x3
|
|
#else
|
|
movups xmm3, [ecx] ; x3 = dir0,dir1,dir2,X
|
|
pshufd xmm2, xmm2, 0 ; x2 = scale, scale, scale, scale
|
|
movups xmm1, [eax] ; x1 = start1, start2, start3, X
|
|
mulps xmm3, xmm2 ; x3 *= x2
|
|
addps xmm3, xmm1 ; x3 += x1
|
|
movups [edx], xmm3 ; *dest = x3
|
|
#endif
|
|
}
|
|
}
|
|
|
|
float (__cdecl *pfVectorMA)(Vector& v) = _VectorMA;
|
|
#endif
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Euler QAngle -> Basis Vectors
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void AngleVectors (const QAngle &angles, Vector *forward)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( forward );
|
|
|
|
float sp, sy, cp, cy;
|
|
|
|
SinCos( DEG2RAD( angles[YAW] ), &sy, &cy );
|
|
SinCos( DEG2RAD( angles[PITCH] ), &sp, &cp );
|
|
|
|
forward->x = cp*cy;
|
|
forward->y = cp*sy;
|
|
forward->z = -sp;
|
|
}
|
|
|
|
void AngleVectors (const QAngle &angles, Vector *forward, Vector *right, Vector *up)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float sr, sp, sy, cr, cp, cy;
|
|
|
|
SinCos( DEG2RAD( angles[YAW] ), &sy, &cy );
|
|
SinCos( DEG2RAD( angles[PITCH] ), &sp, &cp );
|
|
SinCos( DEG2RAD( angles[ROLL] ), &sr, &cr );
|
|
|
|
if (forward)
|
|
{
|
|
forward->x = cp*cy;
|
|
forward->y = cp*sy;
|
|
forward->z = -sp;
|
|
}
|
|
|
|
if (right)
|
|
{
|
|
right->x = (-1*sr*sp*cy+-1*cr*-sy);
|
|
right->y = (-1*sr*sp*sy+-1*cr*cy);
|
|
right->z = -1*sr*cp;
|
|
}
|
|
|
|
if (up)
|
|
{
|
|
up->x = (cr*sp*cy+-sr*-sy);
|
|
up->y = (cr*sp*sy+-sr*cy);
|
|
up->z = cr*cp;
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Euler QAngle -> Basis Vectors transposed
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void AngleVectorsTranspose (const QAngle &angles, Vector *forward, Vector *right, Vector *up)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float sr, sp, sy, cr, cp, cy;
|
|
|
|
SinCos( DEG2RAD( angles[YAW] ), &sy, &cy );
|
|
SinCos( DEG2RAD( angles[PITCH] ), &sp, &cp );
|
|
SinCos( DEG2RAD( angles[ROLL] ), &sr, &cr );
|
|
|
|
if (forward)
|
|
{
|
|
forward->x = cp*cy;
|
|
forward->y = (sr*sp*cy+cr*-sy);
|
|
forward->z = (cr*sp*cy+-sr*-sy);
|
|
}
|
|
|
|
if (right)
|
|
{
|
|
right->x = cp*sy;
|
|
right->y = (sr*sp*sy+cr*cy);
|
|
right->z = (cr*sp*sy+-sr*cy);
|
|
}
|
|
|
|
if (up)
|
|
{
|
|
up->x = -sp;
|
|
up->y = sr*cp;
|
|
up->z = cr*cp;
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Forward direction vector -> Euler angles
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void VectorAngles( const Vector& forward, QAngle &angles )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float tmp, yaw, pitch;
|
|
|
|
if (forward[1] == 0 && forward[0] == 0)
|
|
{
|
|
yaw = 0;
|
|
if (forward[2] > 0)
|
|
pitch = 270;
|
|
else
|
|
pitch = 90;
|
|
}
|
|
else
|
|
{
|
|
yaw = (atan2(forward[1], forward[0]) * 180 / M_PI);
|
|
if (yaw < 0)
|
|
yaw += 360;
|
|
|
|
tmp = FastSqrt (forward[0]*forward[0] + forward[1]*forward[1]);
|
|
pitch = (atan2(-forward[2], tmp) * 180 / M_PI);
|
|
if (pitch < 0)
|
|
pitch += 360;
|
|
}
|
|
|
|
angles[0] = pitch;
|
|
angles[1] = yaw;
|
|
angles[2] = 0;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Forward direction vector with a reference up vector -> Euler angles
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
Vector left;
|
|
|
|
CrossProduct( forward, pseudoup, left );
|
|
VectorNormalizeFast( left );
|
|
|
|
float xyDist = sqrtf( forward[0] * forward[0] + forward[1] * forward[1] );
|
|
|
|
// enough here to get angles?
|
|
if ( xyDist > 0.001f )
|
|
{
|
|
// (yaw) y = ATAN( forward.y, forward.x ); -- in our space, forward is the X axis
|
|
angles[1] = RAD2DEG( atan2f( forward[1], forward[0] ) );
|
|
|
|
// The engine does pitch inverted from this, but we always end up negating it in the DLL
|
|
// UNDONE: Fix the engine to make it consistent
|
|
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
|
|
angles[0] = RAD2DEG( atan2f( -forward[2], xyDist ) );
|
|
|
|
float up_z = (left[0] * forward[1]) - (left[1] * forward[0]);
|
|
|
|
// (roll) z = ATAN( left.z, up.z );
|
|
angles[2] = RAD2DEG( atan2f( left[2], up_z ) );
|
|
}
|
|
else // forward is mostly Z, gimbal lock-
|
|
{
|
|
// (yaw) y = ATAN( -left.x, left.y ); -- forward is mostly z, so use right for yaw
|
|
angles[1] = RAD2DEG( atan2f( left[0], -left[1] ) ); //This was originally copied from the "void MatrixAngles( const matrix3x4_t& matrix, float *angles )" code, and it's 180 degrees off, negated the values and it all works now (Dave Kircher)
|
|
|
|
// The engine does pitch inverted from this, but we always end up negating it in the DLL
|
|
// UNDONE: Fix the engine to make it consistent
|
|
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
|
|
angles[0] = RAD2DEG( atan2f( -forward[2], xyDist ) );
|
|
|
|
// Assume no roll in this case as one degree of freedom has been lost (i.e. yaw == roll)
|
|
angles[2] = 0;
|
|
}
|
|
}
|
|
|
|
void SetIdentityMatrix( matrix3x4_t& matrix )
|
|
{
|
|
memset( matrix.Base(), 0, sizeof(float)*3*4 );
|
|
matrix[0][0] = 1.0;
|
|
matrix[1][1] = 1.0;
|
|
matrix[2][2] = 1.0;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: converts engine euler angles into a matrix
|
|
// Input : vec3_t angles - PITCH, YAW, ROLL
|
|
// Output : *matrix - left-handed column matrix
|
|
// the basis vectors for the rotations will be in the columns as follows:
|
|
// matrix[][0] is forward
|
|
// matrix[][1] is left
|
|
// matrix[][2] is up
|
|
//-----------------------------------------------------------------------------
|
|
void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t& matrix )
|
|
{
|
|
AngleMatrix( angles, matrix );
|
|
MatrixSetColumn( position, 3, matrix );
|
|
}
|
|
|
|
void AngleMatrix( const RadianEuler& angles, matrix3x4_t& matrix )
|
|
{
|
|
QAngle quakeEuler( RAD2DEG( angles.y ), RAD2DEG( angles.z ), RAD2DEG( angles.x ) );
|
|
|
|
AngleMatrix( quakeEuler, matrix );
|
|
}
|
|
|
|
|
|
void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t& matrix )
|
|
{
|
|
AngleMatrix( angles, matrix );
|
|
MatrixSetColumn( position, 3, matrix );
|
|
}
|
|
|
|
void AngleMatrix( const QAngle &angles, matrix3x4_t& matrix )
|
|
{
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "AngleMatrix", "Mathlib" );
|
|
#endif
|
|
Assert( s_bMathlibInitialized );
|
|
float sr, sp, sy, cr, cp, cy;
|
|
|
|
SinCos( DEG2RAD( angles[YAW] ), &sy, &cy );
|
|
SinCos( DEG2RAD( angles[PITCH] ), &sp, &cp );
|
|
SinCos( DEG2RAD( angles[ROLL] ), &sr, &cr );
|
|
|
|
// matrix = (YAW * PITCH) * ROLL
|
|
matrix[0][0] = cp*cy;
|
|
matrix[1][0] = cp*sy;
|
|
matrix[2][0] = -sp;
|
|
matrix[0][1] = sr*sp*cy+cr*-sy;
|
|
matrix[1][1] = sr*sp*sy+cr*cy;
|
|
matrix[2][1] = sr*cp;
|
|
matrix[0][2] = (cr*sp*cy+-sr*-sy);
|
|
matrix[1][2] = (cr*sp*sy+-sr*cy);
|
|
matrix[2][2] = cr*cp;
|
|
matrix[0][3] = 0.f;
|
|
matrix[1][3] = 0.f;
|
|
matrix[2][3] = 0.f;
|
|
}
|
|
|
|
void AngleIMatrix( const RadianEuler& angles, matrix3x4_t& matrix )
|
|
{
|
|
QAngle quakeEuler( RAD2DEG( angles.y ), RAD2DEG( angles.z ), RAD2DEG( angles.x ) );
|
|
|
|
AngleIMatrix( quakeEuler, matrix );
|
|
}
|
|
|
|
void AngleIMatrix (const QAngle& angles, matrix3x4_t& matrix )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float sr, sp, sy, cr, cp, cy;
|
|
|
|
SinCos( DEG2RAD( angles[YAW] ), &sy, &cy );
|
|
SinCos( DEG2RAD( angles[PITCH] ), &sp, &cp );
|
|
SinCos( DEG2RAD( angles[ROLL] ), &sr, &cr );
|
|
|
|
// matrix = (YAW * PITCH) * ROLL
|
|
matrix[0][0] = cp*cy;
|
|
matrix[0][1] = cp*sy;
|
|
matrix[0][2] = -sp;
|
|
matrix[1][0] = sr*sp*cy+cr*-sy;
|
|
matrix[1][1] = sr*sp*sy+cr*cy;
|
|
matrix[1][2] = sr*cp;
|
|
matrix[2][0] = (cr*sp*cy+-sr*-sy);
|
|
matrix[2][1] = (cr*sp*sy+-sr*cy);
|
|
matrix[2][2] = cr*cp;
|
|
matrix[0][3] = 0.f;
|
|
matrix[1][3] = 0.f;
|
|
matrix[2][3] = 0.f;
|
|
}
|
|
|
|
void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat )
|
|
{
|
|
AngleIMatrix( angles, mat );
|
|
|
|
Vector vecTranslation;
|
|
VectorRotate( position, mat, vecTranslation );
|
|
vecTranslation *= -1.0f;
|
|
MatrixSetColumn( vecTranslation, 3, mat );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Intersects a sphere against an axis aligned box
|
|
//-----------------------------------------------------------------------------
|
|
bool SphereToAABBIntersection( const Vector& sphCenter, float sphRadius,
|
|
const Vector& boxMin, const Vector& boxMax )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int i;
|
|
float s;
|
|
float dist = 0;
|
|
|
|
for( i = 0; i < 3; i++ )
|
|
{
|
|
if( sphCenter[i] < boxMin[i] )
|
|
{
|
|
s = sphCenter[i] - boxMin[i];
|
|
dist += s * s;
|
|
}
|
|
else if( sphCenter[i] > boxMax[i] )
|
|
{
|
|
s = sphCenter[i] - boxMax[i];
|
|
dist += s * s;
|
|
}
|
|
}
|
|
|
|
return ( dist <= ( sphRadius * sphRadius ) );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Bounding box construction methods
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void ClearBounds (Vector& mins, Vector& maxs)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
mins[0] = mins[1] = mins[2] = 99999;
|
|
maxs[0] = maxs[1] = maxs[2] = -99999;
|
|
}
|
|
|
|
void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int i;
|
|
vec_t val;
|
|
|
|
for (i=0 ; i<3 ; i++)
|
|
{
|
|
val = v[i];
|
|
if (val < mins[i])
|
|
mins[i] = val;
|
|
if (val > maxs[i])
|
|
maxs[i] = val;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Gamma conversion support
|
|
//-----------------------------------------------------------------------------
|
|
static byte texgammatable[256]; // palette is sent through this to convert to screen gamma
|
|
|
|
static float texturetolinear[256]; // texture (0..255) to linear (0..1)
|
|
static int lineartotexture[1024]; // linear (0..1) to texture (0..255)
|
|
static int lineartoscreen[1024]; // linear (0..1) to gamma corrected vertex light (0..255)
|
|
|
|
// build a lightmap texture to combine with surface texture, adjust for src*dst+dst*src, ramp reprogramming, etc
|
|
float lineartovertex[4096]; // linear (0..4) to screen corrected vertex space (0..1?)
|
|
unsigned char lineartolightmap[4096]; // linear (0..4) to screen corrected texture value (0..255)
|
|
|
|
static float g_Mathlib_GammaToLinear[256]; // gamma (0..1) to linear (0..1)
|
|
static float g_Mathlib_LinearToGamma[256]; // linear (0..1) to gamma (0..1)
|
|
|
|
float power2_n[256]; // 2**(index - 128)
|
|
|
|
void BuildExponentTable( )
|
|
{
|
|
for( int i = 0; i < 256; i++ )
|
|
{
|
|
power2_n[i] = pow( 2.0f, i - 128 ) / 255.0f;
|
|
}
|
|
}
|
|
|
|
void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright )
|
|
{
|
|
int i, inf;
|
|
float g1, g3;
|
|
|
|
// Con_Printf("BuildGammaTable %.1f %.1f %.1f\n", g, v_lightgamma.GetFloat(), v_texgamma.GetFloat() );
|
|
|
|
float g = gamma;
|
|
if (g > 3.0)
|
|
{
|
|
g = 3.0;
|
|
}
|
|
|
|
g = 1.0 / g;
|
|
g1 = texGamma * g;
|
|
|
|
if (brightness <= 0.0)
|
|
{
|
|
g3 = 0.125;
|
|
}
|
|
else if (brightness > 1.0)
|
|
{
|
|
g3 = 0.05;
|
|
}
|
|
else
|
|
{
|
|
g3 = 0.125 - (brightness * brightness) * 0.075;
|
|
}
|
|
|
|
for (i=0 ; i<256 ; i++)
|
|
{
|
|
inf = static_cast<int>(255 * pow ( i/255.f, g1 ));
|
|
if (inf < 0)
|
|
inf = 0;
|
|
if (inf > 255)
|
|
inf = 255;
|
|
texgammatable[i] = inf;
|
|
}
|
|
|
|
for (i=0 ; i<1024 ; i++)
|
|
{
|
|
float f;
|
|
|
|
f = i / 1023.0;
|
|
|
|
// scale up
|
|
if (brightness > 1.0)
|
|
f = f * brightness;
|
|
|
|
// shift up
|
|
if (f <= g3)
|
|
f = (f / g3) * 0.125;
|
|
else
|
|
f = 0.125 + ((f - g3) / (1.0 - g3)) * 0.875;
|
|
|
|
// convert linear space to desired gamma space
|
|
inf = static_cast<int>(255 * pow ( f, g ));
|
|
|
|
if (inf < 0)
|
|
inf = 0;
|
|
if (inf > 255)
|
|
inf = 255;
|
|
lineartoscreen[i] = inf;
|
|
}
|
|
|
|
/*
|
|
for (i=0 ; i<1024 ; i++)
|
|
{
|
|
// convert from screen gamma space to linear space
|
|
lineargammatable[i] = 1023 * pow ( i/1023.0, v_gamma.GetFloat() );
|
|
// convert from linear gamma space to screen space
|
|
screengammatable[i] = 1023 * pow ( i/1023.0, 1.0 / v_gamma.GetFloat() );
|
|
}
|
|
*/
|
|
|
|
for (i=0 ; i<256 ; i++)
|
|
{
|
|
// convert from nonlinear texture space (0..255) to linear space (0..1)
|
|
texturetolinear[i] = pow( i / 255.f, texGamma );
|
|
|
|
// convert from linear space (0..1) to nonlinear (sRGB) space (0..1)
|
|
g_Mathlib_LinearToGamma[i] = pow( i / 255.f, 1.0f / 2.2f );
|
|
|
|
// convert from sRGB gamma space (0..1) to linear space (0..1)
|
|
g_Mathlib_GammaToLinear[i] = pow( i / 255.f, 2.2f );
|
|
}
|
|
|
|
for (i=0 ; i<1024 ; i++)
|
|
{
|
|
// convert from linear space (0..1) to nonlinear texture space (0..255)
|
|
lineartotexture[i] = static_cast<int>(pow( i / 1023.0, 1.0 / texGamma ) * 255);
|
|
}
|
|
|
|
BuildExponentTable();
|
|
|
|
#if 0
|
|
for (i=0 ; i<256 ; i++)
|
|
{
|
|
float f;
|
|
|
|
// convert from nonlinear lightmap space (0..255) to linear space (0..4)
|
|
// f = (i / 255.0) * sqrt( 4 );
|
|
f = i * (2.0 / 255.0);
|
|
f = f * f;
|
|
|
|
texlighttolinear[i] = f;
|
|
}
|
|
#endif
|
|
|
|
{
|
|
float f;
|
|
float overbrightFactor = 1.0f;
|
|
|
|
// Can't do overbright without texcombine
|
|
// UNDONE: Add GAMMA ramp to rectify this
|
|
if ( overbright == 2 )
|
|
{
|
|
overbrightFactor = 0.5;
|
|
}
|
|
else if ( overbright == 4 )
|
|
{
|
|
overbrightFactor = 0.25;
|
|
}
|
|
|
|
for (i=0 ; i<4096 ; i++)
|
|
{
|
|
// convert from linear 0..4 (x1024) to screen corrected vertex space (0..1?)
|
|
f = pow ( i/1024.0, 1.0 / gamma );
|
|
|
|
lineartovertex[i] = f * overbrightFactor;
|
|
if (lineartovertex[i] > 1)
|
|
lineartovertex[i] = 1;
|
|
|
|
int nLightmap = RoundFloatToInt( f * 255 * overbrightFactor );
|
|
nLightmap = clamp( nLightmap, 0, 255 );
|
|
lineartolightmap[i] = (unsigned char)nLightmap;
|
|
}
|
|
}
|
|
}
|
|
|
|
float GammaToLinearFullRange( float gamma )
|
|
{
|
|
return pow( gamma, 2.2f );
|
|
}
|
|
|
|
float LinearToGammaFullRange( float linear )
|
|
{
|
|
return pow( linear, 1.0f / 2.2f );
|
|
}
|
|
|
|
float GammaToLinear( float gamma )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
// return pow( gamma, 2.2f );
|
|
if( gamma < 0.0f )
|
|
{
|
|
return 0.0f;
|
|
}
|
|
Assert( gamma <= 1.0f );
|
|
if( gamma >= 0.95f)
|
|
{
|
|
// Use GammaToLinearFullRange maybe if you trip this.
|
|
return 1.0f;
|
|
}
|
|
|
|
int index = RoundFloatToInt( gamma * 255.0f );
|
|
Assert( index >= 0 && index < 256 );
|
|
return g_Mathlib_GammaToLinear[index];
|
|
}
|
|
|
|
float LinearToGamma( float linear )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
// return pow( linear, 1.0f / 2.2f );
|
|
if( linear < 0.0f )
|
|
{
|
|
return 0.0f;
|
|
}
|
|
if( linear > 1.0f )
|
|
{
|
|
// Use LinearToGammaFullRange maybe if you trip this.
|
|
Assert( 0 );
|
|
return 1.0f;
|
|
}
|
|
|
|
int index = RoundFloatToInt( linear * 255.0f );
|
|
Assert( index >= 0 && index < 256 );
|
|
return g_Mathlib_LinearToGamma[index];
|
|
}
|
|
|
|
// convert texture to linear 0..1 value
|
|
float TextureToLinear( int c )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if (c < 0)
|
|
return 0;
|
|
if (c > 255)
|
|
return 1.0;
|
|
|
|
return texturetolinear[c];
|
|
}
|
|
|
|
// convert texture to linear 0..1 value
|
|
int LinearToTexture( float f )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int i;
|
|
i = static_cast<int>(f * 1023); // assume 0..1 range
|
|
if (i < 0)
|
|
i = 0;
|
|
if (i > 1023)
|
|
i = 1023;
|
|
|
|
return lineartotexture[i];
|
|
}
|
|
|
|
|
|
// converts 0..1 linear value to screen gamma (0..255)
|
|
int LinearToScreenGamma( float f )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
int i;
|
|
i = static_cast<int>(f * 1023); // assume 0..1 range
|
|
if (i < 0)
|
|
i = 0;
|
|
if (i > 1023)
|
|
i = 1023;
|
|
|
|
return lineartoscreen[i];
|
|
}
|
|
|
|
// solve a x^2 + b x + c = 0
|
|
bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if (a == 0)
|
|
{
|
|
if (b != 0)
|
|
{
|
|
// no x^2 component, it's a linear system
|
|
root1 = root2 = -c / b;
|
|
return true;
|
|
}
|
|
if (c == 0)
|
|
{
|
|
// all zero's
|
|
root1 = root2 = 0;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
float tmp = b * b - 4.0f * a * c;
|
|
|
|
if (tmp < 0)
|
|
{
|
|
// imaginary number, bah, no solution.
|
|
return false;
|
|
}
|
|
|
|
tmp = sqrt( tmp );
|
|
root1 = (-b + tmp) / (2.0f * a);
|
|
root2 = (-b - tmp) / (2.0f * a);
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists
|
|
bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c )
|
|
{
|
|
float det = (x1 - x2)*(x1 - x3)*(x2 - x3);
|
|
|
|
// FIXME: check with some sort of epsilon
|
|
if (det == 0.0)
|
|
return false;
|
|
|
|
a = (x3*(-y1 + y2) + x2*(y1 - y3) + x1*(-y2 + y3)) / det;
|
|
|
|
b = (x3*x3*(y1 - y2) + x1*x1*(y2 - y3) + x2*x2*(-y1 + y3)) / det;
|
|
|
|
c = (x1*x3*(-x1 + x3)*y2 + x2*x2*(x3*y1 - x1*y3) + x2*(-(x3*x3*y1) + x1*x1*y3)) / det;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2, float x3, float y3,
|
|
float &a, float &b, float &c )
|
|
{
|
|
// use SolveInverseQuadratic, but if the sigm of the derivative at the start point is the wrong
|
|
// sign, displace the mid point
|
|
|
|
// first, sort parameters
|
|
if (x1>x2)
|
|
{
|
|
V_swap(x1,x2);
|
|
V_swap(y1,y2);
|
|
}
|
|
if (x2>x3)
|
|
{
|
|
V_swap(x2,x3);
|
|
V_swap(y2,y3);
|
|
}
|
|
if (x1>x2)
|
|
{
|
|
V_swap(x1,x2);
|
|
V_swap(y1,y2);
|
|
}
|
|
// this code is not fast. what it does is when the curve would be non-monotonic, slowly shifts
|
|
// the center point closer to the linear line between the endpoints. Should anyone need htis
|
|
// function to be actually fast, it would be fairly easy to change it to be so.
|
|
for(float blend_to_linear_factor=0.0;blend_to_linear_factor<=1.0;blend_to_linear_factor+=0.05)
|
|
{
|
|
float tempy2=(1-blend_to_linear_factor)*y2+blend_to_linear_factor*FLerp(y1,y3,x1,x3,x2);
|
|
if (!SolveInverseQuadratic(x1,y1,x2,tempy2,x3,y3,a,b,c))
|
|
return false;
|
|
float derivative=2.0*a+b;
|
|
if ( (y1<y2) && (y2<y3)) // monotonically increasing
|
|
{
|
|
if (derivative>=0.0)
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
if ( (y1>y2) && (y2>y3)) // monotonically decreasing
|
|
{
|
|
if (derivative<=0.0)
|
|
return true;
|
|
}
|
|
else
|
|
return true;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists
|
|
bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c )
|
|
{
|
|
float det = (x1 - x2)*(x1 - x3)*(x2 - x3)*y1*y2*y3;
|
|
|
|
// FIXME: check with some sort of epsilon
|
|
if (det == 0.0)
|
|
return false;
|
|
|
|
a = (x1*y1*(y2 - y3) + x3*(y1 - y2)*y3 + x2*y2*(-y1 + y3)) / det;
|
|
|
|
b = (x2*x2*y2*(y1 - y3) + x3*x3*(-y1 + y2)*y3 + x1*x1*y1*(-y2 + y3)) / det;
|
|
|
|
c = (x2*(x2 - x3)*x3*y2*y3 + x1*x1*y1*(x2*y2 - x3*y3) + x1*(-(x2*x2*y1*y2) + x3*x3*y1*y3)) / det;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
// Rotate a vector around the Z axis (YAW)
|
|
void VectorYawRotate( const Vector &in, float flYaw, Vector &out)
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
if (&in == &out )
|
|
{
|
|
Vector tmp;
|
|
tmp = in;
|
|
VectorYawRotate( tmp, flYaw, out );
|
|
return;
|
|
}
|
|
|
|
float sy, cy;
|
|
|
|
SinCos( DEG2RAD(flYaw), &sy, &cy );
|
|
|
|
out.x = in.x * cy - in.y * sy;
|
|
out.y = in.x * sy + in.y * cy;
|
|
out.z = in.z;
|
|
}
|
|
|
|
|
|
|
|
float Bias( float x, float biasAmt )
|
|
{
|
|
// WARNING: not thread safe
|
|
static float lastAmt = -1;
|
|
static float lastExponent = 0;
|
|
if( lastAmt != biasAmt )
|
|
{
|
|
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
|
|
}
|
|
return pow( x, lastExponent );
|
|
}
|
|
|
|
|
|
float Gain( float x, float biasAmt )
|
|
{
|
|
// WARNING: not thread safe
|
|
if( x < 0.5 )
|
|
return 0.5f * Bias( 2*x, 1-biasAmt );
|
|
else
|
|
return 1 - 0.5f * Bias( 2 - 2*x, 1-biasAmt );
|
|
}
|
|
|
|
|
|
float SmoothCurve( float x )
|
|
{
|
|
return (1 - cos( x * M_PI )) * 0.5f;
|
|
}
|
|
|
|
|
|
inline float MovePeak( float x, float flPeakPos )
|
|
{
|
|
// Todo: make this higher-order?
|
|
if( x < flPeakPos )
|
|
return x * 0.5f / flPeakPos;
|
|
else
|
|
return 0.5 + 0.5 * (x - flPeakPos) / (1 - flPeakPos);
|
|
}
|
|
|
|
|
|
float SmoothCurve_Tweak( float x, float flPeakPos, float flPeakSharpness )
|
|
{
|
|
float flMovedPeak = MovePeak( x, flPeakPos );
|
|
float flSharpened = Gain( flMovedPeak, flPeakSharpness );
|
|
return SmoothCurve( flSharpened );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// make sure quaternions are within 180 degrees of one another, if not, reverse q
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
// FIXME: can this be done with a quat dot product?
|
|
|
|
int i;
|
|
// decide if one of the quaternions is backwards
|
|
float a = 0;
|
|
float b = 0;
|
|
for (i = 0; i < 4; i++)
|
|
{
|
|
a += (p[i]-q[i])*(p[i]-q[i]);
|
|
b += (p[i]+q[i])*(p[i]+q[i]);
|
|
}
|
|
if (a > b)
|
|
{
|
|
for (i = 0; i < 4; i++)
|
|
{
|
|
qt[i] = -q[i];
|
|
}
|
|
}
|
|
else if (&qt != &q)
|
|
{
|
|
for (i = 0; i < 4; i++)
|
|
{
|
|
qt[i] = q[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Do a piecewise addition of the quaternion elements. This actually makes little
|
|
// mathematical sense, but it's a cheap way to simulate a slerp.
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
// decide if one of the quaternions is backwards
|
|
Quaternion q2;
|
|
QuaternionAlign( p, q, q2 );
|
|
|
|
QuaternionBlendNoAlign( p, q2, t, qt );
|
|
}
|
|
|
|
|
|
void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float sclp, sclq;
|
|
int i;
|
|
|
|
// 0.0 returns p, 1.0 return q.
|
|
sclp = 1.0f - t;
|
|
sclq = t;
|
|
for (i = 0; i < 4; i++) {
|
|
qt[i] = sclp * p[i] + sclq * q[i];
|
|
}
|
|
QuaternionNormalize( qt );
|
|
}
|
|
|
|
|
|
|
|
void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float sclp;
|
|
|
|
sclp = 1.0f - t;
|
|
|
|
qt.x = p.x * sclp;
|
|
qt.y = p.y * sclp;
|
|
qt.z = p.z * sclp;
|
|
if (qt.w < 0.0)
|
|
{
|
|
qt.w = p.w * sclp - t;
|
|
}
|
|
else
|
|
{
|
|
qt.w = p.w * sclp + t;
|
|
}
|
|
QuaternionNormalize( qt );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Quaternion sphereical linear interpolation
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt )
|
|
{
|
|
Quaternion q2;
|
|
// 0.0 returns p, 1.0 return q.
|
|
|
|
// decide if one of the quaternions is backwards
|
|
QuaternionAlign( p, q, q2 );
|
|
|
|
QuaternionSlerpNoAlign( p, q2, t, qt );
|
|
}
|
|
|
|
|
|
void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float omega, cosom, sinom, sclp, sclq;
|
|
int i;
|
|
|
|
// 0.0 returns p, 1.0 return q.
|
|
|
|
cosom = p[0]*q[0] + p[1]*q[1] + p[2]*q[2] + p[3]*q[3];
|
|
|
|
if ((1.0f + cosom) > 0.000001f) {
|
|
if ((1.0f - cosom) > 0.000001f) {
|
|
omega = acos( cosom );
|
|
sinom = sin( omega );
|
|
sclp = sin( (1.0f - t)*omega) / sinom;
|
|
sclq = sin( t*omega ) / sinom;
|
|
}
|
|
else {
|
|
// TODO: add short circuit for cosom == 1.0f?
|
|
sclp = 1.0f - t;
|
|
sclq = t;
|
|
}
|
|
for (i = 0; i < 4; i++) {
|
|
qt[i] = sclp * p[i] + sclq * q[i];
|
|
}
|
|
}
|
|
else {
|
|
Assert( &qt != &q );
|
|
|
|
qt[0] = -q[1];
|
|
qt[1] = q[0];
|
|
qt[2] = -q[3];
|
|
qt[3] = q[2];
|
|
sclp = sin( (1.0f - t) * (0.5f * M_PI));
|
|
sclq = sin( t * (0.5f * M_PI));
|
|
for (i = 0; i < 3; i++) {
|
|
qt[i] = sclp * p[i] + sclq * qt[i];
|
|
}
|
|
}
|
|
|
|
Assert( qt.IsValid() );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Returns the angular delta between the two normalized quaternions in degrees.
|
|
//-----------------------------------------------------------------------------
|
|
float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q )
|
|
{
|
|
#if 1
|
|
// this code path is here for 2 reasons:
|
|
// 1 - acos maps 1-epsilon to values much larger than epsilon (vs asin, which maps epsilon to itself)
|
|
// this means that in floats, anything below ~0.05 degrees truncates to 0
|
|
// 2 - normalized quaternions are frequently slightly non-normalized due to float precision issues,
|
|
// and the epsilon off of normalized can be several percents of a degree
|
|
Quaternion qInv, diff;
|
|
QuaternionConjugate( q, qInv );
|
|
QuaternionMult( p, qInv, diff );
|
|
|
|
float sinang = sqrt( diff.x * diff.x + diff.y * diff.y + diff.z * diff.z );
|
|
float angle = RAD2DEG( 2 * asin( sinang ) );
|
|
return angle;
|
|
#else
|
|
Quaternion q2;
|
|
QuaternionAlign( p, q, q2 );
|
|
|
|
Assert( s_bMathlibInitialized );
|
|
float cosom = p.x * q2.x + p.y * q2.y + p.z * q2.z + p.w * q2.w;
|
|
|
|
if ( cosom > -1.0f )
|
|
{
|
|
if ( cosom < 1.0f )
|
|
{
|
|
float omega = 2 * fabs( acos( cosom ) );
|
|
return RAD2DEG( omega );
|
|
}
|
|
return 0.0f;
|
|
}
|
|
|
|
return 180.0f;
|
|
#endif
|
|
}
|
|
|
|
void QuaternionConjugate( const Quaternion &p, Quaternion &q )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( q.IsValid() );
|
|
|
|
q.x = -p.x;
|
|
q.y = -p.y;
|
|
q.z = -p.z;
|
|
q.w = p.w;
|
|
}
|
|
|
|
void QuaternionInvert( const Quaternion &p, Quaternion &q )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( q.IsValid() );
|
|
|
|
QuaternionConjugate( p, q );
|
|
|
|
float magnitudeSqr = QuaternionDotProduct( p, p );
|
|
Assert( magnitudeSqr );
|
|
if ( magnitudeSqr )
|
|
{
|
|
float inv = 1.0f / magnitudeSqr;
|
|
q.x *= inv;
|
|
q.y *= inv;
|
|
q.z *= inv;
|
|
q.w *= inv;
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Make sure the quaternion is of unit length
|
|
//-----------------------------------------------------------------------------
|
|
|
|
float QuaternionNormalize( Quaternion &q )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float radius, iradius;
|
|
|
|
Assert( q.IsValid() );
|
|
|
|
radius = q[0]*q[0] + q[1]*q[1] + q[2]*q[2] + q[3]*q[3];
|
|
|
|
if ( radius ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
|
|
{
|
|
radius = sqrt(radius);
|
|
iradius = 1.0f/radius;
|
|
q[3] *= iradius;
|
|
q[2] *= iradius;
|
|
q[1] *= iradius;
|
|
q[0] *= iradius;
|
|
}
|
|
return radius;
|
|
}
|
|
|
|
void QuaternionScale( const Quaternion &p, float t, Quaternion &q )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
#if 0
|
|
Quaternion p0;
|
|
Quaternion q;
|
|
p0.Init( 0.0, 0.0, 0.0, 1.0 );
|
|
|
|
// slerp in "reverse order" so that p doesn't get realigned
|
|
QuaternionSlerp( p, p0, 1.0 - fabs( t ), q );
|
|
if (t < 0.0)
|
|
{
|
|
q.w = -q.w;
|
|
}
|
|
#else
|
|
float r;
|
|
|
|
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
|
|
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
|
|
float sinom = sqrt( DotProduct( &p.x, &p.x ) );
|
|
sinom = MIN( sinom, 1.f );
|
|
|
|
float sinsom = sin( asin( sinom ) * t );
|
|
|
|
t = sinsom / (sinom + FLT_EPSILON);
|
|
VectorScale( &p.x, t, &q.x );
|
|
|
|
// rescale rotation
|
|
r = 1.0f - sinsom * sinsom;
|
|
|
|
// Assert( r >= 0 );
|
|
if (r < 0.0f)
|
|
r = 0.0f;
|
|
r = sqrt( r );
|
|
|
|
// keep sign of rotation
|
|
if (p.w < 0)
|
|
q.w = -r;
|
|
else
|
|
q.w = r;
|
|
#endif
|
|
|
|
Assert( q.IsValid() );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( p.IsValid() );
|
|
Assert( q.IsValid() );
|
|
|
|
// decide if one of the quaternions is backwards
|
|
Quaternion q2;
|
|
QuaternionAlign( p, q, q2 );
|
|
|
|
// is this right???
|
|
qt[0] = p[0] + q2[0];
|
|
qt[1] = p[1] + q2[1];
|
|
qt[2] = p[2] + q2[2];
|
|
qt[3] = p[3] + q2[3];
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
float QuaternionDotProduct( const Quaternion &p, const Quaternion &q )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( p.IsValid() );
|
|
Assert( q.IsValid() );
|
|
|
|
return p.x * q.x + p.y * q.y + p.z * q.z + p.w * q.w;
|
|
}
|
|
|
|
|
|
// qt = p * q
|
|
void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( p.IsValid() );
|
|
Assert( q.IsValid() );
|
|
|
|
if (&p == &qt)
|
|
{
|
|
Quaternion p2 = p;
|
|
QuaternionMult( p2, q, qt );
|
|
return;
|
|
}
|
|
|
|
// decide if one of the quaternions is backwards
|
|
Quaternion q2;
|
|
QuaternionAlign( p, q, q2 );
|
|
|
|
qt.x = p.x * q2.w + p.y * q2.z - p.z * q2.y + p.w * q2.x;
|
|
qt.y = -p.x * q2.z + p.y * q2.w + p.z * q2.x + p.w * q2.y;
|
|
qt.z = p.x * q2.y - p.y * q2.x + p.z * q2.w + p.w * q2.z;
|
|
qt.w = -p.x * q2.x - p.y * q2.y - p.z * q2.z + p.w * q2.w;
|
|
}
|
|
|
|
|
|
void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t& matrix )
|
|
{
|
|
Assert( pos.IsValid() );
|
|
|
|
QuaternionMatrix( q, matrix );
|
|
|
|
matrix[0][3] = pos.x;
|
|
matrix[1][3] = pos.y;
|
|
matrix[2][3] = pos.z;
|
|
}
|
|
|
|
void QuaternionMatrix( const Quaternion &q, matrix3x4_t& matrix )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( q.IsValid() );
|
|
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "QuaternionMatrix", "Mathlib" );
|
|
#endif
|
|
|
|
// Original code
|
|
// This should produce the same code as below with optimization, but looking at the assmebly,
|
|
// it doesn't. There are 7 extra multiplies in the release build of this, go figure.
|
|
#if 1
|
|
matrix[0][0] = 1.0 - 2.0 * q.y * q.y - 2.0 * q.z * q.z;
|
|
matrix[1][0] = 2.0 * q.x * q.y + 2.0 * q.w * q.z;
|
|
matrix[2][0] = 2.0 * q.x * q.z - 2.0 * q.w * q.y;
|
|
|
|
matrix[0][1] = 2.0f * q.x * q.y - 2.0f * q.w * q.z;
|
|
matrix[1][1] = 1.0f - 2.0f * q.x * q.x - 2.0f * q.z * q.z;
|
|
matrix[2][1] = 2.0f * q.y * q.z + 2.0f * q.w * q.x;
|
|
|
|
matrix[0][2] = 2.0f * q.x * q.z + 2.0f * q.w * q.y;
|
|
matrix[1][2] = 2.0f * q.y * q.z - 2.0f * q.w * q.x;
|
|
matrix[2][2] = 1.0f - 2.0f * q.x * q.x - 2.0f * q.y * q.y;
|
|
#else
|
|
float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
|
|
|
|
// precalculate common multiplitcations
|
|
x2 = q.x + q.x;
|
|
y2 = q.y + q.y;
|
|
z2 = q.z + q.z;
|
|
xx = q.x * x2;
|
|
xy = q.x * y2;
|
|
xz = q.x * z2;
|
|
yy = q.y * y2;
|
|
yz = q.y * z2;
|
|
zz = q.z * z2;
|
|
wx = q.w * x2;
|
|
wy = q.w * y2;
|
|
wz = q.w * z2;
|
|
|
|
matrix[0][0] = 1.0 - (yy + zz);
|
|
matrix[0][1] = xy - wz;
|
|
matrix[0][2] = xz + wy;
|
|
|
|
matrix[1][0] = xy + wz;
|
|
matrix[1][1] = 1.0 - (xx + zz);
|
|
matrix[1][2] = yz - wx;
|
|
|
|
matrix[2][0] = xz - wy;
|
|
matrix[2][1] = yz + wx;
|
|
matrix[2][2] = 1.0 - (xx + yy);
|
|
#endif
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts engine-format euler angles to a quaternion
|
|
// Input : angles - Right-handed Euler angles in degrees as follows:
|
|
// [0]: PITCH: Clockwise rotation around the Y axis.
|
|
// [1]: YAW: Counterclockwise rotation around the Z axis.
|
|
// [2]: ROLL: Counterclockwise rotation around the X axis.
|
|
// *outQuat - quaternion of form (i,j,k,real)
|
|
//-----------------------------------------------------------------------------
|
|
void AngleQuaternion( const QAngle &angles, Quaternion &outQuat )
|
|
{
|
|
RadianEuler radians;
|
|
radians.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) );
|
|
|
|
AngleQuaternion( radians, outQuat );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts a quaternion into engine angles
|
|
// Input : *quaternion - q3 + q0.i + q1.j + q2.k
|
|
// *outAngles - PITCH, YAW, ROLL
|
|
//-----------------------------------------------------------------------------
|
|
void QuaternionAngles( const Quaternion &q, QAngle &angles )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( q.IsValid() );
|
|
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "QuaternionAngles", "Mathlib" );
|
|
#endif
|
|
|
|
#if 1
|
|
// FIXME: doing it this way calculates too much data, needs to do an optimized version...
|
|
matrix3x4_t matrix;
|
|
QuaternionMatrix( q, matrix );
|
|
MatrixAngles( matrix, angles );
|
|
#else
|
|
float m11, m12, m13, m23, m33;
|
|
|
|
m11 = ( 2.0f * q.w * q.w ) + ( 2.0f * q.x * q.x ) - 1.0f;
|
|
m12 = ( 2.0f * q.x * q.y ) + ( 2.0f * q.w * q.z );
|
|
m13 = ( 2.0f * q.x * q.z ) - ( 2.0f * q.w * q.y );
|
|
m23 = ( 2.0f * q.y * q.z ) + ( 2.0f * q.w * q.x );
|
|
m33 = ( 2.0f * q.w * q.w ) + ( 2.0f * q.z * q.z ) - 1.0f;
|
|
|
|
// FIXME: this code has a singularity near PITCH +-90
|
|
angles[YAW] = RAD2DEG( atan2(m12, m11) );
|
|
angles[PITCH] = RAD2DEG( asin(-m13) );
|
|
angles[ROLL] = RAD2DEG( atan2(m23, m33) );
|
|
#endif
|
|
|
|
Assert( angles.IsValid() );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts a quaternion to an axis / angle in degrees
|
|
// (exponential map)
|
|
//-----------------------------------------------------------------------------
|
|
void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle )
|
|
{
|
|
angle = RAD2DEG(2 * acos(q.w));
|
|
if ( angle > 180 )
|
|
{
|
|
angle -= 360;
|
|
}
|
|
axis.x = q.x;
|
|
axis.y = q.y;
|
|
axis.z = q.z;
|
|
VectorNormalize( axis );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts an exponential map (ang/axis) to a quaternion
|
|
//-----------------------------------------------------------------------------
|
|
void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q )
|
|
{
|
|
float sa, ca;
|
|
|
|
SinCos( DEG2RAD(angle) * 0.5f, &sa, &ca );
|
|
|
|
q.x = axis.x * sa;
|
|
q.y = axis.y * sa;
|
|
q.z = axis.z * sa;
|
|
q.w = ca;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts radian-euler axis aligned angles to a quaternion
|
|
// Input : *pfAngles - Right-handed Euler angles in radians
|
|
// *outQuat - quaternion of form (i,j,k,real)
|
|
//-----------------------------------------------------------------------------
|
|
void AngleQuaternion( RadianEuler const &angles, Quaternion &outQuat )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
// Assert( angles.IsValid() );
|
|
|
|
#ifdef _VPROF_MATHLIB
|
|
VPROF_BUDGET( "AngleQuaternion", "Mathlib" );
|
|
#endif
|
|
|
|
float sr, sp, sy, cr, cp, cy;
|
|
|
|
SinCos( angles.z * 0.5f, &sy, &cy );
|
|
SinCos( angles.y * 0.5f, &sp, &cp );
|
|
SinCos( angles.x * 0.5f, &sr, &cr );
|
|
|
|
// NJS: for some reason VC6 wasn't recognizing the common subexpressions:
|
|
float srXcp = sr * cp,
|
|
crXsp = cr * sp;
|
|
outQuat.x = srXcp*cy-crXsp*sy; // X
|
|
outQuat.y = crXsp*cy+srXcp*sy; // Y
|
|
|
|
float crXcp = cr * cp,
|
|
srXsp = sr * sp;
|
|
outQuat.z = crXcp*sy-srXsp*cy; // Z
|
|
outQuat.w = crXcp*cy+srXsp*sy; // W (real component)
|
|
|
|
// Assert( outQuat.IsValid() );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts a basis to a quaternion
|
|
//-----------------------------------------------------------------------------
|
|
void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q )
|
|
{
|
|
Assert( fabs( vecForward.LengthSqr() - 1.0f ) < 1e-3 );
|
|
Assert( fabs( vecRight.LengthSqr() - 1.0f ) < 1e-3 );
|
|
Assert( fabs( vecUp.LengthSqr() - 1.0f ) < 1e-3 );
|
|
|
|
Vector vecLeft;
|
|
VectorMultiply( vecRight, -1.0f, vecLeft );
|
|
|
|
// FIXME: Don't know why, but this doesn't match at all with other result
|
|
// so we can't use this super-fast way.
|
|
/*
|
|
// Find the trace of the matrix:
|
|
float flTrace = vecForward.x + vecLeft.y + vecUp.z + 1.0f;
|
|
if ( flTrace > 1e-6 )
|
|
{
|
|
float flSqrtTrace = FastSqrt( flTrace );
|
|
float s = 0.5f / flSqrtTrace;
|
|
q.x = ( vecUp.y - vecLeft.z ) * s;
|
|
q.y = ( vecForward.z - vecUp.x ) * s;
|
|
q.z = ( vecLeft.x - vecForward.y ) * s;
|
|
q.w = 0.5f * flSqrtTrace;
|
|
}
|
|
else
|
|
{
|
|
if (( vecForward.x > vecLeft.y ) && ( vecForward.x > vecUp.z ) )
|
|
{
|
|
float flSqrtTrace = FastSqrt( 1.0f + vecForward.x - vecLeft.y - vecUp.z );
|
|
float s = 0.5f / flSqrtTrace;
|
|
q.x = 0.5f * flSqrtTrace;
|
|
q.y = ( vecForward.y + vecLeft.x ) * s;
|
|
q.z = ( vecUp.x + vecForward.z ) * s;
|
|
q.w = ( vecUp.y - vecLeft.z ) * s;
|
|
}
|
|
else if ( vecLeft.y > vecUp.z )
|
|
{
|
|
float flSqrtTrace = FastSqrt( 1.0f + vecLeft.y - vecForward.x - vecUp.z );
|
|
float s = 0.5f / flSqrtTrace;
|
|
q.x = ( vecForward.y + vecLeft.x ) * s;
|
|
q.y = 0.5f * flSqrtTrace;
|
|
q.z = ( vecUp.y + vecLeft.z ) * s;
|
|
q.w = ( vecForward.z - vecUp.x ) * s;
|
|
}
|
|
else
|
|
{
|
|
float flSqrtTrace = FastSqrt( 1.0 + vecUp.z - vecForward.x - vecLeft.y );
|
|
float s = 0.5f / flSqrtTrace;
|
|
q.x = ( vecUp.x + vecForward.z ) * s;
|
|
q.y = ( vecUp.y + vecLeft.z ) * s;
|
|
q.z = 0.5f * flSqrtTrace;
|
|
q.w = ( vecLeft.x - vecForward.y ) * s;
|
|
}
|
|
}
|
|
QuaternionNormalize( q );
|
|
*/
|
|
|
|
// Version 2: Go through angles
|
|
|
|
matrix3x4_t mat;
|
|
MatrixSetColumn( vecForward, 0, mat );
|
|
MatrixSetColumn( vecLeft, 1, mat );
|
|
MatrixSetColumn( vecUp, 2, mat );
|
|
|
|
QAngle angles;
|
|
MatrixAngles( mat, angles );
|
|
|
|
// Quaternion q2;
|
|
AngleQuaternion( angles, q );
|
|
|
|
// Assert( fabs(q.x - q2.x) < 1e-3 );
|
|
// Assert( fabs(q.y - q2.y) < 1e-3 );
|
|
// Assert( fabs(q.z - q2.z) < 1e-3 );
|
|
// Assert( fabs(q.w - q2.w) < 1e-3 );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Converts a quaternion into engine angles
|
|
// Input : *quaternion - q3 + q0.i + q1.j + q2.k
|
|
// *outAngles - PITCH, YAW, ROLL
|
|
//-----------------------------------------------------------------------------
|
|
void QuaternionAngles( const Quaternion &q, RadianEuler &angles )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( q.IsValid() );
|
|
|
|
// FIXME: doing it this way calculates too much data, needs to do an optimized version...
|
|
matrix3x4_t matrix;
|
|
QuaternionMatrix( q, matrix );
|
|
MatrixAngles( matrix, angles );
|
|
|
|
Assert( angles.IsValid() );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: A helper function to normalize p2.x->p1.x and p3.x->p4.x to
|
|
// be the same length as p2.x->p3.x
|
|
// Input : &p2 -
|
|
// &p4 -
|
|
// p4n -
|
|
//-----------------------------------------------------------------------------
|
|
void Spline_Normalize(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
Vector& p1n,
|
|
Vector& p4n )
|
|
{
|
|
float dt = p3.x - p2.x;
|
|
|
|
p1n = p1;
|
|
p4n = p4;
|
|
|
|
if ( dt != 0.0 )
|
|
{
|
|
if (p1.x != p2.x)
|
|
{
|
|
// Equivalent to p1n = p2 - (p2 - p1) * (dt / (p2.x - p1.x));
|
|
VectorLerp( p2, p1, dt / (p2.x - p1.x), p1n );
|
|
}
|
|
if (p4.x != p3.x)
|
|
{
|
|
// Equivalent to p4n = p3 + (p4 - p3) * (dt / (p4.x - p3.x));
|
|
VectorLerp( p3, p4, dt / (p4.x - p3.x), p4n );
|
|
}
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose:
|
|
// Input :
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void Catmull_Rom_Spline(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float tSqr = t*t*0.5f;
|
|
float tSqrSqr = t*tSqr;
|
|
t *= 0.5f;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
VectorScale( p1, -tSqrSqr, a ); // 0.5 t^3 * [ (-1*p1) + ( 3*p2) + (-3*p3) + p4 ]
|
|
VectorScale( p2, tSqrSqr*3, b );
|
|
VectorScale( p3, tSqrSqr*-3, c );
|
|
VectorScale( p4, tSqrSqr, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 2
|
|
VectorScale( p1, tSqr*2, a ); // 0.5 t^2 * [ ( 2*p1) + (-5*p2) + ( 4*p3) - p4 ]
|
|
VectorScale( p2, tSqr*-5, b );
|
|
VectorScale( p3, tSqr*4, c );
|
|
VectorScale( p4, -tSqr, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 3
|
|
VectorScale( p1, -t, a ); // 0.5 t * [ (-1*p1) + p3 ]
|
|
VectorScale( p3, t, b );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
|
|
// matrix row 4
|
|
VectorAdd( p2, output, output ); // p2
|
|
}
|
|
|
|
void Catmull_Rom_Spline_Tangent(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float tOne = 3*t*t*0.5f;
|
|
float tTwo = 2*t*0.5f;
|
|
float tThree = 0.5;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
VectorScale( p1, -tOne, a ); // 0.5 t^3 * [ (-1*p1) + ( 3*p2) + (-3*p3) + p4 ]
|
|
VectorScale( p2, tOne*3, b );
|
|
VectorScale( p3, tOne*-3, c );
|
|
VectorScale( p4, tOne, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 2
|
|
VectorScale( p1, tTwo*2, a ); // 0.5 t^2 * [ ( 2*p1) + (-5*p2) + ( 4*p3) - p4 ]
|
|
VectorScale( p2, tTwo*-5, b );
|
|
VectorScale( p3, tTwo*4, c );
|
|
VectorScale( p4, -tTwo, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 3
|
|
VectorScale( p1, -tThree, a ); // 0.5 t * [ (-1*p1) + p3 ]
|
|
VectorScale( p3, tThree, b );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
}
|
|
|
|
void Catmull_Rom_Spline_Normalize(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
|
|
float dt = p3.DistTo(p2);
|
|
|
|
Vector p1n, p4n;
|
|
VectorSubtract( p1, p2, p1n );
|
|
VectorSubtract( p4, p3, p4n );
|
|
|
|
VectorNormalize( p1n );
|
|
VectorNormalize( p4n );
|
|
|
|
VectorMA( p2, dt, p1n, p1n );
|
|
VectorMA( p3, dt, p4n, p4n );
|
|
|
|
Catmull_Rom_Spline( p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
|
|
void Catmull_Rom_Spline_NormalizeX(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Vector p1n, p4n;
|
|
Spline_Normalize( p1, p2, p3, p4, p1n, p4n );
|
|
Catmull_Rom_Spline( p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: basic hermite spline. t = 0 returns p1, t = 1 returns p2,
|
|
// d1 and d2 are used to entry and exit slope of curve
|
|
// Input :
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void Hermite_Spline(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &d1,
|
|
const Vector &d2,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float tSqr = t*t;
|
|
float tCube = t*tSqr;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &d1 );
|
|
Assert( &output != &d2 );
|
|
|
|
float b1 = 2.0f*tCube-3.0f*tSqr+1.0f;
|
|
float b2 = 1.0f - b1; // -2*tCube+3*tSqr;
|
|
float b3 = tCube-2*tSqr+t;
|
|
float b4 = tCube-tSqr;
|
|
|
|
VectorScale( p1, b1, output );
|
|
VectorMA( output, b2, p2, output );
|
|
VectorMA( output, b3, d1, output );
|
|
VectorMA( output, b4, d2, output );
|
|
}
|
|
|
|
float Hermite_Spline(
|
|
float p1,
|
|
float p2,
|
|
float d1,
|
|
float d2,
|
|
float t )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
float output;
|
|
float tSqr = t*t;
|
|
float tCube = t*tSqr;
|
|
|
|
float b1 = 2.0f*tCube-3.0f*tSqr+1.0f;
|
|
float b2 = 1.0f - b1; // -2*tCube+3*tSqr;
|
|
float b3 = tCube-2*tSqr+t;
|
|
float b4 = tCube-tSqr;
|
|
|
|
output = p1 * b1;
|
|
output += p2 * b2;
|
|
output += d1 * b3;
|
|
output += d2 * b4;
|
|
|
|
return output;
|
|
}
|
|
|
|
|
|
void Hermite_SplineBasis( float t, float basis[4] )
|
|
{
|
|
float tSqr = t*t;
|
|
float tCube = t*tSqr;
|
|
|
|
basis[0] = 2.0f*tCube-3.0f*tSqr+1.0f;
|
|
basis[1] = 1.0f - basis[0]; // -2*tCube+3*tSqr;
|
|
basis[2] = tCube-2*tSqr+t;
|
|
basis[3] = tCube-tSqr;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: simple three data point hermite spline.
|
|
// t = 0 returns p1, t = 1 returns p2,
|
|
// slopes are generated from the p0->p1 and p1->p2 segments
|
|
// this is reasonable C1 method when there's no "p3" data yet.
|
|
// Input :
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// BUG: the VectorSubtract()'s calls go away if the global optimizer is enabled
|
|
#ifdef _MSC_VER
|
|
#pragma optimize( "g", off )
|
|
#endif
|
|
|
|
void Hermite_Spline( const Vector &p0, const Vector &p1, const Vector &p2, float t, Vector& output )
|
|
{
|
|
Vector e10, e21;
|
|
VectorSubtract( p1, p0, e10 );
|
|
VectorSubtract( p2, p1, e21 );
|
|
Hermite_Spline( p1, p2, e10, e21, t, output );
|
|
}
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma optimize( "", on )
|
|
#endif
|
|
|
|
float Hermite_Spline( float p0, float p1, float p2, float t )
|
|
{
|
|
return Hermite_Spline( p1, p2, p1 - p0, p2 - p1, t );
|
|
}
|
|
|
|
|
|
void Hermite_Spline( const Quaternion &q0, const Quaternion &q1, const Quaternion &q2, float t, Quaternion &output )
|
|
{
|
|
// cheap, hacked version of quaternions
|
|
Quaternion q0a;
|
|
Quaternion q1a;
|
|
|
|
QuaternionAlign( q2, q0, q0a );
|
|
QuaternionAlign( q2, q1, q1a );
|
|
|
|
output.x = Hermite_Spline( q0a.x, q1a.x, q2.x, t );
|
|
output.y = Hermite_Spline( q0a.y, q1a.y, q2.y, t );
|
|
output.z = Hermite_Spline( q0a.z, q1a.z, q2.z, t );
|
|
output.w = Hermite_Spline( q0a.w, q1a.w, q2.w, t );
|
|
|
|
QuaternionNormalize( output );
|
|
}
|
|
|
|
// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves
|
|
//
|
|
// Tension: -1 = Round -> 1 = Tight
|
|
// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right)
|
|
// Continuity: -1 = Box corners -> 1 = Inverted corners
|
|
//
|
|
// If T=B=C=0 it's the same matrix as Catmull-Rom.
|
|
// If T=1 & B=C=0 it's the same as Cubic.
|
|
// If T=B=0 & C=-1 it's just linear interpolation
|
|
//
|
|
// See http://news.povray.org/povray.binaries.tutorials/attachment/%3CXns91B880592482seed7@povray.org%3E/Splines.bas.txt
|
|
// for example code and descriptions of various spline types...
|
|
//
|
|
void Kochanek_Bartels_Spline(
|
|
float tension,
|
|
float bias,
|
|
float continuity,
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float ffa, ffb, ffc, ffd;
|
|
|
|
ffa = ( 1.0f - tension ) * ( 1.0f + continuity ) * ( 1.0f + bias );
|
|
ffb = ( 1.0f - tension ) * ( 1.0f - continuity ) * ( 1.0f - bias );
|
|
ffc = ( 1.0f - tension ) * ( 1.0f - continuity ) * ( 1.0f + bias );
|
|
ffd = ( 1.0f - tension ) * ( 1.0f + continuity ) * ( 1.0f - bias );
|
|
|
|
float tSqr = t*t*0.5f;
|
|
float tSqrSqr = t*tSqr;
|
|
t *= 0.5f;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
VectorScale( p1, tSqrSqr * -ffa, a );
|
|
VectorScale( p2, tSqrSqr * ( 4.0f + ffa - ffb - ffc ), b );
|
|
VectorScale( p3, tSqrSqr * ( -4.0f + ffb + ffc - ffd ), c );
|
|
VectorScale( p4, tSqrSqr * ffd, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 2
|
|
VectorScale( p1, tSqr* 2 * ffa, a );
|
|
VectorScale( p2, tSqr * ( -6 - 2 * ffa + 2 * ffb + ffc ), b );
|
|
VectorScale( p3, tSqr * ( 6 - 2 * ffb - ffc + ffd ), c );
|
|
VectorScale( p4, tSqr * -ffd, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 3
|
|
VectorScale( p1, t * -ffa, a );
|
|
VectorScale( p2, t * ( ffa - ffb ), b );
|
|
VectorScale( p3, t * ffb, c );
|
|
// p4 unchanged
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 4
|
|
// p1, p3, p4 unchanged
|
|
// p2 is multiplied by 1 and added, so just added it directly
|
|
|
|
VectorAdd( p2, output, output );
|
|
}
|
|
|
|
void Kochanek_Bartels_Spline_NormalizeX(
|
|
float tension,
|
|
float bias,
|
|
float continuity,
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Vector p1n, p4n;
|
|
Spline_Normalize( p1, p2, p3, p4, p1n, p4n );
|
|
Kochanek_Bartels_Spline( tension, bias, continuity, p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
void Cubic_Spline(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float tSqr = t*t;
|
|
float tSqrSqr = t*tSqr;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
VectorScale( p2, tSqrSqr * 2, b );
|
|
VectorScale( p3, tSqrSqr * -2, c );
|
|
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 2
|
|
VectorScale( p2, tSqr * -3, b );
|
|
VectorScale( p3, tSqr * 3, c );
|
|
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 3
|
|
// no influence
|
|
// p4 unchanged
|
|
|
|
// matrix row 4
|
|
// p1, p3, p4 unchanged
|
|
VectorAdd( p2, output, output );
|
|
}
|
|
|
|
void Cubic_Spline_NormalizeX(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Vector p1n, p4n;
|
|
Spline_Normalize( p1, p2, p3, p4, p1n, p4n );
|
|
Cubic_Spline( p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
void BSpline(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float oneOver6 = 1.0f / 6.0f;
|
|
|
|
float tSqr = t * t * oneOver6;
|
|
float tSqrSqr = t*tSqr;
|
|
t *= oneOver6;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
VectorScale( p1, -tSqrSqr, a );
|
|
VectorScale( p2, tSqrSqr * 3.0f, b );
|
|
VectorScale( p3, tSqrSqr * -3.0f, c );
|
|
VectorScale( p4, tSqrSqr, d );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
VectorAdd( d, output, output );
|
|
|
|
// matrix row 2
|
|
VectorScale( p1, tSqr * 3.0f, a );
|
|
VectorScale( p2, tSqr * -6.0f, b );
|
|
VectorScale( p3, tSqr * 3.0f, c );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 3
|
|
VectorScale( p1, t * -3.0f, a );
|
|
VectorScale( p3, t * 3.0f, c );
|
|
// p4 unchanged
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 4
|
|
// p1 and p3 scaled by 1.0f, so done below
|
|
VectorScale( p1, oneOver6, a );
|
|
VectorScale( p2, 4.0f * oneOver6, b );
|
|
VectorScale( p3, oneOver6, c );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
}
|
|
|
|
void BSpline_NormalizeX(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Vector p1n, p4n;
|
|
Spline_Normalize( p1, p2, p3, p4, p1n, p4n );
|
|
BSpline( p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
void Parabolic_Spline(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
float tSqr = t*t*0.5f;
|
|
t *= 0.5f;
|
|
|
|
Assert( &output != &p1 );
|
|
Assert( &output != &p2 );
|
|
Assert( &output != &p3 );
|
|
Assert( &output != &p4 );
|
|
|
|
output.Init();
|
|
|
|
Vector a, b, c, d;
|
|
|
|
// matrix row 1
|
|
// no influence from t cubed
|
|
|
|
// matrix row 2
|
|
VectorScale( p1, tSqr, a );
|
|
VectorScale( p2, tSqr * -2.0f, b );
|
|
VectorScale( p3, tSqr, c );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
VectorAdd( c, output, output );
|
|
|
|
// matrix row 3
|
|
VectorScale( p1, t * -2.0f, a );
|
|
VectorScale( p2, t * 2.0f, b );
|
|
// p4 unchanged
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
|
|
// matrix row 4
|
|
VectorScale( p1, 0.5f, a );
|
|
VectorScale( p2, 0.5f, b );
|
|
|
|
VectorAdd( a, output, output );
|
|
VectorAdd( b, output, output );
|
|
}
|
|
|
|
void Parabolic_Spline_NormalizeX(
|
|
const Vector &p1,
|
|
const Vector &p2,
|
|
const Vector &p3,
|
|
const Vector &p4,
|
|
float t,
|
|
Vector& output )
|
|
{
|
|
Vector p1n, p4n;
|
|
Spline_Normalize( p1, p2, p3, p4, p1n, p4n );
|
|
Parabolic_Spline( p1n, p2, p3, p4n, t, output );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Compress the input values for a ranged result such that from 75% to 200% smoothly of the range maps
|
|
//-----------------------------------------------------------------------------
|
|
|
|
float RangeCompressor( float flValue, float flMin, float flMax, float flBase )
|
|
{
|
|
flValue += flBase;
|
|
|
|
// convert to 0 to 1 value
|
|
float flMid = (flValue - flMin) / (flMax - flMin);
|
|
// convert to -1 to 1 value
|
|
float flTarget = flMid * 2 - 1;
|
|
|
|
if (fabs(flTarget) > 0.75)
|
|
{
|
|
float t = (fabs(flTarget) - 0.75) / (1.25);
|
|
if (t < 1.0)
|
|
{
|
|
if (flTarget > 0)
|
|
{
|
|
flTarget = Hermite_Spline( 0.75, 1, 0.75, 0, t );
|
|
}
|
|
else
|
|
{
|
|
flTarget = -Hermite_Spline( 0.75, 1, 0.75, 0, t );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
flTarget = (flTarget > 0) ? 1.0f : -1.0f;
|
|
}
|
|
}
|
|
|
|
flMid = (flTarget + 1 ) / 2.0;
|
|
flValue = flMin * (1 - flMid) + flMax * flMid;
|
|
|
|
flValue -= flBase;
|
|
|
|
return flValue;
|
|
}
|
|
|
|
|
|
//#pragma optimize( "", on )
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Transforms a AABB into another space; which will inherently grow the box.
|
|
//-----------------------------------------------------------------------------
|
|
void TransformAABB( const matrix3x4_t& transform, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut )
|
|
{
|
|
Vector localCenter;
|
|
VectorAdd( vecMinsIn, vecMaxsIn, localCenter );
|
|
localCenter *= 0.5f;
|
|
|
|
Vector localExtents;
|
|
VectorSubtract( vecMaxsIn, localCenter, localExtents );
|
|
|
|
Vector worldCenter;
|
|
VectorTransform( localCenter, transform, worldCenter );
|
|
|
|
Vector worldExtents;
|
|
worldExtents.x = DotProductAbs( localExtents, transform[0] );
|
|
worldExtents.y = DotProductAbs( localExtents, transform[1] );
|
|
worldExtents.z = DotProductAbs( localExtents, transform[2] );
|
|
|
|
VectorSubtract( worldCenter, worldExtents, vecMinsOut );
|
|
VectorAdd( worldCenter, worldExtents, vecMaxsOut );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Uses the inverse transform of in1
|
|
//-----------------------------------------------------------------------------
|
|
void ITransformAABB( const matrix3x4_t& transform, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut )
|
|
{
|
|
Vector worldCenter;
|
|
VectorAdd( vecMinsIn, vecMaxsIn, worldCenter );
|
|
worldCenter *= 0.5f;
|
|
|
|
Vector worldExtents;
|
|
VectorSubtract( vecMaxsIn, worldCenter, worldExtents );
|
|
|
|
Vector localCenter;
|
|
VectorITransform( worldCenter, transform, localCenter );
|
|
|
|
Vector localExtents;
|
|
localExtents.x = FloatMakePositive( worldExtents.x * transform[0][0] ) +
|
|
FloatMakePositive( worldExtents.y * transform[1][0] ) +
|
|
FloatMakePositive( worldExtents.z * transform[2][0] );
|
|
localExtents.y = FloatMakePositive( worldExtents.x * transform[0][1] ) +
|
|
FloatMakePositive( worldExtents.y * transform[1][1] ) +
|
|
FloatMakePositive( worldExtents.z * transform[2][1] );
|
|
localExtents.z = FloatMakePositive( worldExtents.x * transform[0][2] ) +
|
|
FloatMakePositive( worldExtents.y * transform[1][2] ) +
|
|
FloatMakePositive( worldExtents.z * transform[2][2] );
|
|
|
|
VectorSubtract( localCenter, localExtents, vecMinsOut );
|
|
VectorAdd( localCenter, localExtents, vecMaxsOut );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Rotates a AABB into another space; which will inherently grow the box.
|
|
// (same as TransformAABB, but doesn't take the translation into account)
|
|
//-----------------------------------------------------------------------------
|
|
void RotateAABB( const matrix3x4_t &transform, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut )
|
|
{
|
|
Vector localCenter;
|
|
VectorAdd( vecMinsIn, vecMaxsIn, localCenter );
|
|
localCenter *= 0.5f;
|
|
|
|
Vector localExtents;
|
|
VectorSubtract( vecMaxsIn, localCenter, localExtents );
|
|
|
|
Vector newCenter;
|
|
VectorRotate( localCenter, transform, newCenter );
|
|
|
|
Vector newExtents;
|
|
newExtents.x = DotProductAbs( localExtents, transform[0] );
|
|
newExtents.y = DotProductAbs( localExtents, transform[1] );
|
|
newExtents.z = DotProductAbs( localExtents, transform[2] );
|
|
|
|
VectorSubtract( newCenter, newExtents, vecMinsOut );
|
|
VectorAdd( newCenter, newExtents, vecMaxsOut );
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Uses the inverse transform of in1
|
|
//-----------------------------------------------------------------------------
|
|
void IRotateAABB( const matrix3x4_t &transform, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut )
|
|
{
|
|
Vector oldCenter;
|
|
VectorAdd( vecMinsIn, vecMaxsIn, oldCenter );
|
|
oldCenter *= 0.5f;
|
|
|
|
Vector oldExtents;
|
|
VectorSubtract( vecMaxsIn, oldCenter, oldExtents );
|
|
|
|
Vector newCenter;
|
|
VectorIRotate( oldCenter, transform, newCenter );
|
|
|
|
Vector newExtents;
|
|
newExtents.x = FloatMakePositive( oldExtents.x * transform[0][0] ) +
|
|
FloatMakePositive( oldExtents.y * transform[1][0] ) +
|
|
FloatMakePositive( oldExtents.z * transform[2][0] );
|
|
newExtents.y = FloatMakePositive( oldExtents.x * transform[0][1] ) +
|
|
FloatMakePositive( oldExtents.y * transform[1][1] ) +
|
|
FloatMakePositive( oldExtents.z * transform[2][1] );
|
|
newExtents.z = FloatMakePositive( oldExtents.x * transform[0][2] ) +
|
|
FloatMakePositive( oldExtents.y * transform[1][2] ) +
|
|
FloatMakePositive( oldExtents.z * transform[2][2] );
|
|
|
|
VectorSubtract( newCenter, newExtents, vecMinsOut );
|
|
VectorAdd( newCenter, newExtents, vecMaxsOut );
|
|
}
|
|
|
|
|
|
float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point )
|
|
{
|
|
float flDelta;
|
|
float flDistSqr = 0.0f;
|
|
|
|
if ( point.x < mins.x )
|
|
{
|
|
flDelta = (mins.x - point.x);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
else if ( point.x > maxs.x )
|
|
{
|
|
flDelta = (point.x - maxs.x);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
|
|
if ( point.y < mins.y )
|
|
{
|
|
flDelta = (mins.y - point.y);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
else if ( point.y > maxs.y )
|
|
{
|
|
flDelta = (point.y - maxs.y);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
|
|
if ( point.z < mins.z )
|
|
{
|
|
flDelta = (mins.z - point.z);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
else if ( point.z > maxs.z )
|
|
{
|
|
flDelta = (point.z - maxs.z);
|
|
flDistSqr += flDelta * flDelta;
|
|
}
|
|
|
|
return flDistSqr;
|
|
}
|
|
|
|
|
|
void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut )
|
|
{
|
|
for ( int i = 0; i < 3; i++ )
|
|
{
|
|
if ( point[i] < mins[i] )
|
|
{
|
|
closestOut[i] = mins[i];
|
|
}
|
|
else if ( point[i] > maxs[i] )
|
|
{
|
|
closestOut[i] = maxs[i];
|
|
}
|
|
else
|
|
{
|
|
closestOut[i] = point[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
float CalcClosestPointToLineT( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vDir )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
VectorSubtract( vLineB, vLineA, vDir );
|
|
|
|
// D dot [P - (A + D*t)] = 0
|
|
// t = ( DP - DA) / DD
|
|
float div = vDir.Dot( vDir );
|
|
if( div < 0.00001f )
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return (vDir.Dot( P ) - vDir.Dot( vLineA )) / div;
|
|
}
|
|
}
|
|
|
|
void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector vDir;
|
|
float t = CalcClosestPointToLineT( P, vLineA, vLineB, vDir );
|
|
if ( outT ) *outT = t;
|
|
vClosest.MulAdd( vLineA, vDir, t );
|
|
}
|
|
|
|
|
|
float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector vClosest;
|
|
CalcClosestPointOnLine( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistTo(vClosest);
|
|
}
|
|
|
|
float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector vClosest;
|
|
CalcClosestPointOnLine( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistToSqr(vClosest);
|
|
}
|
|
|
|
void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *outT )
|
|
{
|
|
Vector vDir;
|
|
float t = CalcClosestPointToLineT( P, vLineA, vLineB, vDir );
|
|
t = clamp( t, 0, 1 );
|
|
if ( outT )
|
|
{
|
|
*outT = t;
|
|
}
|
|
vClosest.MulAdd( vLineA, vDir, t );
|
|
}
|
|
|
|
|
|
float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector vClosest;
|
|
CalcClosestPointOnLineSegment( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistTo( vClosest );
|
|
}
|
|
|
|
float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector vClosest;
|
|
CalcClosestPointOnLineSegment( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistToSqr(vClosest);
|
|
}
|
|
|
|
float CalcClosestPointToLineT2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, Vector2D &vDir )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2DSubtract( vLineB, vLineA, vDir );
|
|
|
|
// D dot [P - (A + D*t)] = 0
|
|
// t = (DP - DA) / DD
|
|
float div = vDir.Dot( vDir );
|
|
if( div < 0.00001f )
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return (vDir.Dot( P ) - vDir.Dot( vLineA )) / div;
|
|
}
|
|
}
|
|
|
|
void CalcClosestPointOnLine2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, Vector2D &vClosest, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2D vDir;
|
|
float t = CalcClosestPointToLineT2D( P, vLineA, vLineB, vDir );
|
|
if ( outT ) *outT = t;
|
|
vClosest.MulAdd( vLineA, vDir, t );
|
|
}
|
|
|
|
float CalcDistanceToLine2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2D vClosest;
|
|
CalcClosestPointOnLine2D( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistTo( vClosest );
|
|
}
|
|
|
|
float CalcDistanceSqrToLine2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2D vClosest;
|
|
CalcClosestPointOnLine2D( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistToSqr(vClosest);
|
|
}
|
|
|
|
void CalcClosestPointOnLineSegment2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, Vector2D &vClosest, float *outT )
|
|
{
|
|
Vector2D vDir;
|
|
float t = CalcClosestPointToLineT2D( P, vLineA, vLineB, vDir );
|
|
t = clamp( t, 0, 1 );
|
|
if ( outT )
|
|
{
|
|
*outT = t;
|
|
}
|
|
vClosest.MulAdd( vLineA, vDir, t );
|
|
}
|
|
|
|
float CalcDistanceToLineSegment2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2D vClosest;
|
|
CalcClosestPointOnLineSegment2D( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistTo( vClosest );
|
|
}
|
|
|
|
float CalcDistanceSqrToLineSegment2D( const Vector2D &P, const Vector2D &vLineA, const Vector2D &vLineB, float *outT )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
Vector2D vClosest;
|
|
CalcClosestPointOnLineSegment2D( P, vLineA, vLineB, vClosest, outT );
|
|
return P.DistToSqr( vClosest );
|
|
}
|
|
|
|
// Do we have another epsilon we could use
|
|
#define LINE_EPS ( 0.000001f )
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Given lines p1->p2 and p3->p4, computes a line segment (pa->pb) and returns the parameters 0->1 multipliers
|
|
// along each segment for the returned points
|
|
// Input : p1 -
|
|
// p2 -
|
|
// p3 -
|
|
// p4 -
|
|
// *s1 -
|
|
// *s2 -
|
|
// Output : Returns true on success, false on failure.
|
|
//-----------------------------------------------------------------------------
|
|
bool CalcLineToLineIntersectionSegment(
|
|
const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2,
|
|
float *t1, float *t2)
|
|
{
|
|
Vector p13,p43,p21;
|
|
float d1343,d4321,d1321,d4343,d2121;
|
|
float numer,denom;
|
|
|
|
p13.x = p1.x - p3.x;
|
|
p13.y = p1.y - p3.y;
|
|
p13.z = p1.z - p3.z;
|
|
p43.x = p4.x - p3.x;
|
|
p43.y = p4.y - p3.y;
|
|
p43.z = p4.z - p3.z;
|
|
|
|
if (fabs(p43.x) < LINE_EPS && fabs(p43.y) < LINE_EPS && fabs(p43.z) < LINE_EPS)
|
|
return false;
|
|
p21.x = p2.x - p1.x;
|
|
p21.y = p2.y - p1.y;
|
|
p21.z = p2.z - p1.z;
|
|
if (fabs(p21.x) < LINE_EPS && fabs(p21.y) < LINE_EPS && fabs(p21.z) < LINE_EPS)
|
|
return false;
|
|
|
|
d1343 = p13.x * p43.x + p13.y * p43.y + p13.z * p43.z;
|
|
d4321 = p43.x * p21.x + p43.y * p21.y + p43.z * p21.z;
|
|
d1321 = p13.x * p21.x + p13.y * p21.y + p13.z * p21.z;
|
|
d4343 = p43.x * p43.x + p43.y * p43.y + p43.z * p43.z;
|
|
d2121 = p21.x * p21.x + p21.y * p21.y + p21.z * p21.z;
|
|
|
|
denom = d2121 * d4343 - d4321 * d4321;
|
|
if (fabs(denom) < LINE_EPS)
|
|
return false;
|
|
numer = d1343 * d4321 - d1321 * d4343;
|
|
|
|
*t1 = numer / denom;
|
|
*t2 = (d1343 + d4321 * (*t1)) / d4343;
|
|
|
|
s1->x = p1.x + *t1 * p21.x;
|
|
s1->y = p1.y + *t1 * p21.y;
|
|
s1->z = p1.z + *t1 * p21.z;
|
|
s2->x = p3.x + *t2 * p43.x;
|
|
s2->y = p3.y + *t2 * p43.y;
|
|
s2->z = p3.z + *t2 * p43.z;
|
|
|
|
return true;
|
|
}
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma optimize( "", off )
|
|
#endif
|
|
|
|
// stuff from windows.h
|
|
#ifndef _XBOX
|
|
typedef unsigned int DWORD;
|
|
#endif
|
|
|
|
#ifndef EXCEPTION_EXECUTE_HANDLER
|
|
#define EXCEPTION_EXECUTE_HANDLER 1
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma optimize( "", on )
|
|
#endif
|
|
|
|
static bool s_b3DNowEnabled = false;
|
|
static bool s_bMMXEnabled = false;
|
|
static bool s_bSSEEnabled = false;
|
|
static bool s_bSSE2Enabled = false;
|
|
|
|
void MathLib_Init( float gamma, float texGamma, float brightness, int overbright, bool bAllow3DNow, bool bAllowSSE, bool bAllowSSE2, bool bAllowMMX )
|
|
{
|
|
#ifdef _XBOX
|
|
if (s_bMathlibInitialized)
|
|
return;
|
|
#endif
|
|
// FIXME: Hook SSE into VectorAligned + Vector4DAligned
|
|
|
|
// Grab the processor information:
|
|
const CPUInformation& pi = GetCPUInformation();
|
|
|
|
#ifndef _XBOX
|
|
// Select the default generic routines.
|
|
pfSqrt = _sqrtf;
|
|
pfRSqrt = _rsqrtf;
|
|
pfRSqrtFast = _rsqrtf;
|
|
pfVectorNormalize = _VectorNormalize;
|
|
pfVectorNormalizeFast = _VectorNormalizeFast;
|
|
pfInvRSquared = _InvRSquared;
|
|
pfFastSinCos = SinCos;
|
|
pfFastCos = cosf;
|
|
#endif
|
|
|
|
if ( bAllowMMX && pi.m_bMMX )
|
|
{
|
|
// Select the MMX specific routines if available
|
|
// (MMX routines were used by SW span fillers - not currently used for HW)
|
|
s_bMMXEnabled = true;
|
|
}
|
|
else
|
|
{
|
|
s_bMMXEnabled = false;
|
|
}
|
|
|
|
#ifndef _XBOX
|
|
// SSE Generally performs better than 3DNow when present, so this is placed
|
|
// first to allow SSE to override these settings.
|
|
if ( bAllow3DNow && pi.m_b3DNow )
|
|
{
|
|
s_b3DNowEnabled = true;
|
|
|
|
// Select the 3DNow specific routines if available;
|
|
pfVectorNormalize = _3DNow_VectorNormalize;
|
|
pfVectorNormalizeFast = _3DNow_VectorNormalizeFast;
|
|
pfInvRSquared = _3DNow_InvRSquared;
|
|
pfSqrt = _3DNow_Sqrt;
|
|
pfRSqrt = _3DNow_RSqrt;
|
|
pfRSqrtFast = _3DNow_RSqrt;
|
|
}
|
|
else
|
|
{
|
|
s_b3DNowEnabled = false;
|
|
}
|
|
#endif
|
|
|
|
if ( bAllowSSE && pi.m_bSSE )
|
|
{
|
|
s_bSSEEnabled = true;
|
|
#ifdef _XBOX
|
|
_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
|
|
#endif
|
|
|
|
// Select the SSE specific routines if available
|
|
pfVectorNormalize = _VectorNormalize;
|
|
pfVectorNormalizeFast = _SSE_VectorNormalizeFast;
|
|
pfInvRSquared = _SSE_InvRSquared;
|
|
pfSqrt = _SSE_Sqrt;
|
|
pfRSqrt = _SSE_RSqrtAccurate;
|
|
pfRSqrtFast = _SSE_RSqrtFast;
|
|
#ifdef _WIN32
|
|
pfFastSinCos = _SSE_SinCos;
|
|
pfFastCos = _SSE_cos;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#ifdef _XBOX
|
|
// sse expected
|
|
Assert(0);
|
|
#endif
|
|
s_bSSEEnabled = false;
|
|
}
|
|
|
|
#ifndef _XBOX
|
|
if ( bAllowSSE2 && pi.m_bSSE2 )
|
|
{
|
|
s_bSSE2Enabled = true;
|
|
#ifdef _WIN32
|
|
pfFastSinCos = _SSE2_SinCos;
|
|
pfFastCos = _SSE2_cos;
|
|
#endif
|
|
} else
|
|
{
|
|
s_bSSE2Enabled = false;
|
|
}
|
|
#endif
|
|
|
|
s_bMathlibInitialized = true;
|
|
|
|
InitSinCosTable();
|
|
BuildGammaTable( gamma, texGamma, brightness, overbright );
|
|
}
|
|
|
|
bool MathLib_3DNowEnabled( void )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return s_b3DNowEnabled;
|
|
}
|
|
|
|
bool MathLib_MMXEnabled( void )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return s_bMMXEnabled;
|
|
}
|
|
|
|
bool MathLib_SSEEnabled( void )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return s_bSSEEnabled;
|
|
}
|
|
|
|
bool MathLib_SSE2Enabled( void )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
return s_bSSE2Enabled;
|
|
}
|
|
|
|
float Approach( float target, float value, float speed )
|
|
{
|
|
float delta = target - value;
|
|
|
|
if ( delta > speed )
|
|
value += speed;
|
|
else if ( delta < -speed )
|
|
value -= speed;
|
|
else
|
|
value = target;
|
|
|
|
return value;
|
|
}
|
|
|
|
// BUGBUG: Why doesn't this call angle diff?!?!?
|
|
float ApproachAngle( float target, float value, float speed )
|
|
{
|
|
target = anglemod( target );
|
|
value = anglemod( value );
|
|
|
|
float delta = target - value;
|
|
|
|
// Speed is assumed to be positive
|
|
if ( speed < 0 )
|
|
speed = -speed;
|
|
|
|
if ( delta < -180 )
|
|
delta += 360;
|
|
else if ( delta > 180 )
|
|
delta -= 360;
|
|
|
|
if ( delta > speed )
|
|
value += speed;
|
|
else if ( delta < -speed )
|
|
value -= speed;
|
|
else
|
|
value = target;
|
|
|
|
return value;
|
|
}
|
|
|
|
|
|
// BUGBUG: Why do we need both of these?
|
|
float AngleDiff( float destAngle, float srcAngle )
|
|
{
|
|
float delta;
|
|
|
|
delta = fmodf(destAngle - srcAngle, 360.0f);
|
|
if ( destAngle > srcAngle )
|
|
{
|
|
if ( delta >= 180 )
|
|
delta -= 360;
|
|
}
|
|
else
|
|
{
|
|
if ( delta <= -180 )
|
|
delta += 360;
|
|
}
|
|
return delta;
|
|
}
|
|
|
|
|
|
float AngleDistance( float next, float cur )
|
|
{
|
|
float delta = next - cur;
|
|
|
|
if ( delta < -180 )
|
|
delta += 360;
|
|
else if ( delta > 180 )
|
|
delta -= 360;
|
|
|
|
return delta;
|
|
}
|
|
|
|
|
|
float AngleNormalize( float angle )
|
|
{
|
|
angle = fmodf(angle, 360.0f);
|
|
if (angle > 180)
|
|
{
|
|
angle -= 360;
|
|
}
|
|
if (angle < -180)
|
|
{
|
|
angle += 360;
|
|
}
|
|
return angle;
|
|
}
|
|
|
|
//--------------------------------------------------------------------------------------------------------------
|
|
// ensure that 0 <= angle <= 360
|
|
float AngleNormalizePositive( float angle )
|
|
{
|
|
angle = fmodf( angle, 360.0f );
|
|
|
|
if (angle < 0.0f)
|
|
{
|
|
angle += 360.0f;
|
|
}
|
|
|
|
return angle;
|
|
}
|
|
|
|
//--------------------------------------------------------------------------------------------------------------
|
|
bool AnglesAreEqual( float a, float b, float tolerance )
|
|
{
|
|
return (fabs( AngleDiff( a, b ) ) < tolerance);
|
|
}
|
|
|
|
void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle )
|
|
{
|
|
Quaternion srcQuat, destQuat, srcQuatInv, out;
|
|
AngleQuaternion( srcAngles, srcQuat );
|
|
AngleQuaternion( destAngles, destQuat );
|
|
QuaternionScale( srcQuat, -1, srcQuatInv );
|
|
QuaternionMult( destQuat, srcQuatInv, out );
|
|
|
|
QuaternionNormalize( out );
|
|
QuaternionAxisAngle( out, deltaAxis, deltaAngle );
|
|
}
|
|
|
|
void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out )
|
|
{
|
|
matrix3x4_t src, srcInv;
|
|
matrix3x4_t dest;
|
|
AngleMatrix( srcAngles, src );
|
|
AngleMatrix( destAngles, dest );
|
|
// xform = src(-1) * dest
|
|
MatrixInvert( src, srcInv );
|
|
matrix3x4_t xform;
|
|
ConcatTransforms( dest, srcInv, xform );
|
|
QAngle xformAngles;
|
|
MatrixAngles( xform, xformAngles );
|
|
if ( out )
|
|
{
|
|
*out = xformAngles;
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Computes a triangle normal
|
|
//-----------------------------------------------------------------------------
|
|
void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept )
|
|
{
|
|
Vector e1, e2;
|
|
VectorSubtract( v2, v1, e1 );
|
|
VectorSubtract( v3, v1, e2 );
|
|
CrossProduct( e1, e2, normal );
|
|
VectorNormalize( normal );
|
|
intercept = DotProduct( normal, v1 );
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: This is a clone of BaseWindingForPlane()
|
|
// Input : *outVerts - an array of preallocated verts to build the polygon in
|
|
// normal - the plane normal
|
|
// dist - the plane constant
|
|
// Output : int - vert count (always 4)
|
|
//-----------------------------------------------------------------------------
|
|
int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale )
|
|
{
|
|
int i, x;
|
|
vec_t max, v;
|
|
Vector org, vright, vup;
|
|
|
|
// find the major axis
|
|
|
|
max = -16384; //MAX_COORD_INTEGER
|
|
x = -1;
|
|
for (i=0 ; i<3; i++)
|
|
{
|
|
v = fabs(normal[i]);
|
|
if (v > max)
|
|
{
|
|
x = i;
|
|
max = v;
|
|
}
|
|
}
|
|
|
|
if (x==-1)
|
|
return 0;
|
|
|
|
// Build a unit vector along something other than the major axis
|
|
VectorCopy (vec3_origin, vup);
|
|
switch (x)
|
|
{
|
|
case 0:
|
|
case 1:
|
|
vup[2] = 1;
|
|
break;
|
|
case 2:
|
|
vup[0] = 1;
|
|
break;
|
|
}
|
|
|
|
// Remove the component of this vector along the normal
|
|
v = DotProduct (vup, normal);
|
|
VectorMA (vup, -v, normal, vup);
|
|
// Make it a unit (perpendicular)
|
|
VectorNormalize (vup);
|
|
|
|
// Center of the poly is at normal * dist
|
|
VectorScale (normal, dist, org);
|
|
// Calculate the third orthonormal basis vector for our plane space (this one and vup are in the plane)
|
|
CrossProduct (vup, normal, vright);
|
|
|
|
// Make the plane's basis vectors big (these are the half-sides of the polygon we're making)
|
|
VectorScale (vup, fHalfScale, vup);
|
|
VectorScale (vright, fHalfScale, vright);
|
|
|
|
// Move diagonally away from org to create the corner verts
|
|
VectorSubtract (org, vright, outVerts[0]); // left
|
|
VectorAdd (outVerts[0], vup, outVerts[0]); // up
|
|
|
|
VectorAdd (org, vright, outVerts[1]); // right
|
|
VectorAdd (outVerts[1], vup, outVerts[1]); // up
|
|
|
|
VectorAdd (org, vright, outVerts[2]); // right
|
|
VectorSubtract (outVerts[2], vup, outVerts[2]); // down
|
|
|
|
VectorSubtract (org, vright, outVerts[3]); // left
|
|
VectorSubtract (outVerts[3], vup, outVerts[3]); // down
|
|
|
|
// The four corners form a planar quadrilateral normal to "normal"
|
|
return 4;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: clip a poly to the plane and return the poly on the front side of the plane
|
|
// Input : *inVerts - input polygon
|
|
// vertCount - # verts in input poly
|
|
// *outVerts - destination poly
|
|
// normal - plane normal
|
|
// dist - plane constant
|
|
// Output : int - # verts in output poly
|
|
//-----------------------------------------------------------------------------
|
|
|
|
int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon )
|
|
{
|
|
vec_t *dists = (vec_t *)stackalloc( sizeof(vec_t) * vertCount * 4 ); //4x vertcount should cover all cases
|
|
int *sides = (int *)stackalloc( sizeof(vec_t) * vertCount * 4 );
|
|
int counts[3];
|
|
vec_t dot;
|
|
int i, j;
|
|
Vector mid = vec3_origin;
|
|
int outCount;
|
|
|
|
counts[0] = counts[1] = counts[2] = 0;
|
|
|
|
// determine sides for each point
|
|
for ( i = 0; i < vertCount; i++ )
|
|
{
|
|
dot = DotProduct( inVerts[i], normal) - dist;
|
|
dists[i] = dot;
|
|
if ( dot > fOnPlaneEpsilon )
|
|
{
|
|
sides[i] = SIDE_FRONT;
|
|
}
|
|
else if ( dot < -fOnPlaneEpsilon )
|
|
{
|
|
sides[i] = SIDE_BACK;
|
|
}
|
|
else
|
|
{
|
|
sides[i] = SIDE_ON;
|
|
}
|
|
counts[sides[i]]++;
|
|
}
|
|
sides[i] = sides[0];
|
|
dists[i] = dists[0];
|
|
|
|
if (!counts[0])
|
|
return 0;
|
|
|
|
if (!counts[1])
|
|
{
|
|
// Copy to output verts
|
|
for ( i = 0; i < vertCount; i++ )
|
|
{
|
|
VectorCopy( inVerts[i], outVerts[i] );
|
|
}
|
|
return vertCount;
|
|
}
|
|
|
|
outCount = 0;
|
|
for ( i = 0; i < vertCount; i++ )
|
|
{
|
|
Vector& p1 = inVerts[i];
|
|
|
|
if (sides[i] == SIDE_ON)
|
|
{
|
|
VectorCopy( p1, outVerts[outCount]);
|
|
outCount++;
|
|
continue;
|
|
}
|
|
|
|
if (sides[i] == SIDE_FRONT)
|
|
{
|
|
VectorCopy( p1, outVerts[outCount]);
|
|
outCount++;
|
|
}
|
|
|
|
if (sides[i+1] == SIDE_ON || sides[i+1] == sides[i])
|
|
continue;
|
|
|
|
// generate a split point
|
|
Vector& p2 = inVerts[(i+1)%vertCount];
|
|
|
|
dot = dists[i] / (dists[i]-dists[i+1]);
|
|
for (j=0 ; j<3 ; j++)
|
|
{ // avoid round off error when possible
|
|
if (normal[j] == 1)
|
|
mid[j] = dist;
|
|
else if (normal[j] == -1)
|
|
mid[j] = -dist;
|
|
else
|
|
mid[j] = p1[j] + dot*(p2[j]-p1[j]);
|
|
}
|
|
|
|
VectorCopy (mid, outVerts[outCount]);
|
|
outCount++;
|
|
}
|
|
|
|
return outCount;
|
|
}
|
|
|
|
|
|
int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon )
|
|
{
|
|
double *dists = (double *)stackalloc( sizeof(double) * vertCount * 4 ); //4x vertcount should cover all cases
|
|
int *sides = (int *)stackalloc( sizeof(double) * vertCount * 4 );
|
|
int counts[3];
|
|
double dot;
|
|
int i, j;
|
|
//Vector mid = vec3_origin;
|
|
double mid[3];
|
|
mid[0] = 0.0;
|
|
mid[1] = 0.0;
|
|
mid[2] = 0.0;
|
|
int outCount;
|
|
|
|
counts[0] = counts[1] = counts[2] = 0;
|
|
|
|
// determine sides for each point
|
|
for ( i = 0; i < vertCount; i++ )
|
|
{
|
|
//dot = DotProduct( inVerts[i], normal) - dist;
|
|
dot = ((inVerts[i*3 + 0] * normal[0]) + (inVerts[i*3 + 1] * normal[1]) + (inVerts[i*3 + 2] * normal[2])) - dist;
|
|
dists[i] = dot;
|
|
if ( dot > fOnPlaneEpsilon )
|
|
{
|
|
sides[i] = SIDE_FRONT;
|
|
}
|
|
else if ( dot < -fOnPlaneEpsilon )
|
|
{
|
|
sides[i] = SIDE_BACK;
|
|
}
|
|
else
|
|
{
|
|
sides[i] = SIDE_ON;
|
|
}
|
|
counts[sides[i]]++;
|
|
}
|
|
sides[i] = sides[0];
|
|
dists[i] = dists[0];
|
|
|
|
if (!counts[0])
|
|
return 0;
|
|
|
|
if (!counts[1])
|
|
{
|
|
// Copy to output verts
|
|
//for ( i = 0; i < vertCount; i++ )
|
|
for ( i = 0; i < vertCount * 3; i++ )
|
|
{
|
|
//VectorCopy( inVerts[i], outVerts[i] );
|
|
outVerts[i] = inVerts[i];
|
|
}
|
|
return vertCount;
|
|
}
|
|
|
|
outCount = 0;
|
|
for ( i = 0; i < vertCount; i++ )
|
|
{
|
|
//Vector& p1 = inVerts[i];
|
|
double *p1 = &inVerts[i*3];
|
|
//p1[0] = inVerts[i*3 + 0];
|
|
//p1[1] = inVerts[i*3 + 1];
|
|
//p1[2] = inVerts[i*3 + 2];
|
|
|
|
if (sides[i] == SIDE_ON)
|
|
{
|
|
//VectorCopy( p1, outVerts[outCount]);
|
|
outVerts[outCount*3 + 0] = p1[0];
|
|
outVerts[outCount*3 + 1] = p1[1];
|
|
outVerts[outCount*3 + 2] = p1[2];
|
|
outCount++;
|
|
continue;
|
|
}
|
|
|
|
if (sides[i] == SIDE_FRONT)
|
|
{
|
|
//VectorCopy( p1, outVerts[outCount]);
|
|
outVerts[outCount*3 + 0] = p1[0];
|
|
outVerts[outCount*3 + 1] = p1[1];
|
|
outVerts[outCount*3 + 2] = p1[2];
|
|
outCount++;
|
|
}
|
|
|
|
if (sides[i+1] == SIDE_ON || sides[i+1] == sides[i])
|
|
continue;
|
|
|
|
// generate a split point
|
|
//Vector& p2 = inVerts[(i+1)%vertCount];
|
|
int wrappedindex = (i+1)%vertCount;
|
|
double *p2 = &inVerts[wrappedindex*3];
|
|
//p2[0] = inVerts[wrappedindex*3 + 0];
|
|
//p2[1] = inVerts[wrappedindex*3 + 1];
|
|
//p2[2] = inVerts[wrappedindex*3 + 2];
|
|
|
|
dot = dists[i] / (dists[i]-dists[i+1]);
|
|
for (j=0 ; j<3 ; j++)
|
|
{
|
|
mid[j] = (double)p1[j] + dot*((double)p2[j]-(double)p1[j]);
|
|
}
|
|
|
|
//VectorCopy (mid, outVerts[outCount]);
|
|
outVerts[outCount*3 + 0] = mid[0];
|
|
outVerts[outCount*3 + 1] = mid[1];
|
|
outVerts[outCount*3 + 2] = mid[2];
|
|
outCount++;
|
|
}
|
|
|
|
return outCount;
|
|
}
|
|
|
|
void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out )
|
|
{
|
|
Assert( s_bMathlibInitialized );
|
|
// FIXME: Why is there a factor of 255 built into this?
|
|
out.x = 255.0f * TexLightToLinear( in.r, in.exponent );
|
|
out.y = 255.0f * TexLightToLinear( in.g, in.exponent );
|
|
out.z = 255.0f * TexLightToLinear( in.b, in.exponent );
|
|
}
|
|
|
|
// assumes that the desired mantissa range is 0..255
|
|
static int VectorToColorRGBExp32_CalcExponent( float in )
|
|
{
|
|
int power = 0;
|
|
|
|
if( in != 0.0f )
|
|
{
|
|
while( in > 255.0f )
|
|
{
|
|
power += 1;
|
|
in *= 0.5f;
|
|
}
|
|
|
|
while( in < 127.0f )
|
|
{
|
|
power -= 1;
|
|
in *= 2.0f;
|
|
}
|
|
}
|
|
|
|
return power;
|
|
}
|
|
|
|
void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
|
|
{
|
|
Vector v = vin;
|
|
Assert( s_bMathlibInitialized );
|
|
Assert( v.x >= 0.0f && v.y >= 0.0f && v.z >= 0.0f );
|
|
int i;
|
|
float max = v[0];
|
|
for( i = 1; i < 3; i++ )
|
|
{
|
|
// Get the maximum value.
|
|
if( v[i] > max )
|
|
{
|
|
max = v[i];
|
|
}
|
|
}
|
|
|
|
// figure out the exponent for this luxel.
|
|
int exponent = VectorToColorRGBExp32_CalcExponent( max );
|
|
|
|
// make the exponent fits into a signed byte.
|
|
if( exponent < -128 )
|
|
{
|
|
exponent = -128;
|
|
}
|
|
else if( exponent > 127 )
|
|
{
|
|
exponent = 127;
|
|
}
|
|
|
|
// undone: optimize with a table
|
|
float scalar = pow( 2.0f, -exponent );
|
|
// convert to mantissa x 2^exponent format
|
|
for( i = 0; i < 3; i++ )
|
|
{
|
|
v[i] *= scalar;
|
|
// clamp
|
|
if( v[i] > 255.0f )
|
|
{
|
|
v[i] = 255.0f;
|
|
}
|
|
}
|
|
c.r = ( unsigned char )v[0];
|
|
c.g = ( unsigned char )v[1];
|
|
c.b = ( unsigned char )v[2];
|
|
c.exponent = ( signed char )exponent;
|
|
}
|
|
|
|
int CeilPow2( int in )
|
|
{
|
|
int retval;
|
|
|
|
retval = 1;
|
|
while( retval < in )
|
|
retval <<= 1;
|
|
return retval;
|
|
}
|
|
|
|
int FloorPow2( int in )
|
|
{
|
|
int retval;
|
|
|
|
retval = 1;
|
|
while( retval < in )
|
|
retval <<= 1;
|
|
return retval >> 1;
|
|
}
|
|
|
|
bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum )
|
|
{
|
|
return (( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_RIGHT) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_LEFT) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_TOP) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_BOTTOM) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_NEARZ) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_FARZ) ) == 2 ) );
|
|
}
|
|
|
|
bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum )
|
|
{
|
|
return (( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_RIGHT) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_LEFT) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_TOP) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_BOTTOM) ) == 2 ) ||
|
|
( BoxOnPlaneSide( mins, maxs, frustum.GetPlane(FRUSTUM_FARZ) ) == 2 ) );
|
|
}
|
|
|
|
#endif // !_STATIC_LINKED || _SHARED_LIB
|