1
0
mirror of https://github.com/alliedmodders/hl2sdk.git synced 2024-12-23 01:59:43 +08:00

More mathlib unification with SDK 2013, including 3dnow fixes (#231)

This commit is contained in:
Nick Hastings 2024-04-21 11:52:42 -04:00
parent b886c90f88
commit 732b5b29b5
19 changed files with 328 additions and 29 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

197
mathlib/3dnow.cpp Normal file
View File

@ -0,0 +1,197 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: 3DNow Math primitives.
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/amd3dx.h"
#include "mathlib/vector.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
#if !defined(COMPILER_MSVC64) && !defined(LINUX)
// Implement for 64-bit Windows if needed.
// Clang hits "fatal error: error in backend:" and other errors when trying
// to compile the inline assembly below. 3DNow support is highly unlikely to
// be useful/used, so it's not worth spending time on fixing.
#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
//-----------------------------------------------------------------------------
// 3D Now Implementations of optimized routines:
//-----------------------------------------------------------------------------
// Computes an approximate square root of x with 3DNow! instructions.
// The value is formed as x * (1 / sqrt(x)) from the PFRSQRT estimate; no
// refinement instruction follows the estimate in this routine.
float _3DNow_Sqrt(float x)
{
Assert( s_bMathlibInitialized );
float root = 0.f;
#ifdef _WIN32
// Register use in the block below:
//   mm0  <- x
//   mm1  <- PFRSQRT estimate of 1/sqrt(x)
//   mm0  <- x copied into both halves, then multiplied by mm1
//   root <- low dword of mm0
// femms is issued before and after the MMX register use.
_asm
{
femms
movd mm0, x
PFRSQRT (mm1,mm0)
punpckldq mm0, mm0
PFMUL (mm0, mm1)
movd root, mm0
femms
}
#elif LINUX
// NOTE(review): the #if that encloses these routines requires !defined(LINUX),
// so this GCC inline-assembly variant looks unreachable as written - confirm.
__asm __volatile__( "femms" );
__asm __volatile__
(
"pfrsqrt %y0, %y1 \n\t"
"punpckldq %y1, %y1 \n\t"
"pfmul %y1, %y0 \n\t"
: "=y" (root), "=y" (x)
:"0" (x)
);
__asm __volatile__( "femms" );
#else
#error
#endif
return root;
}
// NJS FIXME: Need to test reciprocal square root performance and accuracy
// on AMD CPUs before using the specialized instruction.
float _3DNow_RSqrt(float x)
{
	Assert( s_bMathlibInitialized );

	// Built on top of the 3DNow! square root: take the root first, then
	// invert it with an ordinary floating-point divide.
	const float flRoot = _3DNow_Sqrt(x);
	return 1.f / flRoot;
}
// Normalizes vec in place with 3DNow! instructions and returns the length the
// vector had before normalization. If all three components compare equal to
// zero the vector is left untouched and 0 is returned.
// The first two components are accessed as one 8-byte quantity and the third
// as a 4-byte quantity at byte offset 8 from &vec[0], so the three floats are
// expected to be contiguous.
float FASTCALL _3DNow_VectorNormalize (Vector& vec)
{
Assert( s_bMathlibInitialized );
float *v = &vec[0];
float radius = 0.f;
// Skip the zero vector so the reciprocal square root is never taken of 0.
if ( v[0] || v[1] || v[2] )
{
#ifdef _WIN32
// Register use in the block below:
//   eax      <- v
//   mm0      <- (x, y), mm1 <- z; mm2 / mm3 keep unsquared copies
//   mm0      <- (x*x, y*y), mm1 <- z*z
//   PFACC    : mm0 <- x*x + y*y
//   PFADD    : mm1 <- x*x + y*y + z*z (squared length)
//   PFRSQRT  : mm0 <- estimate of 1/length
//   mm1      <- squared length * (1/length), i.e. the length
//   mm2, mm3 <- original components scaled by 1/length, stored back to vec
//   radius   <- low dword of mm1
_asm
{
mov eax, v
femms
movq mm0, QWORD PTR [eax]
movd mm1, DWORD PTR [eax+8]
movq mm2, mm0
movq mm3, mm1
PFMUL (mm0, mm0)
PFMUL (mm1, mm1)
PFACC (mm0, mm0)
PFADD (mm1, mm0)
PFRSQRT (mm0, mm1)
punpckldq mm1, mm1
PFMUL (mm1, mm0)
PFMUL (mm2, mm0)
PFMUL (mm3, mm0)
movq QWORD PTR [eax], mm2
movd DWORD PTR [eax+8], mm3
movd radius, mm1
femms
}
#elif LINUX
// NOTE(review): the #if that encloses these routines requires !defined(LINUX),
// so this GCC inline-assembly variant looks unreachable as written - confirm.
// The components are copied bit-for-bit into integer temporaries (a/c hold
// the first two floats, b/d the third) that are passed through "y" operands.
long long a,c;
int b,d;
memcpy(&a,&vec[0],sizeof(a));
memcpy(&b,&vec[2],sizeof(b));
memcpy(&c,&vec[0],sizeof(c));
memcpy(&d,&vec[2],sizeof(d));
__asm __volatile__( "femms" );
__asm __volatile__
(
"pfmul %y3, %y3\n\t"
"pfmul %y0, %y0 \n\t"
"pfacc %y3, %y3 \n\t"
"pfadd %y3, %y0 \n\t"
"pfrsqrt %y0, %y3 \n\t"
"punpckldq %y0, %y0 \n\t"
"pfmul %y3, %y0 \n\t"
"pfmul %y3, %y2 \n\t"
"pfmul %y3, %y1 \n\t"
: "=y" (radius), "=y" (c), "=y" (d)
: "y" (a), "0" (b), "1" (c), "2" (d)
);
// Write the scaled components back into the caller's vector.
memcpy(&vec[0],&c,sizeof(c));
memcpy(&vec[2],&d,sizeof(d));
__asm __volatile__( "femms" );
#else
#error
#endif
}
return radius;
}
void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
{
	// There is no separate fast path here: reuse the regular 3DNow! normalize
	// and discard the length it returns.
	static_cast<void>( _3DNow_VectorNormalize( vec ) );
}
// JAY: This complains with the latest processor pack
#pragma warning(disable: 4730)
// Returns an approximation of 1 / max( 1.0, v[0]^2 + v[1]^2 + v[2]^2 ) using
// 3DNow! instructions. v is read as an 8-byte pair followed by a 4-byte value
// at byte offset 8, i.e. three contiguous floats.
float _3DNow_InvRSquared(const float* v)
{
Assert( s_bMathlibInitialized );
// Starts at 1.0: it is loaded as the lower bound for the squared length
// (see PFMAX below) and is then overwritten with the result.
float r2 = 1.f;
#ifdef _WIN32
// Register use in the block below:
//   eax   <- v
//   mm0   <- (v[0], v[1]), mm1 <- v[2], mm2 <- r2 (1.0)
//   mm0   <- (v[0]^2, v[1]^2), mm1 <- v[2]^2
//   PFACC : mm0 <- v[0]^2 + v[1]^2
//   PFADD : mm1 <- v[0]^2 + v[1]^2 + v[2]^2
//   PFMAX : mm1 <- max( mm1, 1.0 )
//   PFRCP : mm0 <- estimate of 1/mm1
//   r2    <- low dword of mm0
_asm { // AMD 3DNow only routine
mov eax, v
femms
movq mm0, QWORD PTR [eax]
movd mm1, DWORD PTR [eax+8]
movd mm2, [r2]
PFMUL (mm0, mm0)
PFMUL (mm1, mm1)
PFACC (mm0, mm0)
PFADD (mm1, mm0)
PFMAX (mm1, mm2)
PFRCP (mm0, mm1)
movd [r2], mm0
femms
}
#elif LINUX
// NOTE(review): the #if that encloses these routines requires !defined(LINUX),
// so this GCC inline-assembly variant looks unreachable as written - confirm.
// NOTE(review): the template below references %y4, but only operands 0-3 are
// declared (one output, three inputs) - verify before enabling this branch.
long long a,c;
int b;
memcpy(&a,&v[0],sizeof(a));
memcpy(&b,&v[2],sizeof(b));
memcpy(&c,&v[0],sizeof(c));
__asm __volatile__( "femms" );
__asm __volatile__
(
"PFMUL %y2, %y2 \n\t"
"PFMUL %y3, %y3 \n\t"
"PFACC %y2, %y2 \n\t"
"PFADD %y2, %y3 \n\t"
"PFMAX %y3, %y4 \n\t"
"PFRCP %y3, %y2 \n\t"
"movq %y2, %y0 \n\t"
: "=y" (r2)
: "0" (r2), "y" (a), "y" (b), "y" (c)
);
__asm __volatile__( "femms" );
#else
#error
#endif
return r2;
}
#endif // COMPILER_MSVC64

16
mathlib/3dnow.h Normal file
View File

@ -0,0 +1,16 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Declarations for the 3DNow! optimized math primitives.
//
//=====================================================================================//
#ifndef _3DNOW_H
#define _3DNOW_H
// NOTE(review): this header includes nothing itself, so Vector and FASTCALL
// presumably have to be declared by the including file beforehand - confirm.

// Approximate square root of x.
float _3DNow_Sqrt(float x);
// Reciprocal square root of x, computed as 1 / _3DNow_Sqrt(x).
float _3DNow_RSqrt(float x);
// Normalizes vec in place and returns its length before normalization
// (0 for an all-zero vector, which is left unchanged).
float FASTCALL _3DNow_VectorNormalize (Vector& vec);
// Same normalization as _3DNow_VectorNormalize, without returning the length.
void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec);
// Approximate 1 / max( 1, v[0]^2 + v[1]^2 + v[2]^2 ); v points at three floats.
float _3DNow_InvRSquared(const float* v);
#endif // _3DNOW_H

View File

@ -11,6 +11,7 @@ builder.SetBuildFolder('/')
project = builder.StaticLibraryProject('mathlib')
project.sources = [
'3dnow.cpp',
'almostequal.cpp',
'anorms.cpp',
'bumpvects.cpp',

View File

@ -26,6 +26,9 @@
#include "mathlib/vector.h"
#if !defined( _X360 )
#include "mathlib/amd3dx.h"
#ifndef OSX
#include "3dnow.h"
#endif
#include "sse.h"
#endif
@ -3355,7 +3358,26 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
s_bMMXEnabled = false;
}
// SSE Generally performs better than 3DNow when present, so this is placed
// first to allow SSE to override these settings.
#if !defined( OSX ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(LINUX)
if ( bAllow3DNow && pi.m_b3DNow )
{
s_b3DNowEnabled = true;
// Select the 3DNow specific routines if available;
pfVectorNormalize = _3DNow_VectorNormalize;
pfVectorNormalizeFast = _3DNow_VectorNormalizeFast;
pfInvRSquared = _3DNow_InvRSquared;
pfSqrt = _3DNow_Sqrt;
pfRSqrt = _3DNow_RSqrt;
pfRSqrtFast = _3DNow_RSqrt;
}
else
#endif
{
s_b3DNowEnabled = false;
}
if ( bAllowSSE && pi.m_bSSE )
{

View File

@ -18,7 +18,7 @@
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
#ifdef _MSC_VER
#ifdef _WIN32
#pragma warning (disable : 4700) // local variable 'x' used without having been initialized
#endif

View File

@ -2407,10 +2407,20 @@ FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
// like this.
FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
{
#ifdef COMPILER_MSVC64
	// This configuration avoids the __m64 intrinsics used in the other branch
	// and converts each of the four lanes with a plain float -> int assignment.
	intx4 &destLanes = *pDest;
	destLanes[0] = SubFloat(vSrc, 0);
	destLanes[1] = SubFloat(vSrc, 1);
	destLanes[2] = SubFloat(vSrc, 2);
	destLanes[3] = SubFloat(vSrc, 3);
#else
	// Convert two lanes at a time: lanes 0-1 straight from vSrc, lanes 2-3 after
	// moving the high half of vSrc down into the low half.
	__m64 *pLowOut = reinterpret_cast<__m64 *>(&(*pDest)[0]);
	__m64 *pHighOut = reinterpret_cast<__m64 *>(&(*pDest)[2]);
	const __m64 lowPair = _mm_cvttps_pi32( vSrc );
	const __m64 highPair = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) );
	*pLowOut = lowPair;
	*pHighOut = highPair;

	// Leave the MMX state once the __m64 values have been stored.
	_mm_empty();
#endif
}

View File

@ -72,7 +72,7 @@ public:
// Construction/destruction:
Vector(void);
Vector(vec_t X, vec_t Y, vec_t Z);
Vector(vec_t XYZ); // TODO (Ilya): is this potentially a bad idea?
explicit Vector(vec_t XYZ); ///< broadcast initialize
// Initialization
void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);

View File

@ -910,6 +910,7 @@ inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float fl
void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar );
void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar );
void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right );
void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar );
inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar )
{

View File

@ -545,6 +545,11 @@ typedef void * HINSTANCE;
#endif
#ifdef POSIX
typedef uintp HMODULE;
typedef void *HANDLE;
#endif
//-----------------------------------------------------------------------------
// fsel
//-----------------------------------------------------------------------------

View File

@ -36,21 +36,16 @@
#pragma once
#endif
#if defined _LINUX || defined __APPLE__
#ifdef _LINUX
#include <dlfcn.h> // dlopen,dlclose, et al
#include <unistd.h>
#ifndef HMODULE
#define HMODULE void *
#endif
#ifndef GetProcAddress
#define GetProcAddress dlsym
#endif
#ifndef _snprintf
#define _snprintf snprintf
#ifdef _snprintf
#undef _snprintf
#endif
#define _snprintf snprintf
#endif
// TODO: move interface.cpp into tier0 library.
@ -125,18 +120,21 @@ public:
// Use this to expose a singleton interface with a global variable you've created.
#if !defined(_STATIC_LINKED) || !defined(_SUBSYSTEM)
#define EXPOSE_SINGLE_INTERFACE_GLOBALVAR(className, interfaceName, versionName, globalVarName) \
static void* __Create##className##interfaceName##_interface() {return static_cast<interfaceName *>( &globalVarName );} \
#define EXPOSE_SINGLE_INTERFACE_GLOBALVAR_WITH_NAMESPACE(className, interfaceNamespace, interfaceName, versionName, globalVarName) \
static void* __Create##className##interfaceName##_interface() {return static_cast<interfaceNamespace interfaceName *>( &globalVarName );} \
static InterfaceReg __g_Create##className##interfaceName##_reg(__Create##className##interfaceName##_interface, versionName);
#else
#define EXPOSE_SINGLE_INTERFACE_GLOBALVAR(className, interfaceName, versionName, globalVarName) \
#define EXPOSE_SINGLE_INTERFACE_GLOBALVAR_WITH_NAMESPACE(className, interfaceNamespace, interfaceName, versionName, globalVarName) \
namespace _SUBSYSTEM \
{ \
static void* __Create##className##interfaceName##_interface() {return static_cast<interfaceName *>( &globalVarName );} \
static void* __Create##className##interfaceName##_interface() {return static_cast<interfaceNamespace interfaceName *>( &globalVarName );} \
static InterfaceReg __g_Create##className##interfaceName##_reg(__Create##className##interfaceName##_interface, versionName); \
}
#endif
#define EXPOSE_SINGLE_INTERFACE_GLOBALVAR(className, interfaceName, versionName, globalVarName) \
EXPOSE_SINGLE_INTERFACE_GLOBALVAR_WITH_NAMESPACE(className, , interfaceName, versionName, globalVarName)
// Use this to expose a singleton interface. This creates the global variable for you automatically.
#if !defined(_STATIC_LINKED) || !defined(_SUBSYSTEM)
#define EXPOSE_SINGLE_INTERFACE(className, interfaceName, versionName) \

View File

@ -1,4 +1,4 @@
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
@ -27,13 +27,16 @@
#include "tier0/threadtools.h"
#ifdef _WIN32
#include <direct.h> // getcwd
#elif defined _LINUX || defined __APPLE__
#elif POSIX
#include <dlfcn.h>
#include <unistd.h>
#define _getcwd getcwd
#endif
#if defined( _X360 )
#include "xbox/xbox_win32stubs.h"
#endif
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
@ -54,8 +57,15 @@ InterfaceReg::InterfaceReg( InstantiateInterfaceFn fn, const char *pName ) :
// CreateInterface.
// This is the primary exported function by a dll, referenced by name via dynamic binding
// that exposes an opaque function pointer to the interface.
//
// We have the Internal variant so that Sys_GetFactoryThis() returns the correct internal
// symbol under GCC/Linux/Mac. CreateInterface is DLL_EXPORT and therefore global, so the loaders
// on those OSes pick exactly one of the CreateInterface symbols to be the process-wide one, and
// all Sys_GetFactoryThis() calls find that one, which doesn't work. Using the internal function here
// makes sure Sys_GetFactoryThis() has the dll-specific symbol, while GetProcAddress() still returns the
// module-specific CreateInterface function, again giving us the dll-specific symbol we need.
// ------------------------------------------------------------------------------------ //
void* CreateInterface( const char *pName, int *pReturnCode )
void* CreateInterfaceInternal( const char *pName, int *pReturnCode )
{
InterfaceReg *pCur;
@ -78,8 +88,14 @@ void* CreateInterface( const char *pName, int *pReturnCode )
return NULL;
}
// Public entry point: hands the request straight to CreateInterfaceInternal
// and returns whatever it produced.
void* CreateInterface( const char *pName, int *pReturnCode )
{
	void *pInterface = CreateInterfaceInternal( pName, pReturnCode );
	return pInterface;
}
#if defined _LINUX || defined __APPLE__
#ifdef POSIX
// Linux doesn't have this function so this emulates its functionality
void *GetModuleHandle(const char *name)
{
@ -119,14 +135,24 @@ void *GetModuleHandle(const char *name)
//-----------------------------------------------------------------------------
static void *Sys_GetProcAddress( const char *pModuleName, const char *pName )
{
HMODULE hModule = GetModuleHandle( pModuleName );
return GetProcAddress( hModule, pName );
HMODULE hModule = (HMODULE)GetModuleHandle( pModuleName );
#ifdef WIN32
return (void *)GetProcAddress( hModule, pName );
#else
return (void *)dlsym( (void *)hModule, pName );
#endif
}
#if !defined(LINUX)
static void *Sys_GetProcAddress( HMODULE hModule, const char *pName )
{
return GetProcAddress( hModule, pName );
#ifdef WIN32
return (void *)GetProcAddress( hModule, pName );
#else
return (void *)dlsym( (void *)hModule, pName );
#endif
}
#endif
bool Sys_IsDebuggerPresent()
{
@ -247,7 +273,7 @@ CSysModule *Sys_LoadModule( const char *pModuleName, Sys_Flags flags /* = SYS_NO
// file in the depot (MFP) or a filesystem GetLocalCopy() call must be made
// prior to the call to this routine.
char szCwd[1024];
HMODULE hDLL = NULL;
HMODULE hDLL = 0;
if ( !Q_IsAbsolutePath( pModuleName ) )
{
@ -383,7 +409,7 @@ void Sys_UnloadModule( CSysModule *pModule )
#ifdef _WIN32
FreeLibrary( hDLL );
#elif defined(_LINUX) || defined(__APPLE__)
#elif defined(POSIX)
dlclose((void *)hDLL);
#endif
}
@ -402,7 +428,7 @@ CreateInterfaceFn Sys_GetFactory( CSysModule *pModule )
HMODULE hDLL = reinterpret_cast<HMODULE>(pModule);
#ifdef _WIN32
return reinterpret_cast<CreateInterfaceFn>(GetProcAddress( hDLL, CREATEINTERFACE_PROCNAME ));
#elif defined(_LINUX) || defined (__APPLE__)
#elif defined(POSIX)
// Linux gives this error:
//../public/interface.cpp: In function `IBaseInterface *(*Sys_GetFactory
//(CSysModule *)) (const char *, int *)':
@ -410,7 +436,7 @@ CreateInterfaceFn Sys_GetFactory( CSysModule *pModule )
//pointer-to-function and pointer-to-object
//
// so lets get around it :)
return (CreateInterfaceFn)(GetProcAddress( hDLL, CREATEINTERFACE_PROCNAME ));
return (CreateInterfaceFn)(GetProcAddress( (void *)hDLL, CREATEINTERFACE_PROCNAME ));
#endif
}
@ -420,7 +446,7 @@ CreateInterfaceFn Sys_GetFactory( CSysModule *pModule )
//-----------------------------------------------------------------------------
CreateInterfaceFn Sys_GetFactoryThis( void )
{
return CreateInterface;
return &CreateInterfaceInternal;
}
//-----------------------------------------------------------------------------
@ -432,7 +458,7 @@ CreateInterfaceFn Sys_GetFactory( const char *pModuleName )
{
#ifdef _WIN32
return static_cast<CreateInterfaceFn>( Sys_GetProcAddress( pModuleName, CREATEINTERFACE_PROCNAME ) );
#elif defined(_LINUX) || defined(__APPLE__)
#elif defined(POSIX)
// see Sys_GetFactory( CSysModule *pModule ) for an explanation
return (CreateInterfaceFn)( Sys_GetProcAddress( pModuleName, CREATEINTERFACE_PROCNAME ) );
#endif
@ -516,3 +542,26 @@ void CDllDemandLoader::Unload()
m_hModule = 0;
}
}
#if defined( STAGING_ONLY ) && defined( _WIN32 )
typedef USHORT( WINAPI RtlCaptureStackBackTrace_FUNC )(
ULONG frames_to_skip,
ULONG frames_to_capture,
PVOID *backtrace,
PULONG backtrace_hash );
extern "C" int backtrace( void **buffer, int size )
{
HMODULE hNTDll = GetModuleHandleA( "ntdll.dll" );
static RtlCaptureStackBackTrace_FUNC * const pfnRtlCaptureStackBackTrace =
( RtlCaptureStackBackTrace_FUNC * )GetProcAddress( hNTDll, "RtlCaptureStackBackTrace" );
if ( !pfnRtlCaptureStackBackTrace )
return 0;
return (int)pfnRtlCaptureStackBackTrace( 2, size, buffer, 0 );
}
#endif // STAGING_ONLY && _WIN32