csgo-2018-source/tier0/cpu.cpp
2021-07-24 21:11:47 -07:00

926 lines
24 KiB
C++

//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#include "pch_tier0.h"
#if defined(_WIN32) && !defined(_X360)
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#include "cputopology.h"
#elif defined( PLATFORM_OSX )
#include <sys/sysctl.h>
#endif
#ifndef _PS3
#include "tier0_strtools.h"
#endif
//#include "tier1/strtools.h" // this is included for the definition of V_isspace()
#ifdef PLATFORM_WINDOWS_PC
#include <intrin.h>
#endif
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
const tchar* GetProcessorVendorId();
const tchar* GetProcessorBrand();
struct CpuIdResult_t
{
unsigned long eax;
unsigned long ebx;
unsigned long ecx;
unsigned long edx;
void Reset()
{
eax = ebx = ecx = edx = 0;
}
};
static bool cpuid( unsigned long function, CpuIdResult_t &out )
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#elif defined(GNUC)
unsigned long out_eax,out_ebx,out_ecx,out_edx;
#ifdef PLATFORM_64BITS
asm("mov %%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchg %%rsi, %%rbx"
: "=a" (out_eax),
"=S" (out_ebx),
"=c" (out_ecx),
"=d" (out_edx)
: "a" (function)
);
#else
asm("mov %%ebx, %%esi\n\t"
"cpuid\n\t"
"xchg %%esi, %%ebx"
: "=a" (out_eax),
"=S" (out_ebx),
"=c" (out_ecx),
"=d" (out_edx)
: "a" (function)
);
#endif
out.eax = out_eax;
out.ebx = out_ebx;
out.ecx = out_ecx;
out.edx = out_edx;
return true;
#elif defined(_WIN64)
int pCPUInfo[4];
__cpuid( pCPUInfo, (int)function );
out.eax = pCPUInfo[0];
out.ebx = pCPUInfo[1];
out.ecx = pCPUInfo[2];
out.edx = pCPUInfo[3];
return true;
#else
bool retval = true;
unsigned long out_eax = 0, out_ebx = 0, out_ecx = 0, out_edx = 0;
_asm pushad;
__try
{
_asm
{
xor edx, edx // Clue the compiler that EDX & others is about to be used.
xor ecx, ecx
xor ebx, ebx // <Sergiy> Note: if I don't zero these out, cpuid sometimes won't work, I didn't find out why yet
mov eax, function // set up CPUID to return processor version and features
// 0 = vendor string, 1 = version info, 2 = cache info
cpuid // code bytes = 0fh, 0a2h
mov out_eax, eax // features returned in eax
mov out_ebx, ebx // features returned in ebx
mov out_ecx, ecx // features returned in ecx
mov out_edx, edx // features returned in edx
}
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
retval = false;
}
out.eax = out_eax;
out.ebx = out_ebx;
out.ecx = out_ecx;
out.edx = out_edx;
_asm popad
return retval;
#endif
}
static bool cpuidex( unsigned long function, unsigned long subfunction, CpuIdResult_t &out )
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#elif defined(GNUC)
unsigned long out_eax, out_ebx, out_ecx, out_edx;
asm( "mov %%ebx, %%esi\n\t"
"cpuid\n\t"
"xchg %%esi, %%ebx"
: "=a" ( out_eax ),
"=S" ( out_ebx ),
"=c" ( out_ecx ),
"=d" ( out_edx )
: "a" ( function ),
"c" ( subfunction )
);
out.eax = out_eax;
out.ebx = out_ebx;
out.ecx = out_ecx;
out.edx = out_edx;
return true;
#elif defined(_WIN64)
int pCPUInfo[ 4 ];
__cpuidex( pCPUInfo, ( int )function, ( int )subfunction );
out.eax = pCPUInfo[ 0 ];
out.ebx = pCPUInfo[ 1 ];
out.ecx = pCPUInfo[ 2 ];
out.edx = pCPUInfo[ 3 ];
return false;
#else
bool retval = true;
unsigned long out_eax = 0, out_ebx = 0, out_ecx = 0, out_edx = 0;
_asm pushad;
__try
{
_asm
{
xor edx, edx // Clue the compiler that EDX & others is about to be used.
mov ecx, subfunction
xor ebx, ebx // <Sergiy> Note: if I don't zero these out, cpuid sometimes won't work, I didn't find out why yet
mov eax, function // set up CPUID to return processor version and features
// 0 = vendor string, 1 = version info, 2 = cache info
cpuid // code bytes = 0fh, 0a2h
mov out_eax, eax // features returned in eax
mov out_ebx, ebx // features returned in ebx
mov out_ecx, ecx // features returned in ecx
mov out_edx, edx // features returned in edx
}
}
__except ( EXCEPTION_EXECUTE_HANDLER )
{
retval = false;
}
out.eax = out_eax;
out.ebx = out_ebx;
out.ecx = out_ecx;
out.edx = out_edx;
_asm popad
return retval;
#endif
}
static CpuIdResult_t cpuid( unsigned long function )
{
CpuIdResult_t out;
if ( !cpuid( function, out ) )
{
out.Reset();
}
return out;
}
static CpuIdResult_t cpuidex( unsigned long function, unsigned long subfunction )
{
CpuIdResult_t out;
if ( !cpuidex( function, subfunction, out ) )
{
out.Reset();
}
return out;
}
//-----------------------------------------------------------------------------
// Purpose: This is a bit of a hack because it appears
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
static bool IsWin98OrOlder()
{
#if defined( _X360 ) || defined( _PS3 ) || defined( POSIX )
return false;
#else
bool retval = false;
OSVERSIONINFOEX osvi;
ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
BOOL bOsVersionInfoEx = GetVersionEx ((OSVERSIONINFO *) &osvi);
if( !bOsVersionInfoEx )
{
// If OSVERSIONINFOEX doesn't work, try OSVERSIONINFO.
osvi.dwOSVersionInfoSize = sizeof (OSVERSIONINFO);
if ( !GetVersionEx ( (OSVERSIONINFO *) &osvi) )
{
Error( _T("IsWin98OrOlder: Unable to get OS version information") );
}
}
switch (osvi.dwPlatformId)
{
case VER_PLATFORM_WIN32_NT:
// NT, XP, Win2K, etc. all OK for SSE
break;
case VER_PLATFORM_WIN32_WINDOWS:
// Win95, 98, Me can't do SSE
retval = true;
break;
case VER_PLATFORM_WIN32s:
// Can't really run this way I don't think...
retval = true;
break;
default:
break;
}
return retval;
#endif
}
static bool CheckSSETechnology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return true;
#else
if ( IsWin98OrOlder() )
{
return false;
}
return ( cpuid( 1 ).edx & 0x2000000L ) != 0;
#endif
}
static bool CheckSSE2Technology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
return ( cpuid( 1 ).edx & 0x04000000 ) != 0;
#endif
}
bool CheckSSE3Technology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
return ( cpuid( 1 ).ecx & 0x00000001 ) != 0; // bit 1 of ECX
#endif
}
bool CheckSSSE3Technology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
// SSSE 3 is implemented by both Intel and AMD
// detection is done the same way for both vendors
return ( cpuid( 1 ).ecx & ( 1 << 9 ) ) != 0; // bit 9 of ECX
#endif
}
bool CheckSSE41Technology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
// SSE 4.1 is implemented by both Intel and AMD
// detection is done the same way for both vendors
return ( cpuid( 1 ).ecx & ( 1 << 19 ) ) != 0; // bit 19 of ECX
#endif
}
bool CheckSSE42Technology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
// SSE4.2 is an Intel-only feature
const char *pchVendor = GetProcessorVendorId();
if ( 0 != V_tier0_stricmp( pchVendor, "GenuineIntel" ) )
return false;
return ( cpuid( 1 ).ecx & ( 1 << 20 ) ) != 0; // bit 20 of ECX
#endif
}
bool CheckSSE4aTechnology( void )
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
// SSE 4a is an AMD-only feature
const char *pchVendor = GetProcessorVendorId();
if ( 0 != V_tier0_stricmp( pchVendor, "AuthenticAMD" ) )
return false;
return ( cpuid( 1 ).ecx & ( 1 << 6 ) ) != 0; // bit 6 of ECX
#endif
}
static bool Check3DNowTechnology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
if ( cpuid( 0x80000000 ).eax > 0x80000000L )
{
return ( cpuid( 0x80000001 ).eax & ( 1 << 31 ) ) != 0;
}
return false;
#endif
}
static bool CheckCMOVTechnology()
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
return ( cpuid( 1 ).edx & ( 1 << 15 ) ) != 0;
#endif
}
static bool CheckFCMOVTechnology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
return ( cpuid( 1 ).edx & ( 1 << 16 ) ) != 0;
#endif
}
static bool CheckRDTSCTechnology(void)
{
#if defined( _X360 ) || defined( _PS3 )
return false;
#else
return ( cpuid( 1 ).edx & 0x10 ) != 0;
#endif
}
static tchar s_CpuVendorID[ 13 ] = "unknown";
bool s_bCpuVendorIdInitialized = false;
union CpuBrand_t
{
CpuIdResult_t cpuid[ 3 ];
char name[ 49 ];
};
CpuBrand_t s_CpuBrand;
bool s_bCpuBrandInitialized = false;
// Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU
const tchar* GetProcessorVendorId()
{
#if defined( _X360 ) || defined( _PS3 )
return "PPC";
#else
if ( s_bCpuVendorIdInitialized )
{
return s_CpuVendorID;
}
s_bCpuVendorIdInitialized = true;
CpuIdResult_t cpuid0 = cpuid( 0 );
memset( s_CpuVendorID, 0, sizeof(s_CpuVendorID) );
if ( !cpuid0.eax )
{
// weird...
if ( IsPC() )
{
_tcscpy( s_CpuVendorID, _T( "Generic_x86" ) );
}
else if ( IsX360() )
{
_tcscpy( s_CpuVendorID, _T( "PowerPC" ) );
}
}
else
{
memcpy( s_CpuVendorID + 0, &( cpuid0.ebx ), sizeof( cpuid0.ebx ) );
memcpy( s_CpuVendorID + 4, &( cpuid0.edx ), sizeof( cpuid0.edx ) );
memcpy( s_CpuVendorID + 8, &( cpuid0.ecx ), sizeof( cpuid0.ecx ) );
}
return s_CpuVendorID;
#endif
}
const tchar* GetProcessorBrand()
{
#if defined( _X360 )
return "Xenon";
#elif defined( _PS3 )
return "Cell Broadband Engine";
#else
if ( s_bCpuBrandInitialized )
{
return s_CpuBrand.name;
}
s_bCpuBrandInitialized = true;
memset( &s_CpuBrand, 0, sizeof( s_CpuBrand ) );
const char *pchVendor = GetProcessorVendorId();
if ( 0 == V_tier0_stricmp( pchVendor, "GenuineIntel" ) )
{
// Intel brand string
if ( cpuid( 0x80000000 ).eax >= 0x80000004 )
{
s_CpuBrand.cpuid[ 0 ] = cpuid( 0x80000002 );
s_CpuBrand.cpuid[ 1 ] = cpuid( 0x80000003 );
s_CpuBrand.cpuid[ 2 ] = cpuid( 0x80000004 );
}
}
return s_CpuBrand.name;
#endif
}
// Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
// If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
// See Intel Processor Identification and the CPUID instruction Application Note 485
// http://www.intel.com/Assets/PDF/appnote/241618.pdf
static bool HTSupported(void)
{
#if ( defined( _X360 ) || defined( _PS3 ) )
// not entirtely sure about the semantic of HT support, it being an intel name
// are we asking about HW threads or HT?
return true;
#else
enum {
HT_BIT = 0x10000000, // EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id
EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id
FAMILY_ID_386 = 0x0300,
FAMILY_ID_486 = 0x0400, // EAX[8:12] - 486, 487 and overdrive
FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200
FAMILY_ID_PENTIUM_PRO = 0x0600,// P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
// also 45-nm : Intel Atom, Core i7, Xeon MP ; see Intel Processor Identification and the CPUID instruction pg 20,21
FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D,
};
// this works on both newer AMD and Intel CPUs
CpuIdResult_t cpuid1 = cpuid( 1 );
// <Sergiy> Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general
// if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
// Check to see if this is an Intel Processor with HT or CMT capability , and if HT/CMT is enabled
// ddk: This codef is actually correct: see example code at software.intel.com/en-us/articles/multi-core-detect/
return ( cpuid1.edx & HT_BIT ) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented
( ( cpuid1.ebx >> 16 ) & 0xFF ) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled
#endif
}
// Returns the number of logical processors per physical processors.
static uint8 LogicalProcessorsPerPackage(void)
{
#if defined( _X360 )
return 2;
#else
// EBX[23:16] indicate number of logical processors per package
const unsigned NUM_LOGICAL_BITS = 0x00FF0000;
if ( !HTSupported() )
return 1;
return ( uint8 )( ( cpuid( 1 ).ebx & NUM_LOGICAL_BITS ) >> 16 );
#endif
}
#if defined(POSIX)
// Move this declaration out of the CalculateClockSpeed() function because
// otherwise clang warns that it is non-obvious whether it is a variable
// or a function declaration: [-Wvexing-parse]
uint64 CalculateCPUFreq(); // from cpu_linux.cpp
#endif
// Measure the processor clock speed by sampling the cycle count, waiting
// for some fraction of a second, then measuring the elapsed number of cycles.
static int64 CalculateClockSpeed()
{
#if defined( _X360 ) || defined(_PS3)
// Xbox360 and PS3 have the same clock speed and share a lot of characteristics on PPU
return 3200000000LL;
#else
#if defined( _WIN32 )
LARGE_INTEGER waitTime, startCount, curCount;
CCycleCount start, end;
// Take 1/32 of a second for the measurement.
QueryPerformanceFrequency( &waitTime );
int scale = 5;
waitTime.QuadPart >>= scale;
QueryPerformanceCounter( &startCount );
start.Sample();
do
{
QueryPerformanceCounter( &curCount );
}
while ( curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart );
end.Sample();
return (end.m_Int64 - start.m_Int64) << scale;
#elif defined(POSIX)
int64 freq =(int64)CalculateCPUFreq();
if ( freq == 0 ) // couldn't calculate clock speed
{
Error( "Unable to determine CPU Frequency\n" );
}
return freq;
#else
#error "Please implement Clock Speed function for this platform"
#endif
#endif
}
static CPUInformation s_cpuInformation;
struct IntelCacheDesc_t
{
uint8 nDesc;
uint16 nCacheSize;
};
static IntelCacheDesc_t s_IntelL1DataCacheDesc[] = {
{ 0xA, 8 },
{ 0xC, 16 },
{ 0xD, 16 },
{ 0x2C, 32 },
{ 0x30, 32 },
{ 0x60, 16 },
{ 0x66, 8 },
{ 0x67, 16 },
{ 0x68, 32 }
};
static IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
{
{ 0x21, 256 },
{ 0x39, 128 },
{ 0x3a, 192 },
{ 0x3b, 128 },
{ 0x3c, 256 },
{ 0x3D, 384 },
{ 0x3E, 512 },
{ 0x41, 128 },
{ 0x42, 256 },
{ 0x43, 512 },
{ 0x44, 1024 },
{ 0x45, 2048 },
{ 0x48, 3 * 1024 },
{ 0x4e, 6 * 1024 },
{ 0x78, 1024 },
{ 0x79, 128 },
{ 0x7a, 256 },
{ 0x7b, 512 },
{ 0x7c, 1024 },
{ 0x7d, 2048 },
{ 0x7f, 512 },
{ 0x82, 256 },
{ 0x83, 512 },
{ 0x84, 1024 },
{ 0x85, 2048 },
{ 0x86, 512 },
{ 0x87, 1024 }
};
static IntelCacheDesc_t s_IntelL3DataCacheDesc[] = {
{ 0x22, 512 },
{ 0x23, 1024 },
{ 0x25, 2 * 1024 },
{ 0x29, 4 * 1024 },
{ 0x46, 4 * 1024 },
{ 0x47, 8 * 1024 },
// { 49,
{ 0x4a, 6 * 1024 },
{ 0x4b, 8 * 1024 },
{ 0x4c, 12 * 1024 },
{ 0x4d, 16 * 1014 },
{ 0xD0, 512 },
{ 0xD1, 1024 },
{ 0xD2, 2048 },
{ 0xD6, 1024 },
{ 0xD7, 2048 },
{ 0xD8, 4096 },
{ 0xDC, 1536 },
{ 0xDD, 3 * 1024 },
{ 0xDE, 6 * 1024 },
{ 0xE2, 2048 },
{ 0xE3, 4096 },
{ 0xE4, 8 * 1024 },
{ 0xEA, 12 * 1024 },
{ 0xEB, 18 * 1024 },
{ 0xEC, 24 * 1024 }
};
static void FindIntelCacheDesc( uint8 nDesc, const IntelCacheDesc_t *pDesc, int nDescCount, uint32 &nCache, uint32 &nCacheDesc )
{
for ( int i = 0; i < nDescCount; ++i )
{
if ( pDesc->nDesc == nDesc )
{
nCache = pDesc->nCacheSize;
nCacheDesc = nDesc;
break;
}
}
}
// see "Output of the CPUID instruction" from Intel, page 26
static void InterpretIntelCacheDescriptors( uint32 nPackedDesc )
{
if ( nPackedDesc & 0x80000000 )
{
return; // this is a wrong descriptor
}
for ( int i = 0; i < 4; ++i )
{
FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL1DataCacheDesc, ARRAYSIZE( s_IntelL1DataCacheDesc ), s_cpuInformation.m_nL1CacheSizeKb, s_cpuInformation.m_nL1CacheDesc );
FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL2DataCacheDesc, ARRAYSIZE( s_IntelL2DataCacheDesc ), s_cpuInformation.m_nL2CacheSizeKb, s_cpuInformation.m_nL2CacheDesc );
FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL3DataCacheDesc, ARRAYSIZE( s_IntelL3DataCacheDesc ), s_cpuInformation.m_nL3CacheSizeKb, s_cpuInformation.m_nL3CacheDesc );
nPackedDesc >>= 8;
}
}
const CPUInformation& GetCPUInformation()
{
CPUInformation &pi = s_cpuInformation;
// Has the structure already been initialized and filled out?
if ( pi.m_Size == sizeof(pi) )
return pi;
// Redundant, but just in case the user somehow messes with the size.
memset(&pi, 0x0, sizeof(pi));
// Fill out the structure, and return it:
pi.m_Size = sizeof(pi);
// Grab the processor frequency:
pi.m_Speed = CalculateClockSpeed();
// Get the logical and physical processor counts:
pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();
bool bAuthenticAMD = ( 0 == V_tier0_stricmp( GetProcessorVendorId(), "AuthenticAMD" ) );
bool bGenuineIntel = !bAuthenticAMD && ( 0 == V_tier0_stricmp( GetProcessorVendorId(), "GenuineIntel" ) );
#if defined( _X360 )
pi.m_nPhysicalProcessors = 3;
pi.m_nLogicalProcessors = 6;
#elif defined( _PS3 )
pi.m_nPhysicalProcessors = 1;
pi.m_nLogicalProcessors = 2;
#elif defined(_WIN32) && !defined( _X360 )
SYSTEM_INFO si;
ZeroMemory( &si, sizeof(si) );
GetSystemInfo( &si );
// Sergiy: fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09)
// this is contrary to MSDN documentation on GetSystemInfo()
//
pi.m_nLogicalProcessors = si.dwNumberOfProcessors;
if ( bAuthenticAMD )
{
// quick fix for AMD Phenom: it reports 3 logical cores and 4 physical cores;
// no AMD CPUs by the end of 2009 have HT, so we'll override HT detection here
pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors;
}
else
{
CpuTopology topo;
pi.m_nPhysicalProcessors = topo.NumberOfSystemCores();
}
// Make sure I always report at least one, when running WinXP with the /ONECPU switch,
// it likes to report 0 processors for some reason.
if ( pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0 )
{
Assert( !"Sergiy: apparently I didn't fix some CPU detection code completely. Let me know and I'll do my best to fix it soon." );
pi.m_nPhysicalProcessors = 1;
pi.m_nLogicalProcessors = 1;
}
#elif defined(LINUX)
pi.m_nLogicalProcessors = 0;
pi.m_nPhysicalProcessors = 0;
const int k_cMaxProcessors = 256;
bool rgbProcessors[k_cMaxProcessors];
memset( rgbProcessors, 0, sizeof( rgbProcessors ) );
int cMaxCoreId = 0;
FILE *fpCpuInfo = fopen( "/proc/cpuinfo", "r" );
if ( fpCpuInfo )
{
char rgchLine[256];
while ( fgets( rgchLine, sizeof( rgchLine ), fpCpuInfo ) )
{
if ( !strncasecmp( rgchLine, "processor", strlen( "processor" ) ) )
{
pi.m_nLogicalProcessors++;
}
if ( !strncasecmp( rgchLine, "core id", strlen( "core id" ) ) )
{
char *pchValue = strchr( rgchLine, ':' );
cMaxCoreId = MAX( cMaxCoreId, atoi( pchValue + 1 ) );
}
if ( !strncasecmp( rgchLine, "physical id", strlen( "physical id" ) ) )
{
// it seems (based on survey data) that we can see
// processor N (N > 0) when it's the only processor in
// the system. so keep track of each processor
char *pchValue = strchr( rgchLine, ':' );
int cPhysicalId = atoi( pchValue + 1 );
if ( cPhysicalId < k_cMaxProcessors )
rgbProcessors[cPhysicalId] = true;
}
/* this code will tell us how many physical chips are in the machine, but we want
core count, so for the moment, each processor counts as both logical and physical.
if ( !strncasecmp( rgchLine, "physical id ", strlen( "physical id " ) ) )
{
char *pchValue = strchr( rgchLine, ':' );
pi.m_nPhysicalProcessors = MAX( pi.m_nPhysicalProcessors, atol( pchValue ) );
}
*/
}
fclose( fpCpuInfo );
for ( int i = 0; i < k_cMaxProcessors; i++ )
if ( rgbProcessors[i] )
pi.m_nPhysicalProcessors++;
pi.m_nPhysicalProcessors *= ( cMaxCoreId + 1 );
}
else
{
pi.m_nLogicalProcessors = 1;
pi.m_nPhysicalProcessors = 1;
Assert( !"couldn't read cpu information from /proc/cpuinfo" );
}
#elif defined(OSX)
int num_phys_cpu = 1, num_log_cpu = 1;
size_t len = sizeof(num_phys_cpu);
sysctlbyname( "hw.physicalcpu", &num_phys_cpu, &len, NULL, 0 );
sysctlbyname( "hw.logicalcpu", &num_log_cpu, &len, NULL, 0 );
pi.m_nPhysicalProcessors = num_phys_cpu;
pi.m_nLogicalProcessors = num_log_cpu;
#endif
CpuIdResult_t cpuid0 = cpuid( 0 );
if ( cpuid0.eax >= 1 )
{
CpuIdResult_t cpuid1 = cpuid( 1 );
uint bFPU = cpuid1.edx & 1; // this should always be on on anything we support
// Determine Processor Features:
pi.m_bRDTSC = ( cpuid1.edx >> 4 ) & 1;
pi.m_bCMOV = ( cpuid1.edx >> 15 ) & 1;
pi.m_bFCMOV = ( pi.m_bCMOV && bFPU ) ? 1 : 0;
pi.m_bMMX = ( cpuid1.edx >> 23 ) & 1;
pi.m_bSSE = ( cpuid1.edx >> 25 ) & 1;
pi.m_bSSE2 = ( cpuid1.edx >> 26 ) & 1;
pi.m_bSSE3 = cpuid1.ecx & 1;
pi.m_bSSSE3 = ( cpuid1.ecx >> 9 ) & 1;;
pi.m_bSSE4a = CheckSSE4aTechnology();
pi.m_bSSE41 = ( cpuid1.ecx >> 19 ) & 1;
pi.m_bSSE42 = ( cpuid1.ecx >> 20 ) & 1;
pi.m_b3DNow = Check3DNowTechnology();
pi.m_bAVX = ( cpuid1.ecx >> 28 ) & 1;
pi.m_szProcessorID = ( tchar* )GetProcessorVendorId();
pi.m_szProcessorBrand = ( tchar* )GetProcessorBrand();
pi.m_bHT = ( pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors ); //HTSupported();
pi.m_nModel = cpuid1.eax; // full CPU model info
pi.m_nFeatures[ 0 ] = cpuid1.edx; // x87+ features
pi.m_nFeatures[ 1 ] = cpuid1.ecx; // sse3+ features
pi.m_nFeatures[ 2 ] = cpuid1.ebx; // some additional features
if ( bGenuineIntel )
{
if ( cpuid0.eax >= 4 )
{
// we have CPUID.4, use it to find all the cache parameters
const uint nCachesToQuery = 4; // leve 0 is not used
uint nCacheSizeKiB[ nCachesToQuery ];
for ( uint i = 0; i < nCachesToQuery; ++i )
{
nCacheSizeKiB[ i ] = 0;
}
for ( unsigned long nSub = 0; nSub < 1024 ; ++nSub )
{
CpuIdResult_t cpuid4 = cpuidex( 4, nSub );
uint nCacheType = cpuid4.eax & 0x1F;
if ( nCacheType == 0 )
{
// no more caches
break;
}
if ( nCacheType & 1 )
{
// this cache includes data cache: it's either data or unified. Instuction cache type is 2
uint nCacheLevel = ( cpuid4.eax >> 5 ) & 7;
if ( nCacheLevel < nCachesToQuery )
{
uint nCacheWays = 1 + ( ( cpuid4.ebx >> 22 ) & 0x3F );
uint nCachePartitions = 1 + ( ( cpuid4.ebx >> 12 ) & 0x3F );
uint nCacheLineSize = 1 + ( cpuid4.ebx & 0xFF );
uint nCacheSets = 1 + cpuid4.ecx;
uint nCacheSizeBytes = nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
nCacheSizeKiB[ nCacheLevel ] = nCacheSizeBytes >> 10;
}
}
}
pi.m_nL1CacheSizeKb = nCacheSizeKiB[ 1 ];
pi.m_nL2CacheSizeKb = nCacheSizeKiB[ 2 ];
pi.m_nL3CacheSizeKb = nCacheSizeKiB[ 3 ];
}
else if ( cpuid0.eax >= 2 )
{
// get the cache
CpuIdResult_t cpuid2 = cpuid( 2 );
for ( int i = ( cpuid2.eax & 0xFF ); i-- > 0; )
{
InterpretIntelCacheDescriptors( cpuid2.eax & ~0xFF );
InterpretIntelCacheDescriptors( cpuid2.ebx );
InterpretIntelCacheDescriptors( cpuid2.ecx );
InterpretIntelCacheDescriptors( cpuid2.edx );
cpuid2 = cpuid( 2 ); // read the next
}
}
}
}
CpuIdResult_t cpuid0ex = cpuid( 0x80000000 );
if ( bAuthenticAMD )
{
if ( cpuid0ex.eax >= 0x80000005 )
{
CpuIdResult_t cpuid5ex = cpuid( 0x80000005 );
pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
}
if ( cpuid0ex.eax >= 0x80000006 )
{
CpuIdResult_t cpuid6ex = cpuid( 0x80000006 );
pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
pi.m_nL3CacheSizeKb = ( cpuid6ex.edx >> 18 ) * 512;
pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
}
}
else if ( bGenuineIntel )
{
if ( cpuid0ex.eax >= 0x80000006 )
{
// make sure we got the L2 cache info right
pi.m_nL2CacheSizeKb = ( cpuid( 0x80000006 ).ecx >> 16 );
}
}
return pi;
}