2021-07-24 21:11:47 -07:00

996 lines
38 KiB
C++

//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// GCM memory allocation mgmt
//
//==================================================================================================
#include "utlmap.h"
#include "sys/tty.h"
#include "convar.h"
#include "ps3gcmmemory.h"
#include "gcmlabels.h"
#include "gcmstate.h"
#include "gcmdrawstate.h"
#include "memdbgon.h"
PLATFORM_OVERRIDE_MEM_ALLOC_INTERNAL_PS3_IMPL
#define HARDWARE_CURSOR_SIZE (64*64*4)
//--------------------------------------------------------------------------------------------------
// GCM memory allocators
//--------------------------------------------------------------------------------------------------
// TRACK_ALLOC_STATS gates the per-category accounting inside
// CPs3gcmLocalMemoryAllocator::TrackAllocStats(). It is currently forced on:
// the original "_CERT" condition has been replaced with "#if 1".
#if 1 // #ifndef _CERT
#define TRACK_ALLOC_STATS 1
#endif
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Debug-only switches:
//  r_ps3_gcmnocompact  - when set, CPs3gcmLocalMemoryAllocator::Compact() returns right after validating blocks.
//  r_ps3_gcmlowcompact - when cleared, Alloc() starts with bCompactPerformed == true and therefore
//                        skips its low-memory compaction attempt.
ConVar r_ps3_gcmnocompact( "r_ps3_gcmnocompact", "0" );
ConVar r_ps3_gcmlowcompact( "r_ps3_gcmlowcompact", "1" );
#endif
// Mutex taken by every public allocator entry point in this file (via PS3ALLOCMTX).
static CThreadFastMutex s_AllocMutex;
// Number of PS3ALLOCMTX scopes currently active; read by
// IsItSafeToRefreshFrontBufferNonInteractivePs3() while holding s_AllocMutex.
static int32 s_uiGcmLocalMemoryAllocatorMutexLockCount;
// Scope guard that bumps the lock count on construction and drops it on destruction.
// It is only instantiated by PS3ALLOCMTX, i.e. after s_AllocMutex has been acquired.
struct CGcmLocalMemoryAllocatorMutexLockCounter_t
{
CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 ); ++ s_uiGcmLocalMemoryAllocatorMutexLockCount; }
~CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount > 0 ); -- s_uiGcmLocalMemoryAllocatorMutexLockCount; }
};
// Locks the allocator for the rest of the enclosing scope and counts the scope as active.
// Expands to two statements, so it must be used where declarations are allowed.
// NOTE(review): AUTO_LOCK is presumably a scoped lock released at scope exit - defined elsewhere, confirm.
#define PS3ALLOCMTX AUTO_LOCK( s_AllocMutex ); CGcmLocalMemoryAllocatorMutexLockCounter_t aLockCounter;
// Tells the caller whether a non-interactive front buffer refresh may be
// performed right now. Returns true only when called from the main thread
// while no PS3ALLOCMTX scope is active (lock count is zero).
bool IsItSafeToRefreshFrontBufferNonInteractivePs3()
{
	// Front buffer refresh is restricted to the main thread
	if ( ThreadInMainThread() )
	{
		AUTO_LOCK( s_AllocMutex );
		Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 );
		const bool bNoAllocatorScopeActive = ( s_uiGcmLocalMemoryAllocatorMutexLockCount <= 0 );
		return bNoAllocatorScopeActive;
	}
	return false;
}
// Allocator-side view of CPs3gcmLocalMemoryBlock: adds no data, only accessors
// returning writable references to the block's bookkeeping fields.
struct CPs3gcmLocalMemoryBlockMutable : public CPs3gcmLocalMemoryBlock
{
	// Offset of the block (for the malloc pool this holds the heap pointer value)
	uint32 & MutableOffset()
	{
		return m_nLocalMemoryOffset;
	}

	// Size of the block in bytes
	uint32 & MutableSize()
	{
		return m_uiSize;
	}

	// Allocation type (selects pool and alignment)
	CPs3gcmAllocationType_t & MutableType()
	{
		return m_uType;
	}

	// Slot of this block inside the owning allocator's m_arrAllocations (~0 when not tracked)
	uint32 & MutableIndex()
	{
		return m_uiIndex;
	}
};
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Guard values stored in a block's m_dbgGuardCookie (debug builds only):
// "Allocated" is written at the end of a successful Alloc(), "Free" is written by Free().
// Free() and ValidateAllBlocks() check them to catch double frees and corrupted blocks.
static const uint64 g_GcmLocalMemoryBlockDebugCookieAllocated = 0xA110CA7EDA110CA7ull;
static const uint64 g_GcmLocalMemoryBlockDebugCookieFree = 0xFEEFEEFEEFEEFEEFllu;
#endif
// Bookkeeping for one GCM memory pool. One instance exists per pool in the
// g_ps3gcmLocalMemoryAllocator[] array declared right after the struct body.
// None of the methods lock; callers in this file take PS3ALLOCMTX before calling them.
struct CPs3gcmLocalMemoryAllocator
{
//////////////////////////////////////////////////////////////////////////
//
// Allocated memory tracking
//
uint32 m_nOffsetMin; // RSX Local Memory allocated by Initialization that will never be released
uint32 m_nOffsetMax; // Ceiling of allocatable RSX Local Memory (because the top portion is reserved for zcull/etc.), top portion managed separately
uint32 m_nOffsetUnallocated; // RSX Local Memory offset of not yet allocated memory (between Min and Max)
CUtlVector< CPs3gcmLocalMemoryBlockMutable * > m_arrAllocations; // Sorted array of all allocations
//////////////////////////////////////////////////////////////////////////
//
// Free blocks tracking
//
// Node of the pending/free singly linked lists; holds a copy of the freed block
// plus the fence number recorded at the time of the Free() call.
struct LocalMemoryAllocation_t
{
CPs3gcmLocalMemoryBlockMutable m_block;
uint32 m_uiFenceNumber;
LocalMemoryAllocation_t *m_pNext;
};
LocalMemoryAllocation_t *m_pPendingFreeBlock; // Freed blocks not yet confirmed by the fence; Free() pushes at the head
LocalMemoryAllocation_t *m_pFreeBlock; // Blocks moved here by Reclaim(); searched by FindFreeBlock()
static uint32 sm_uiFenceNumber; // Fence counter shared by all pools, advanced on every Free()
uint32 m_uiFenceLastKnown; // Label value observed by the most recent Reclaim() on this pool
static uint32 volatile *sm_puiFenceLocation; // Address of the GCM_LABEL_MEMORY_FREE label (set up in Ps3gcmLocalMemoryAllocator_Init)
//////////////////////////////////////////////////////////////////////////
//
// Implementation
//
inline bool Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline void Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline uint32 Reclaim( bool bForce = false );
inline void Compact();
// Helper methods
inline LocalMemoryAllocation_t * FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize );
inline bool IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue );
inline void TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
inline void ValidateAllBlocks();
#endif
}
// One allocator per pool, indexed by CPs3gcmAllocationPool_t.
// NOTE(review): the struct has no constructor for its plain members; the list heads and
// m_uiFenceLastKnown appear to rely on this being a zero-initialized global - confirm.
g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolCount];
// Fence counter starts at 1; the label itself is reset to 0 in Ps3gcmLocalMemoryAllocator_Init().
uint32 CPs3gcmLocalMemoryAllocator::sm_uiFenceNumber = 1;
// Stays NULL until Ps3gcmLocalMemoryAllocator_Init() resolves the label address.
uint32 volatile * CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation;
// RSX memory usage stats tracking:
// per-category totals, updated by CPs3gcmLocalMemoryAllocator::TrackAllocStats().
static GPUMemoryStats g_RsxMemoryStats;
// Per-pool-group byte counters, updated by TrackAllocStats_Pool().
struct GPUMemoryStats_Pool
{
int nDefaultPoolSize; // capacity of the default pool, stored by Ps3gcmLocalMemoryAllocator_Init()
int nDefaultPoolUsed; // kGcmAllocPoolDefault
int nRTPoolUsed; // all tiled color pools + kGcmAllocPoolTiledD24S8
int nDynamicPoolUsed; // kGcmAllocPoolDynamicNewPath + kGcmAllocPoolDynamic
int nMainMemUsed; // kGcmAllocPoolMainMemory + kGcmAllocPoolMallocMemory
int nUnknownPoolUsed; // any pool value not handled by the switch in TrackAllocStats_Pool()
};
GPUMemoryStats_Pool g_RsxMemoryStats_Pool;
// Computes the number of bytes to reserve for nCount tiled surfaces of w x h
// pixels at bpp bytes per pixel: tiled pitch of one row, times the height
// rounded up to a multiple of 32 rows, times nCount, rounded up to the
// allocation alignment of kAllocPs3gcmColorBufferMisc.
static inline uint32 Ps3gcmHelper_ComputeTiledAreaMemorySize( uint32 nCount, uint32 w, uint32 h, uint32 bpp )
{
	const uint32 nPitchBytes = cellGcmGetTiledPitchSize( w * bpp );
	const uint32 nAlignedRows = AlignValue( h, 32 );
	const uint32 nAllSurfacesBytes = nPitchBytes * nAlignedRows * nCount;
	return AlignValue( nAllSurfacesBytes, PS3GCMALLOCATIONALIGN( kAllocPs3gcmColorBufferMisc ) );
}
// Sets up all allocator pools:
//  - resolves and zeroes the GCM_LABEL_MEMORY_FREE fence label (first call only),
//  - carves the fixed-size pools off the top of RSX local memory, walking the pool table backwards,
//  - gives the rest of local memory (minus the hardware cursor area) to the default pool,
//  - points the main memory pool at the mapped main-memory buffer,
//  - records stats capacity and binds tile regions for the FB, FBQ and 512 color pools,
//  - prints the resulting layout in non-_CERT builds.
// Holds the allocator lock (PS3ALLOCMTX) for its whole duration.
void Ps3gcmLocalMemoryAllocator_Init()
{
PS3ALLOCMTX
if ( !CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation )
{
CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = cellGcmGetLabelAddress( GCM_LABEL_MEMORY_FREE );
*CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = 0;
}
// Pool boundaries
uint32 uiGcmAllocBegin = g_ps3gcmGlobalState.m_nLocalBaseOffset;
uint32 uiGcmAllocEnd = uiGcmAllocBegin + g_ps3gcmGlobalState.m_nLocalSize;
// Memory should be allocated for large frame buffers
// (pool sizes below are computed for at least 1280x720, even if the render size is smaller)
uint32 uiMemorySizeBuffer[2] = { MAX( 1280, g_ps3gcmGlobalState.m_nRenderSize[0] ), MAX( 720, g_ps3gcmGlobalState.m_nRenderSize[1] ) };
// Pixel count of the sizing resolution and of 1280x720, used to scale the depth pool in the non-CSTRIKE15 layout
uint32 uiFactor[2] = { uiMemorySizeBuffer[0]*uiMemorySizeBuffer[1], 1280*720 };
// Configuration of pool memory (can be #ifdef'd for every game)
// NOTE(review): this is a function-local static with runtime-dependent initializers, so the
// table is computed on the first call only; a later call with a different render size would
// reuse the old sizes - confirm Init is only expected to run once per render size.
static const uint32 s_PoolMemoryLayout[/*kGcmAllocPoolCount*/] =
{
#if defined( CSTRIKE15 )
// mhansen - We had to adjust the memory values a bit for cstrike15 to get a map to load
// PS3_BUILDFIX - We need to revisit this to determine the proper size later on
/*kGcmAllocPoolDefault = */ 0,
/*kGcmAllocPoolDynamicNewPath = */ 6 * 1024 * 1024, // 6 MB
/*kGcmAllocPoolDynamic = */ 11 * 1024 * 1024, // 11 MB
/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 + CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ),
// 2 buffers allocated in CreateRSXBuffers + 2 _rt_fullFrameFB - can probably get this down if...
// 1. we clean up the post-pro rendering to use the front buffer as a texture and
// 2. tidy up aliasing for rt_fullframeFB and rt_fullFrameFB1
/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 2 1/4 size framebuffer textures
/*kGcmAllocPoolTiledColor512 = */ 0,
/*kGcmAllocPoolTiledColorMisc = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640, 640, 4 ) // RTT shadows ?
+ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 1024, 512, 4) // Water
+ Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 32, 32, 4), // Eye Glint
/*kGcmAllocPoolTiledD24S8 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640*2, 640*2, 2)
+ Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 640, 640, 2) // CSM and Flashlight
+ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // Main depth buffer
/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#else
/*kGcmAllocPoolDefault = */ 0,
/*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
/*kGcmAllocPoolDynamic = */ 10 * 1024 * 1024, // 10 MB
/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 * CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // fits 6 of full framebuffer textures
/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 4, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 4 quarters of framebuffer textures
/*kGcmAllocPoolTiledColor512 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 512, 512, 4 ), // fits 2 512x512 RGBA textures
/*kGcmAllocPoolTiledColorMisc = */ 5 * 1024 * 1024, // 5 MB
/*kGcmAllocPoolTiledD24S8 = */ uint64( 15 * 1024 * 1024 ) * uiFactor[0]/uiFactor[1], // 15 MB
/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#endif
};
COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryLayout ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
// Walk the pools from last to first, stacking each one directly below the previous one,
// starting at the end of local memory. Zero-sized entries end up with Min == Max.
for ( int j = ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); j -- > 0; )
{
const uint32 uiSize = AlignValue( s_PoolMemoryLayout[j], 1024 * 1024 ); // Align it on 1 MB boundaries, all our pools are large
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax = uiGcmAllocEnd;
uiGcmAllocEnd -= uiSize;
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetUnallocated = uiGcmAllocEnd;
}
// Default pool setup (rest of local memory)
// The first HARDWARE_CURSOR_SIZE bytes of local memory are left out of the pool.
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax = uiGcmAllocEnd;
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetUnallocated = uiGcmAllocBegin + HARDWARE_CURSOR_SIZE;
// Main memory mapped pool
// (overrides the values the loop above assigned to this pool)
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetUnallocated = uint32( g_ps3gcmGlobalState.m_pRsxMainMemoryPoolBuffer ) + g_ps3gcmGlobalState.m_nIoOffsetDelta;
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMax = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin + g_ps3gcmGlobalState.m_nRsxMainMemoryPoolBufferSize;
// Store initial capacity for memory stats tracking:
g_RsxMemoryStats.nGPUMemSize = g_ps3gcmGlobalState.m_nLocalSize;
g_RsxMemoryStats_Pool.nDefaultPoolSize = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin;
//
// Setup preset tiled regions
//
// Each of the three scopes below binds one whole pool as a single tile region.
// The tile index is the pool's position relative to kGcmAllocPoolTiledColorFB and
// compression is disabled for all of them.
{
// Full-size framebuffer pool: pitch of a full render-width row at 4 bytes per pixel
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFB;
uint8 uiBank = 0; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
{
// Quarter-size framebuffer pool: pitch of a quarter render-width row at 4 bytes per pixel
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFBQ;
uint8 uiBank = 1; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 / 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
{
// 512x512 pool: pitch of a 512-pixel row at 4 bytes per pixel
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColor512;
uint8 uiBank = 2; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( 512 * 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
#ifndef _CERT
// Human-readable pool names for the layout dump below; must stay in pool-enum order
static const char * s_PoolMemoryNames[] =
{
/*kGcmAllocPoolDefault = */ "Default Pool",
/*kGcmAllocPoolDynamicNewPath = */ "Dynamic New ",
/*kGcmAllocPoolDynamic = */ "Dynamic IBVB",
/*kGcmAllocPoolTiledColorFB = */ "FullFrameRTs",
/*kGcmAllocPoolTiledColorFBQ = */ "1/4Frame RTs",
/*kGcmAllocPoolTiledColor512 = */ "512x512 RTs ",
/*kGcmAllocPoolTiledColorMisc = */ "All Misc RTs",
/*kGcmAllocPoolTiledD24S8 = */ "DepthStencil",
/*kGcmAllocPoolMainMemory = */ "Main Memory ",
/*kGcmAllocPoolMallocMemory = */ "MallocMemory",
};
COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryNames ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
Msg( "RSX Local Memory layout:\n" );
for ( int j = 0; j < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ j )
{
Msg( " %s 0x%08X - 0x%08X [ %9.3f MB ]\n",
s_PoolMemoryNames[j],
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax,
(g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin) / 1024.f / 1024.f );
}
Msg( "Total size: %d MB\n", g_ps3gcmGlobalState.m_nLocalSize / 1024 / 1024 );
#endif
}
void Ps3gcmLocalMemoryAllocator_Reclaim()
{
PS3ALLOCMTX
for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
g_ps3gcmLocalMemoryAllocator[ k ].Reclaim();
}
// Compacts every pool. The allocator lock is held for the whole operation.
// The command buffer is ended and finished both before the pools are compacted
// and again afterwards, so the transfers queued by compaction are waited for.
// Set PS3GCMCOMPACTPROFILE to 1 to print timing and size figures to the TTY.
void Ps3gcmLocalMemoryAllocator_Compact()
{
	// Memory management stays locked out until compaction is complete
	PS3ALLOCMTX

#define PS3GCMCOMPACTPROFILE 0

#if PS3GCMCOMPACTPROFILE
	float flTimeStart = Plat_FloatTime();
	uint32 uiFree = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
#endif

	// Let PPU wait for all RSX commands done (include waitFlip);
	// EndFrame flushes the GPU right up to the current point
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();

#if PS3GCMCOMPACTPROFILE
	float flTimeWait = Plat_FloatTime() - flTimeStart;
#endif

	// Compact the pools one by one, in index order
	const int nPoolCount = ARRAYSIZE( g_ps3gcmLocalMemoryAllocator );
	for ( int iPool = 0; iPool < nPoolCount; ++ iPool )
		g_ps3gcmLocalMemoryAllocator[ iPool ].Compact();

#if PS3GCMCOMPACTPROFILE
	float flTimePrepareTransfer = Plat_FloatTime() - flTimeStart;
#endif

	// Wait for all RSX memory to be transferred
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();

#if PS3GCMCOMPACTPROFILE
	float flTimeDone = Plat_FloatTime() - flTimeStart;

	char chBuffer[64];
	uint32 dummy;

	Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f / %0.3f / %0.3f sec\n",
		flTimeWait, flTimePrepareTransfer, flTimeDone );
	sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );

	Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f -> %0.3f MB (%0.3f MB free)\n",
		uiFree / 1024.f / 1024.f, g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated / 1024.f / 1024.f,
		(g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
	sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
#endif
}
void Ps3gcmLocalMemoryAllocator_CompactWithReason( char const *szReason )
{
double flTimeCompactStart = Plat_FloatTime();
DevMsg( "====== GCM LOCAL MEMORY COMPACT : %s =====\n", szReason );
uint32 uiFreeMemoryBeforeCompact = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
DevMsg( "RSX Local Memory Free: %0.3f MB; compacting...\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
Ps3gcmLocalMemoryAllocator_Compact();
DevMsg( "RSX Local Memory Compacted %0.3f MB in %0.3f sec\n",
(uiFreeMemoryBeforeCompact - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f,
Plat_FloatTime() - flTimeCompactStart );
DevMsg( "RSX Local Memory Free: %0.3f MB\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
}
bool CPs3gcmLocalMemoryBlock::Alloc()
{
PS3ALLOCMTX
return g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Alloc( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
void CPs3gcmLocalMemoryBlock::Free()
{
PS3ALLOCMTX
g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Free( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
//////////////////////////////////////////////////////////////////////////
//
// Private implementation of PS3 local memory allocator
//
// Places pBlock in this pool and fills in its offset and index.
//
// Strategy, in order:
//  1. reuse a suitably sized and aligned block from the free list;
//  2. otherwise carve a new block at the unallocated edge of the pool;
//  3. if the pool is full and blocks are pending, flush and wait for RSX to release them, then retry;
//  4. if nothing is pending, compact the pool once and retry;
//  5. if that still fails, raise Error().
// The malloc pool bypasses all of this and takes aligned heap memory (index is set to ~0).
// The main memory pool never compacts or errors: it returns false when it is full.
//
// Returns true on success; false only for the main memory pool when it is out of space,
// in which case the stats credit taken at the top is rolled back.
// Callers must hold the allocator lock.
inline bool CPs3gcmLocalMemoryAllocator::Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
	// Stats are credited up front; every failure return below must undo this credit,
	// otherwise failed allocations would be counted as used memory forever.
	TrackAllocStats( pBlock->MutableType(), pBlock->MutableSize() );

	uint32 uAlignBytes = PS3GCMALLOCATIONALIGN( pBlock->MutableType() );
	Assert( IsPowerOfTwo( uAlignBytes ) );

	double flAllocatorStallTime = 0.0f;
	bool bCompactPerformed = false;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	// Debug switch: pretend compaction already happened so the low-memory path goes straight to Error()
	bCompactPerformed = !r_ps3_gcmlowcompact.GetBool();
#endif

retry_allocation:
	// Try to find a free block
	if ( LocalMemoryAllocation_t *pFreeBlock = FindFreeBlock( uAlignBytes, pBlock->MutableSize() ) )
	{
		// Take over the free block's place: same offset, same slot in the allocations array
		pBlock->MutableOffset() = pFreeBlock->m_block.MutableOffset();
		pBlock->MutableIndex() = pFreeBlock->m_block.MutableIndex();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		if ( m_arrAllocations[ pBlock->MutableIndex() ] != &pFreeBlock->m_block )
			Error( "<vitaliy> GCM Local Memory Allocator Error (attempt to reuse invalid free block)!" );
#endif
		m_arrAllocations[ pBlock->MutableIndex() ] = reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock );
		delete pFreeBlock;
	}
	else if ( this != &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
	{
		// Allocate new block at the (aligned) unallocated edge
		uint32 uiFreeBlock = ( m_nOffsetUnallocated + uAlignBytes - 1 ) & ~( uAlignBytes - 1 );

		// Check if there's enough space in this pool for the requested block
		if ( uiFreeBlock + pBlock->MutableSize() > m_nOffsetMax )
		{
			// There's not enough space in this pool
			if ( m_pPendingFreeBlock )
			{
				// There are pending free blocks, we just need to wait for
				// RSX to finish rendering using them
				if ( !flAllocatorStallTime )
				{
					flAllocatorStallTime = Plat_FloatTime();
					// Flush GPU right up to current point - Endframe call does this...
					gpGcmDrawState->EndFrame();
					gpGcmDrawState->CmdBufferFlush();
				}
				// Poll until a large enough block has been reclaimed or nothing is pending any more
				while ( Reclaim() < pBlock->MutableSize() && m_pPendingFreeBlock )
				{
					ThreadSleep( 1 );
				}
				goto retry_allocation;
			}
			else if ( !bCompactPerformed )
			{
				// Main memory pool returns failure so caller can try local pool.
				if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ] )
				{
					TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
					return false;
				}

				// Let PPU wait for all RSX commands done
				gpGcmDrawState->EndFrame();
				gpGcmDrawState->CmdBufferFinish();

				uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
				for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
					uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
				Warning(
					"**************** GCM LOCAL MEMORY LOW *****************\n"
					"<vitaliy> GCM Local Memory Allocator#%d pool compacting!\n"
					" Requested allocation %u bytes.\n"
					" Pool capacity %u bytes.\n"
					" Free fragmented space %u bytes.\n"
					" Unallocated %u bytes.\n"
					" Used %u bytes.\n",
					this - g_ps3gcmLocalMemoryAllocator,
					( uint32 ) pBlock->MutableSize(),
					m_nOffsetMax - m_nOffsetMin,
					uiFragmentedFreeSpace,
					m_nOffsetMax - m_nOffsetUnallocated,
					m_nOffsetUnallocated - m_nOffsetMin
					);

				Compact();

				Warning( " ---> Compacted pool#%d has %u unallocated bytes.\n",
					this - g_ps3gcmLocalMemoryAllocator,
					m_nOffsetMax - m_nOffsetUnallocated );
				bCompactPerformed = true;

				// Wait for all RSX memory to be transferred
				gpGcmDrawState->EndFrame();
				gpGcmDrawState->CmdBufferFinish();
				goto retry_allocation;
			}
			else
			{
				// Main memory pool returns failure so caller can try local pool.
				if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ] )
				{
					TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
					return false;
				}

				uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
				for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
					uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
				Error(
					"********* OUT OF GCM LOCAL MEMORY ********************\n"
					"<vitaliy> GCM Local Memory Allocator#%d pool exhausted!\n"
					" Failed allocation %u bytes.\n"
					" Pool capacity %u bytes.\n"
					" Free fragmented space %u bytes.\n"
					" Unallocated %u bytes.\n"
					" Used %u bytes.\n",
					this - g_ps3gcmLocalMemoryAllocator,
					( uint32 ) pBlock->MutableSize(),
					m_nOffsetMax - m_nOffsetMin,
					uiFragmentedFreeSpace,
					m_nOffsetMax - m_nOffsetUnallocated,
					m_nOffsetUnallocated - m_nOffsetMin
					);
			}
		}

		// update the pointer to "unallocated" realm
		m_nOffsetUnallocated = uiFreeBlock + pBlock->MutableSize();

		// this is the last allocation so far
		pBlock->MutableIndex() = m_arrAllocations.AddToTail( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock ) );
		pBlock->MutableOffset() = uiFreeBlock;
	}
	else
	{
		// Malloc pool: the "offset" is the heap pointer itself and the block is not tracked in m_arrAllocations
		MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
		void *pvMallocMemory = MemAlloc_AllocAligned( pBlock->MutableSize(), uAlignBytes );
		pBlock->MutableOffset() = (uint32) pvMallocMemory;
		pBlock->MutableIndex() = ~0;
	}

	// 7LTODO if ( flAllocatorStallTime )
	// g_ps3gcmGlobalState.m_flAllocatorStallTimeWaitingRSX += Plat_FloatTime() - flAllocatorStallTime;

#ifdef GCMLOCALMEMORYBLOCKDEBUG
	// PS3 doesn't allow more than 8 zcull regions (index 0..7)
	if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() > 8 )
		Error( "PS3 number of zcull regions exceeded!\n" );
	// PS3 doesn't allow more than 15 tiles regions (index 0..14)
	if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() +
		g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorMisc].m_arrAllocations.Count() +
		( kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB )
		> 15 )
		Error( "PS3 number of tiled regions exceeded!\n" );
	pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieAllocated;
#endif

	return true;
}
// Queues pBlock for deferred release.
//
// A copy of the block is pushed at the head of the pending list, tagged with a fresh fence
// number, and a write of that same number to the GCM_LABEL_MEMORY_FREE label is queued.
// Reclaim() later moves the copy to the free list once the label shows the fence was reached.
// For tracked blocks (index != ~0) the slot in m_arrAllocations is repointed at the pending copy.
// Callers must hold the allocator lock.
inline void CPs3gcmLocalMemoryAllocator::Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( !pBlock ||
		pBlock->m_dbgGuardCookie != g_GcmLocalMemoryBlockDebugCookieAllocated ||
		( ( pBlock->MutableIndex() != ~0 ) && ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock ) ) )
	{
		//DebuggerBreak();
		Error( "<vitaliy> Attempt to free not allocated GCM local memory block!" );
	}
	pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieFree;
#endif

	// Advance the shared fence counter BEFORE tagging the block, so the number stored with
	// the block is the same number written by the label command queued below. Tagging with
	// the pre-increment value would make the label queued after the PREVIOUS free satisfy
	// this block's fence, letting it be reclaimed while RSX may still be using it.
	// Zero is skipped on wrap-around (the label is reset to 0 at init).
	sm_uiFenceNumber ++;
	if ( !sm_uiFenceNumber )
		sm_uiFenceNumber = 1;

	LocalMemoryAllocation_t *pDealloc = new LocalMemoryAllocation_t;
	pDealloc->m_block = *pBlock;
	pDealloc->m_uiFenceNumber = sm_uiFenceNumber;
	pDealloc->m_pNext = m_pPendingFreeBlock;
	gpGcmDrawState->SetWriteBackEndLabel( GCM_LABEL_MEMORY_FREE, sm_uiFenceNumber );
	m_pPendingFreeBlock = pDealloc;

	TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );

	if ( pBlock->MutableIndex() != ~0 )
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		if ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock )
			Error( "<vitaliy> GCM Local Memory Allocator Error (freeing block that is not properly registered)!" );
#endif
		// The slot now refers to the pending copy rather than the caller's block
		m_arrAllocations[ pBlock->MutableIndex() ] = &pDealloc->m_block;
	}

#ifdef GCMLOCALMEMORYBLOCKDEBUG
	// Poison the caller's block so stale use is easier to spot
	pBlock->MutableOffset() = ~0;
	pBlock->MutableIndex() = ~0;
#endif
}
// Returns true when the fence value stored with a freed block has been reached,
// given the label value just read. Both values are measured as unsigned distances
// from the last label value this pool observed, which keeps the comparison valid
// across counter wrap-around. With GCM_ALLOW_NULL_FLIPS enabled and
// g_ps3_nullflips set, every fence is reported as completed.
inline bool CPs3gcmLocalMemoryAllocator::IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue )
{
#if GCM_ALLOW_NULL_FLIPS
	extern bool g_ps3_nullflips;
	if ( g_ps3_nullflips )
		return true;
#endif

	// Needs to handle the counter wrapping around
	const uint32 uiProgressSinceLastKnown = uiCurrentFenceValue - m_uiFenceLastKnown;
	const uint32 uiDistanceToStoredFence = uiCheckStoredFenceValue - m_uiFenceLastKnown;
	return uiDistanceToStoredFence <= uiProgressSinceLastKnown;
}
// Moves pending free blocks whose fence has been reached onto the free list.
//
// bForce - when true, the fence check is skipped and the whole pending list is taken.
// Returns the largest recomputed size among the blocks moved in this call (0 if none were
// moved, and always 0 for the malloc pool, whose blocks are released to the heap instead).
// Also records the label value read at entry as m_uiFenceLastKnown.
inline uint32 CPs3gcmLocalMemoryAllocator::Reclaim( bool bForce )
{
uint32 uiLargestBlockSizeReclaimed = 0;
// Snapshot of the label value; used for every fence check in this call
uint32 uiCurrentFenceValue = *sm_puiFenceLocation;
// Walk pending free blocks and see if they are no longer
// in use by RSX:
// (p ends up pointing at the link that references the first completed block)
LocalMemoryAllocation_t **p = &m_pPendingFreeBlock;
if ( !bForce ) while ( (*p) && !IsFenceCompleted( uiCurrentFenceValue, (*p)->m_uiFenceNumber ) )
p = &( (*p)->m_pNext );
// Now p is pointing to the chain of free blocks
// chain that has been completed (due to the nature of
// pushing new deallocation at the head of the pending
// list)
if ( *p )
{
LocalMemoryAllocation_t *pCompletedChain = *p;
*p = NULL; // Terminate the chain
// Handle the special case of malloc reclaim - free all memory
if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
{
MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; )
{
MemAlloc_FreeAligned( pActualFree->m_block.DataInMallocMemory() );
// Advance before deleting the node we are standing on
LocalMemoryAllocation_t *pDelete = pActualFree;
pActualFree = pActualFree->m_pNext;
delete pDelete;
}
// Nothing left to link into the free list; the code below becomes a no-op
pCompletedChain = NULL;
}
// Relink the completed pending chain into
// the free blocks chain
// (appended after the current tail of the free list)
LocalMemoryAllocation_t **ppFree = &m_pFreeBlock;
while ( *ppFree )
ppFree = &( (*ppFree)->m_pNext );
*ppFree = pCompletedChain;
// Recompute actual free sizes of the completed chain
// Actual free size is the delta between block offset and next block offset
// When there's no next block then its delta between block offset and unallocated edge
for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; pActualFree = pActualFree->m_pNext )
{
uint32 uiIdx = pActualFree->m_block.MutableIndex() + 1;
uint32 uiNextOffset = m_nOffsetUnallocated;
if ( uiIdx < m_arrAllocations.Count() )
{
CPs3gcmLocalMemoryBlockMutable * RESTRICT pNextBlock = m_arrAllocations[ uiIdx ];
uiNextOffset = pNextBlock->Offset();
}
uint32 uiActualBlockSize = uiNextOffset - pActualFree->m_block.Offset();
pActualFree->m_block.MutableSize() = uiActualBlockSize;
uiLargestBlockSizeReclaimed = MAX( uiLargestBlockSizeReclaimed, uiActualBlockSize );
}
}
// Remember the last known fence value
m_uiFenceLastKnown = uiCurrentFenceValue;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
#endif
return uiLargestBlockSizeReclaimed;
}
// Searches the free list for the best block to reuse for a request of uiSize bytes.
//
// A block qualifies when its size is at least uiSize and at most 110% of uiSize, and its
// offset is a multiple of uiAlignBytes (which must be a power of two). Among qualifying
// blocks the smallest wins; on equal sizes the one later in the list wins.
// The chosen node is unlinked from the free list and returned with m_pNext cleared;
// ownership passes to the caller. Returns NULL when no block qualifies.
inline CPs3gcmLocalMemoryAllocator::LocalMemoryAllocation_t * CPs3gcmLocalMemoryAllocator::FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize )
{
	// we don't want to inflate requested size by > 10%
	// Computed in 64 bits: "uiSize * 11" wraps around in uint32 for requests above
	// roughly 372 MB, which would shrink the limit below the request itself.
	const uint64 uiSizeMax = uint64( uiSize ) * 11 / 10;
	const uint32 uiAlignMask = uiAlignBytes - 1;

	LocalMemoryAllocation_t **ppBest = NULL;
	for ( LocalMemoryAllocation_t **p = &m_pFreeBlock; (*p); p = &( (*p)->m_pNext ) )
	{
		const uint32 uiCandidateSize = (*p)->m_block.MutableSize();
		if ( uiCandidateSize < uiSize || uiCandidateSize > uiSizeMax )
			continue;	// too small, or would waste more than 10%
		if ( (*p)->m_block.Offset() & uiAlignMask )
			continue;	// misaligned for this request

		if ( !ppBest || ( uiCandidateSize <= (*ppBest)->m_block.MutableSize() ) )
			ppBest = p;
	}

	if ( !ppBest )
		return NULL;

	// Unlink the winner by patching the link that pointed at it
	LocalMemoryAllocation_t *pFree = (*ppBest);
	(*ppBest) = pFree->m_pNext;
	pFree->m_pNext = NULL;
	return pFree;
}
// Adds nDelta bytes to the per-pool-group counter matching the pool of uAllocType
// and refreshes the reported free GPU memory figure.
// Returns true when the pool lives in RSX memory, false for the main memory and
// malloc pools.
inline bool TrackAllocStats_Pool( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
	// Anything the switch does not recognize is booked as "unknown" RSX memory
	int *pnPoolCounter = &g_RsxMemoryStats_Pool.nUnknownPoolUsed;
	bool bPoolIsInRsxMemory = true;

	CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL( uAllocType );
	switch ( ePool )
	{
	case kGcmAllocPoolDefault:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nDefaultPoolUsed;
		break;

	case kGcmAllocPoolDynamicNewPath:
	case kGcmAllocPoolDynamic:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nDynamicPoolUsed;
		break;

	case kGcmAllocPoolTiledColorFB:
	case kGcmAllocPoolTiledColorFBQ:
	case kGcmAllocPoolTiledColor512:
	case kGcmAllocPoolTiledColorMisc:
	case kGcmAllocPoolTiledD24S8:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nRTPoolUsed;
		break;

	case kGcmAllocPoolMainMemory: // Unused, unless PS3GCM_VBIB_IN_IO_MEMORY set to 1
	case kGcmAllocPoolMallocMemory:
		// These two live in main memory, not on the RSX
		bPoolIsInRsxMemory = false;
		pnPoolCounter = &g_RsxMemoryStats_Pool.nMainMemUsed;
		break;
	}

	*pnPoolCounter += nDelta;
	Assert( 0 <= (int)*pnPoolCounter );

	// Free memory is reported from the default pool only: the other pools are pre-sized to
	// fixed limits, and all geom/textures go into the default pool, so that is where
	// content-driven variation/failures will occur.
	g_RsxMemoryStats.nGPUMemFree = g_RsxMemoryStats_Pool.nDefaultPoolSize - g_RsxMemoryStats_Pool.nDefaultPoolUsed;

	return bPoolIsInRsxMemory;
}
// Books nDelta bytes (negative on release) for an allocation of type uAllocType.
// The per-pool counters are always updated; the per-category counters
// (render targets, textures, VB, IB, unknown) only for allocations in RSX memory.
// Compiles to nothing unless TRACK_ALLOC_STATS is non-zero.
inline void CPs3gcmLocalMemoryAllocator::TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
#if TRACK_ALLOC_STATS
	const bool bInRsxMemory = TrackAllocStats_Pool( uAllocType, nDelta );
	if ( bInRsxMemory )
	{
		// Pick the category counter; types without their own category land in "unknown"
		unsigned int *pnCategoryCounter = &g_RsxMemoryStats.nUnknown;
		switch ( uAllocType )
		{
		case kAllocPs3gcmColorBufferMisc:
		case kAllocPs3gcmColorBufferFB:
		case kAllocPs3gcmColorBufferFBQ:
		case kAllocPs3gcmColorBuffer512:
		case kAllocPs3gcmDepthBuffer:
			pnCategoryCounter = &g_RsxMemoryStats.nRTSize;
			break;

		case kAllocPs3gcmTextureData:
		case kAllocPs3gcmTextureData0:
			pnCategoryCounter = &g_RsxMemoryStats.nTextureSize;
			break;

		case kAllocPs3GcmVertexBuffer:
			pnCategoryCounter = &g_RsxMemoryStats.nVBSize;
			break;

		case kAllocPs3GcmIndexBuffer:
			pnCategoryCounter = &g_RsxMemoryStats.nIBSize;
			break;

		case kAllocPs3GcmShader:
		case kAllocPs3GcmEdgeGeomBuffer:
		case kAllocPs3GcmVertexBufferDynamic:
		case kAllocPs3GcmIndexBufferDynamic:
		case kAllocPs3GcmDynamicBufferPool:
		case kAllocPs3GcmVertexBufferDma:
		case kAllocPs3GcmIndexBufferDma:
			// Treat these as misc unless they become big/variable
			break;
		}

		*pnCategoryCounter += nDelta;
		Assert( 0 <= (int)*pnCategoryCounter );
	}
#endif // TRACK_ALLOC_STATS
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Raises Error() when the condition is false. The message prints the locals named
// `pBlock` and `k`, so both must be in scope wherever the macro is used.
#define VALIDATECONDITION( x ) if( !( x ) ) { Error( "<vitaliy> GCM Local Memory Allocation block %p index %d is corrupt [line %d]!\n", pBlock, k, __LINE__ ); }
// Debug-only consistency check of this pool's bookkeeping:
//  1. every slot of m_arrAllocations holds a block with a valid cookie, a matching
//     back-index, and an offset range that is ordered and within the pool;
//  2. every node on the pending and free lists carries the "free" cookie and, when tracked,
//     is the block registered in its slot;
//  3. every slot seen with the "free" cookie in step 1 is accounted for by a list node.
inline void CPs3gcmLocalMemoryAllocator::ValidateAllBlocks()
{
// Traverse the allocated list and validate debug guards and patch-back indices
CUtlVector< uint32 > arrFreeBlocksIdx;
uint32 uiLastAllocatedOffset = m_nOffsetMin;
for ( int k = 0, kEnd = m_arrAllocations.Count(); k < kEnd; ++ k )
{
CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = m_arrAllocations[k];
VALIDATECONDITION( pBlock );
VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieAllocated || pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
VALIDATECONDITION( pBlock->MutableIndex() == k );
VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
// Blocks must not start before the end of the previous one
VALIDATECONDITION( pBlock->Offset() >= uiLastAllocatedOffset );
uiLastAllocatedOffset = pBlock->Offset() + pBlock->MutableSize();
VALIDATECONDITION( uiLastAllocatedOffset <= m_nOffsetMax );
// Remember slots occupied by freed blocks; each must show up on a list below
if ( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree )
arrFreeBlocksIdx.AddToTail( k );
}
// Traverse free lists and validate
LocalMemoryAllocation_t * arrFree[] = { m_pPendingFreeBlock, m_pFreeBlock };
for ( int j = 0; j < ARRAYSIZE( arrFree ); ++ j )
for ( LocalMemoryAllocation_t *p = arrFree[j]; p; p = p->m_pNext )
{
// k is only used for the error message: list number until the block's index has been read
int k = j;
CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = &p->m_block;
VALIDATECONDITION( pBlock );
VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
k = pBlock->MutableIndex();
// Untracked blocks (index ~0) have no slot to cross-check
if ( pBlock->MutableIndex() != ~0 )
{
VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
VALIDATECONDITION( arrFreeBlocksIdx.FindAndFastRemove( pBlock->MutableIndex() ) );
}
}
// Dummy locals so VALIDATECONDITION can be used outside the loops
int k = 0;
void *pBlock = 0;
// Every freed slot must have been matched by a list node
VALIDATECONDITION( !arrFreeBlocksIdx.Count() );
}
#endif
//
// Compact
//	Defragments this allocator's pool: reclaims all freed blocks, drops them from
//	the allocation tracking array, then re-allocates every surviving block from
//	m_nOffsetMin upwards and queues RSX transfer commands to move the data of any
//	block whose offset changed.
//	Per the comments and the debug Error() text below, rendering on both RSX and
//	PPU is expected to be paused while this runs.
//
inline void CPs3gcmLocalMemoryAllocator::Compact()
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
// Debug-only switch: r_ps3_gcmnocompact disables compaction entirely
if ( r_ps3_gcmnocompact.GetBool() )
return;
#endif
// Reclaim all memory (NOTE: all pending blocks must be reclaimed since both RSX and PPU have stopped rendering!)
Reclaim();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_pPendingFreeBlock )
Warning( "GCM Local Memory Allocator Compact forces pending free blocks to be reclaimed.\n" );
ValidateAllBlocks();
#endif
// Something is still pending after the regular reclaim: retry with the flag set
// (presumably a "force" flag, judging by the warning text above -- confirm against Reclaim)
if ( m_pPendingFreeBlock )
Reclaim( true );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_pPendingFreeBlock )
Error( "<vitaliy> GCM Local Memory Allocator Compact requires RSX and PPU rendering to be paused! (pending free blocks have not been reclaimed)\n" );
ValidateAllBlocks();
#endif
// Walk the free blocks chain and patch-back NULL pointers into allocation tracking system
// NOTE(review): ValidateAllBlocks tolerates free-list blocks whose MutableIndex() is ~0, but this
// loop indexes m_arrAllocations unconditionally -- confirm no such block can be on m_pFreeBlock here.
while ( m_pFreeBlock )
{
LocalMemoryAllocation_t *p = m_pFreeBlock;
m_pFreeBlock = p->m_pNext;
m_arrAllocations[ p->m_block.MutableIndex() ] = NULL;
delete p;
}
Assert( !m_pFreeBlock && !m_pPendingFreeBlock );
// These are elements requiring reallocation
// (count and base pointer are captured BEFORE the vector's size is reset below)
uint32 uiCount = m_arrAllocations.Count();
CPs3gcmLocalMemoryBlockMutable **pReallocationBlocks = m_arrAllocations.Base();
// Here "correct" implementation would be to copy off m_arrAllocations vector onto stack for iteration,
// RemoveAll from m_arrAllocations vector and allocate all blocks again.
// We will cheat since we know that we will allocate same number of elements and directly write zero
// into m_arrAllocations m_Size member, then we will still be able to use the memory of the vector
// for reading blocks requiring compact reallocation, and AddToTail will still fill the vector with
// correct data.
struct AllocatorCompactVectorCheat : public CUtlVector< CPs3gcmLocalMemoryBlockMutable * > { inline void ResetCountPreservingMemoryContents() { m_Size = 0; } };
( ( AllocatorCompactVectorCheat * ) ( char * ) &m_arrAllocations )->ResetCountPreservingMemoryContents();
// Restart allocation from the very beginning of the pool
m_nOffsetUnallocated = m_nOffsetMin;
// Prepare RSX for data buffer transfers in local memory
// The transfer mode is picked from this allocator's position in g_ps3gcmLocalMemoryAllocator:
// pools located before kGcmAllocPoolMainMemory use local->local, the rest use main->main.
uint nTransferMode = ( ( this - &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ] ) < kGcmAllocPoolMainMemory ) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN;
Assert( nTransferMode < 4 );
// Reallocate all blocks
for ( ; uiCount; -- uiCount, ++ pReallocationBlocks )
{
CPs3gcmLocalMemoryBlockMutable *pBlock = *pReallocationBlocks;
// Slots of freed blocks were set to NULL above
if ( !pBlock )
continue;
uint32 nOldOffset = pBlock->Offset();
// NOTE(review): pOldAddress is only referenced by the commented-out V_memmove at the end of the loop
char* pOldAddress = pBlock->DataInAnyMemory();
// Subtract the block's size from the stats before re-allocating it
// (presumably Alloc accounts for it again -- confirm against Alloc/TrackAllocStats)
TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
Alloc( pBlock );
// Block landed at the same offset: no data to move
if ( nOldOffset == pBlock->Offset() )
continue;
// Have RSX transfer blocks data. RSX may hang if there's WriteLabel between the Format and Offset commands,
// so reserve space for both of them up front
// SpuDrawTransfer_t * pTransfer = g_spuGcm.GetDrawQueue()->AllocWithHeader<SpuDrawTransfer_t>( SPUDRAWQUEUE_TRANSFER_METHOD | nTransferMode );
// pTransfer->m_nLineSize = pBlock->MutableSize();
// pTransfer->m_nOldOffset = nOldOffset;
// pTransfer->m_nNewOffset = pBlock->Offset();
// 7LTODO
uint32 uiLineSize = pBlock->MutableSize();	// amount still left to transfer
uint32 uiLineOffset = 0;					// amount already queued for transfer
const uint nMaxTransferSize = 0x3FFFFF;		// upper bound on a single transfer line
// NOTE(review): the reservation is a fixed 0x4000/4 regardless of how many chunks the loop
// below emits -- confirm it is sufficient for the largest block that can be moved.
cellGcmReserveMethodSizeInline(gpGcmContext, 0x4000/4);
GCM_FUNC( cellGcmSetTransferDataMode, nTransferMode );
// NOTE(review): i is incremented per chunk but never read
int i = 1;
// Emit one Format/Offset command pair per chunk of at most nMaxTransferSize,
// copying from the old offset to the newly allocated offset
do
{
uint32 uiTransferSize = Min<uint32>( uiLineSize, nMaxTransferSize );
GCM_FUNC( cellGcmSetTransferDataFormat, 0, 0, uiTransferSize, 1, 1, 1 );
GCM_FUNC( cellGcmSetTransferDataOffset, pBlock->Offset() + uiLineOffset, nOldOffset + uiLineOffset );
uiLineSize -= uiTransferSize;
uiLineOffset += uiTransferSize;
i++;
}
while ( uiLineSize > 0 );
// V_memmove(pBlock->DataInAnyMemory(), pOldAddress, pBlock->MutableSize() );
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
#endif
}
//////////////////////////////////////////////////////////////////////////
//
// Computation of tiled memory
//
//
// TiledMemoryTagAreaBase
//	Computes the tag area base for this block, in 0x10000-sized units, according to
//	the tiled pool the block was allocated from. Color pools are measured from the
//	start of the FB color pool; depth blocks are measured back from 0x800.
//	Returns ~0 (after Error() in GCMLOCALMEMORYBLOCKDEBUG builds) when the block does
//	not belong to one of the tiled pools.
//
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryTagAreaBase() const
{
	const CPs3gcmAllocationPool_t ePoolOfBlock = PS3GCMALLOCATIONPOOL(m_uType);
	switch ( ePoolOfBlock )
	{
	case kGcmAllocPoolTiledColorFB:		// FB color tiles go first (distance from itself, i.e. zero)
	case kGcmAllocPoolTiledColorFBQ:	// FBQ color tiles go next
	case kGcmAllocPoolTiledColor512:	// 512 color tiles go next
		// Preset pools: base is where the whole pool starts relative to the FB color pool
		return ( g_ps3gcmLocalMemoryAllocator[ePoolOfBlock].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;

	case kGcmAllocPoolTiledColorMisc:
		// Misc color tiles are placed at the front of tag area after preset pools,
		// so the base follows the block's own offset
		return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;

	case kGcmAllocPoolTiledD24S8:
		// Depth tiles are placed in the end of tag area (0-0x7FF is offset range),
		// counted backwards from the end of the block
		return 0x800 - ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin + m_uiSize ) / 0x10000;

	default:
		break;
	}

#ifdef GCMLOCALMEMORYBLOCKDEBUG
	Error( "<vitaliy> Cannot compute tiled memory tag base from a non-tiled-pool allocation!\n" );
#endif
	return ~0;
}
//
// TiledMemoryIndex
//	Maps this block to a tile index based on its allocation pool:
//	  - misc color blocks: the block's own index, shifted by the misc pool's distance
//	    from the FB color pool
//	  - depth blocks: counted down from 14 by the block's own index
//	  - any other pool: the pool's distance from the FB color pool
//
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryIndex() const
{
	const CPs3gcmAllocationPool_t ePoolOfBlock = PS3GCMALLOCATIONPOOL(m_uType);
	switch ( ePoolOfBlock )
	{
	case kGcmAllocPoolTiledColorMisc:
		// Color tiles are placed in the front
		return m_uiIndex + kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB;

	case kGcmAllocPoolTiledD24S8:
		// Depth tiles are placed as last tiles
		return 14 - m_uiIndex;

	default:
		// Every remaining pool gets a fixed slot
		return ePoolOfBlock - kGcmAllocPoolTiledColorFB;
	}
}
//
// ZcullMemoryIndex
//	Returns the block's own index for a depth (D24S8) block.
//	Any other pool yields ~0 (after Error() in GCMLOCALMEMORYBLOCKDEBUG builds).
//
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryIndex() const
{
	const CPs3gcmAllocationPool_t ePoolOfBlock = PS3GCMALLOCATIONPOOL(m_uType);

	// Depth tiles are the only zcull tiles: reject everything else up front
	if ( ePoolOfBlock != kGcmAllocPoolTiledD24S8 )
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		Error( "<vitaliy> Cannot compute zcull index from a non-zcull allocation!\n" );
#endif
		return ~0;
	}

	return m_uiIndex;
}
//
// ZcullMemoryStart
//	Returns the zcull start for a depth (D24S8) block: the block's offset
//	within the depth pool divided by 4.
//	Any other pool yields ~0 (after Error() in GCMLOCALMEMORYBLOCKDEBUG builds).
//
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryStart() const
{
	const CPs3gcmAllocationPool_t ePoolOfBlock = PS3GCMALLOCATIONPOOL(m_uType);

	// Depth tiles are the only zcull tiles: reject everything else up front
	if ( ePoolOfBlock != kGcmAllocPoolTiledD24S8 )
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		Error( "<vitaliy> Cannot compute zcull memory start from a non-zcull allocation!\n" );
#endif
		return ~0;
	}

	// 1 byte per pixel, D24S8 is 4 bytes per pixel, implicitly 4096 aligned because offset is 64Kb aligned
	return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin ) / 4;
}
//////////////////////////////////////////////////////////////////////////
//
// Allow shaderapi to query GPU memory stats:
//
// Copies the current contents of the global g_RsxMemoryStats into the caller's structure.
//	stats - [out] receives the copy; its previous contents are overwritten
// NOTE(review): the copy is made without taking s_AllocMutex in this function --
// confirm callers are fine with a snapshot that may race with allocator updates.
void GetGPUMemoryStats( GPUMemoryStats &stats )
{
stats = g_RsxMemoryStats;
}