224 lines
6.9 KiB
C++
224 lines
6.9 KiB
C++
//========== Copyright © Valve Corporation, All rights reserved. ========
|
|
// This is shared between SPU and PPU
|
|
|
|
#ifndef COMMON_PS3_VJOBUTILS_SHARED_HDR
|
|
#define COMMON_PS3_VJOBUTILS_SHARED_HDR
|
|
|
|
|
|
|
|
#ifndef _PS3
|
|
#error "This is PS3 specific header"
|
|
#endif
|
|
|
|
#include "spu_job_shared.h"
|
|
#include <cell/spurs/job_descriptor.h>
|
|
|
|
|
|
template <typename T>
|
|
inline void AddInputDma( T * pJob, uint nSize, const void * pEa )
|
|
{
|
|
Assert( !( nSize & 0xF ) );
|
|
Assert( !( pJob->header.sizeDmaList & ( sizeof( uint64 ) - 1 ) ) );
|
|
//int nError = cellSpursJobGetInputList( &pJob->workArea.dmaList[pJob->header.sizeDmaList / sizeof( uint64_t )], nSize, (uint32) pEa );
|
|
//Assert( nError == CELL_OK );
|
|
pJob->workArea.dmaList[pJob->header.sizeDmaList / sizeof( uint64_t )] = ( uint64( nSize ) << 32 ) | ( uint32 )pEa;
|
|
|
|
// ( nSIze << 32 ) | pEa
|
|
pJob->header.sizeDmaList += sizeof( uint64_t );
|
|
pJob->header.sizeInOrInOut += nSize;
|
|
Assert( pJob->header.sizeDmaList <= sizeof( pJob->workArea ) );
|
|
}
|
|
|
|
// Pads the job's input DMA list with a zeroed element so that header.sizeDmaList becomes a
// multiple of 16 bytes.
//
//   pJob - job descriptor; must expose header.sizeDmaList and workArea.dmaList
//
// When the list is already 16-byte aligned nothing is written: the slot following the list is not
// part of it, and may lie outside the work area when the list is full.
template <typename T>
inline void AlignInputDma( T * pJob )
{
	if ( pJob->header.sizeDmaList & 15 )
	{
		// the padding element must itself fit into the work area
		Assert( pJob->header.sizeDmaList < sizeof( pJob->workArea ) );
		pJob->workArea.dmaList[pJob->header.sizeDmaList / sizeof( uint64_t )] = 0;
		pJob->header.sizeDmaList = AlignValue( pJob->header.sizeDmaList, 16 );
	}
	Assert( pJob->header.sizeDmaList <= sizeof( pJob->workArea ) );
}
|
|
|
|
inline void AddCacheDma( struct CellSpursJob128 * pJob, uint nSize, const void * pEa )
|
|
{
|
|
int nError = cellSpursJobGetInputList( &pJob->workArea.dmaList[( pJob->header.sizeDmaList + pJob->header.sizeCacheDmaList ) / sizeof( uint64 ) ], nSize, (uint32)pEa );
|
|
( void )nError;
|
|
Assert( nError == CELL_OK );
|
|
pJob->header.sizeCacheDmaList += sizeof( uint64_t );
|
|
}
|
|
|
|
// Packs a transfer size and an address into a single 64-bit DMA list element:
// nSize goes into the upper 32 bits, the (32-bit truncated) pData address into the lower 32 bits.
inline uint64 MakeDmaElement( void * pData, uint nSize )
{
	const uint64 nSizeBits = uint64( nSize ) << 32;
	const uint32 nAddressBits = ( uint32 )pData;
	return nSizeBits | nAddressBits;
}
|
|
|
|
template <uint nSize>
|
|
inline void V_memcpy16( void *pDest, const void * pSrc )
|
|
{
|
|
Assert( !( nSize & 0xF ) && !( uintp( pDest ) & 0xF ) && !( uintp( pSrc ) & 0xF ) );
|
|
for( uint i = 0; i < nSize / 16; ++i )
|
|
{
|
|
( ( vector unsigned int * ) pDest )[i] = ( ( vector unsigned int * ) pSrc )[i];
|
|
}
|
|
}
|
|
|
|
class CDmaListConstructor
|
|
{
|
|
uint32 *m_pListBegin, *m_pList;
|
|
uint m_sizeInOrInOut;
|
|
uint m_sizeCacheDmaList;
|
|
public:
|
|
CDmaListConstructor( void * pList )
|
|
{
|
|
m_pList = m_pListBegin = ( uint32* )pList;
|
|
m_sizeInOrInOut = m_sizeCacheDmaList = 0;
|
|
}
|
|
|
|
void AddInputDma( uint nSize, const void *pEa )
|
|
{
|
|
Assert( !m_sizeCacheDmaList );
|
|
Assert( !( nSize & 0xF ) && nSize <= 16 * 1024 && ( !nSize || pEa ) );
|
|
Assert( !IsAddressInStack( pEa ) );
|
|
m_pList[0] = nSize;
|
|
m_pList[1] = ( uint32 )pEa;
|
|
m_pList += 2;
|
|
|
|
m_sizeInOrInOut += nSize;
|
|
}
|
|
|
|
void AddCacheDma( uint nSize, const void *pEa )
|
|
{
|
|
// WARNING : NEVER use size=0, as there's a bug in SPURS that can corrupt data if you do
|
|
Assert( !IsAddressInStack( pEa ) && pEa && nSize > 0 );
|
|
uint32 * pCache = AddBytes( m_pList, m_sizeCacheDmaList );
|
|
pCache[0] = nSize;
|
|
pCache[1] = ( uint32 )pEa;
|
|
|
|
m_sizeCacheDmaList += 8;
|
|
Assert( m_sizeCacheDmaList <= 32 );
|
|
}
|
|
|
|
void AddInputDmaLargeUnalignedRegion( void * pBegin, void * pEnd )
|
|
{
|
|
uint32 eaBeginAligned = uint32( pBegin ) & -16;
|
|
uint32 eaEndAligned = AlignValue( uint32( pEnd ), 16 );
|
|
AddInputDmaLarge( eaEndAligned - eaBeginAligned, ( void* )eaBeginAligned );
|
|
}
|
|
|
|
void* AddInputDmaUnalignedRegion( void * pBegin, void * pEnd, int nAlignment = 16 )
|
|
{
|
|
uint32 eaBeginAligned = uint32( pBegin ) & -nAlignment;
|
|
uint32 eaEndAligned = AlignValue( uint32( pEnd ), nAlignment );
|
|
AddInputDma( eaEndAligned - eaBeginAligned, ( void* )eaBeginAligned );
|
|
return ( void* )eaBeginAligned;
|
|
}
|
|
|
|
void AddInputDmaLarge( uint nMinReserve, uint nSize, const void * pEa )
|
|
{
|
|
AddInputDmaLarge( nSize, pEa );
|
|
Assert( !( nMinReserve & 15 ) );
|
|
if( nMinReserve > nSize )
|
|
{
|
|
m_sizeInOrInOut += nMinReserve - nSize;
|
|
}
|
|
}
|
|
|
|
uint AddInputDmaLargeRegion( const void * pBegin, const void * pEnd )
|
|
{
|
|
uint nSize = uintp( pEnd ) - uintp( pBegin );
|
|
AddInputDmaLarge( nSize, pBegin );
|
|
return nSize;
|
|
}
|
|
|
|
void AddInputDmaLarge( uint nSize, const void * pEa )
|
|
{
|
|
Assert( !( nSize & 0xF ) && nSize < 248 * 1024 );
|
|
Assert( !IsAddressInStack( pEa ) );
|
|
uint nSizeRemaining = nSize;
|
|
uintp eaRemaining = ( uintp )pEa;
|
|
const uint nMaxDmaElementSize = 16 * 1024;
|
|
while( nSizeRemaining > nMaxDmaElementSize )
|
|
{
|
|
m_pList[0] = nMaxDmaElementSize;
|
|
m_pList[1] = eaRemaining;
|
|
m_pList += 2;
|
|
nSizeRemaining -= nMaxDmaElementSize;
|
|
eaRemaining += nMaxDmaElementSize;
|
|
}
|
|
m_pList[0] = nSizeRemaining;
|
|
m_pList[1] = eaRemaining;
|
|
m_pList += 2;
|
|
|
|
m_sizeInOrInOut += nSize;
|
|
}
|
|
|
|
void AddSizeInOrInOut( uint nAddIoBufferSize )
|
|
{
|
|
Assert( !( nAddIoBufferSize & 0xF ) );
|
|
m_sizeInOrInOut += nAddIoBufferSize;
|
|
}
|
|
|
|
void EnsureCapacityInOrInOut( uint nCapacity )
|
|
{
|
|
Assert( !( nCapacity & 0xF ) );
|
|
m_sizeInOrInOut = Max( m_sizeInOrInOut, nCapacity );
|
|
}
|
|
|
|
void FinishIoBuffer( CellSpursJobHeader * pHeader )
|
|
{
|
|
FinishInOrIoBuffer( pHeader );
|
|
Assert( pHeader->useInOutBuffer == 1 );
|
|
}
|
|
|
|
void FinishInBuffer( CellSpursJobHeader * pHeader )
|
|
{
|
|
FinishInOrIoBuffer( pHeader );
|
|
Assert( pHeader->useInOutBuffer == 0 );
|
|
}
|
|
|
|
void FinishIoBuffer( CellSpursJobHeader * pHeader, void * pParams )
|
|
{
|
|
FinishIoBuffer( pHeader );
|
|
// we only use up to 256 byte jobs, which have up to 26 DMA slots. Check that the params belongs to this job structure
|
|
// and check that it doesn't overlap with IO DMA list or cache DMA list
|
|
Assert( uintp( pParams ) <= uintp( m_pListBegin + 26 * 2 ) && uintp( pParams ) >= uintp( m_pList ) + m_sizeCacheDmaList );
|
|
}
|
|
|
|
void FinishInBuffer( CellSpursJobHeader * pHeader, void * pParams )
|
|
{
|
|
FinishInBuffer( pHeader );
|
|
// we only use up to 256 byte jobs, which have up to 26 DMA slots. Check that the params belongs to this job structure
|
|
// and check that it doesn't overlap with IO DMA list or cache DMA list
|
|
Assert( uintp( pParams ) <= uintp( m_pListBegin + 26 * 2 ) && uintp( pParams ) >= uintp( m_pList ) + m_sizeCacheDmaList );
|
|
}
|
|
|
|
inline uint32 * operator [] ( int i )
|
|
{
|
|
uint32 * pResult = m_pListBegin + 2 * i;
|
|
Assert( pResult >= m_pList ); // are we not overwriting the dma list tail that we wrote previously?
|
|
return pResult;
|
|
}
|
|
|
|
private:
|
|
void FinishInOrIoBuffer( CellSpursJobHeader * pHeader )
|
|
{
|
|
pHeader->sizeDmaList = uintp( m_pList ) - uintp( m_pListBegin );
|
|
pHeader->sizeInOrInOut = m_sizeInOrInOut;
|
|
pHeader->sizeCacheDmaList = m_sizeCacheDmaList;
|
|
}
|
|
|
|
// We can't DMA from / to the PPU stack, let's verify that
|
|
bool IsAddressInStack( const void * pEa )
|
|
{
|
|
#if IsPlatformPS3_PPU()
|
|
uint64 fp = __reg(1);
|
|
void * minStack = ( void* )( ( uint32 ) fp - 16 * 1024 ); // The 16 * 1024 should is not really necessary (as it means somebody addresses some portion that could be erased by the stack.
|
|
// Never the less, we want to be more conservative.
|
|
void * maxStack = (void *)((uint32)fp + 64 * 1024); // Assume that the stack is 64 Kb deep, make sure there is no allocations around if the stack is smaller
|
|
return ( ( pEa >= minStack ) && ( pEa <= maxStack ) );
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
};
|
|
|
|
|
|
#endif |