2021-07-24 21:11:47 -07:00

486 lines
14 KiB
C++

//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "SpuMgr_spu.h"
#include <cell/atomic.h>
#ifndef _CERT
#include <libsn_spu.h>
#endif
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// singleton instance
SpuMgr gSpuMgr __attribute__((aligned(128)));
unsigned char gUnalignedMem[16] __attribute__((aligned(16)));
MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// SPU_memcpy
//
// Services one main-memory to main-memory copy request, staging the data through local store.
//
// pBuf1 - local-store buffer that receives each chunk fetched from the source (up to 8192 bytes)
// pBuf2 - local-store buffer in which the outgoing data is assembled at the destination's
//         16-byte misalignment; the code touches up to 8192 + 16 bytes of it
//
// The request is described by a MemCpyHeader whose address arrives through the mailbox. The source
// must be 16-byte aligned (asserted below); the destination may be misaligned.
// NOTE(review): both buffers are presumably required to be 16-byte aligned for the DMAs - confirm
// against the callers.
void SPU_memcpy( void *pBuf1, void *pBuf2 )
{
// Value read from the mailbox; it is used below as the address of the request header to fetch
uint32_t header;
gSpuMgr.ReadMailbox( &header );
gSpuMgr.MemcpyLock();
// Pull the request header into local store and wait for it (0x1 is the completion mask for tag 0)
gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, header, sizeof( MemCpyHeader ), 0 );
gSpuMgr.DmaDone( 0x1 );
DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );
uint32_t sizeAligned;
uint32_t sizeAlignedDown;
uint32_t dstAlignedDown;
uint32_t offset;
// gUnalignedMem supplies the bytes that precede dst inside its first 16-byte line.
// NOTE(review): cacheLine presumably holds the current contents of that destination line - confirm
memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );
// Copy in 8192-byte chunks while more than one chunk's worth of data remains
while ( gMemCpyHeader.size > 8192 )
{
sizeAligned = 8192;
// offset = how far dst sits past the 16-byte boundary below it
dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
offset = gMemCpyHeader.dst - dstAlignedDown;
gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
if ( offset )
{
// Prefix the outgoing buffer with the bytes that belong in front of dst
memcpy( pBuf2, gUnalignedMem, offset );
}
memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, sizeAligned );
gSpuMgr.DmaSync();
// The put always starts on a 16-byte boundary and is rounded up to whole 16-byte lines
gSpuMgr.DmaPut( dstAlignedDown, pBuf2, SPUMGR_ALIGN_UP( sizeAligned + offset, 16 ), 0 );
gSpuMgr.DmaDone( 0x1 );
// Keep the last (possibly partial) 16-byte line so the next put can rewrite it in full
sizeAlignedDown = SPUMGR_ALIGN_DOWN( sizeAligned + offset, 16 );
memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + sizeAlignedDown ), 16 );
gMemCpyHeader.size -= sizeAligned;
gMemCpyHeader.dst += 8192;
gMemCpyHeader.src += 8192;
}
// Final chunk (at most 8192 bytes): the fetch size is rounded up to a multiple of 16
sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );
dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
offset = gMemCpyHeader.dst - dstAlignedDown;
gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
if ( offset )
{
memcpy( pBuf2, gUnalignedMem, offset );
}
// Only the bytes actually requested are staged behind the prefix
memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, gMemCpyHeader.size );
// NOTE(review): the put size is rounded up to 16 bytes, so any bytes between the end of the copy
// and the end of its last 16-byte line are written from whatever pBuf2 holds there - confirm
// that callers tolerate this
sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size + offset, 16 );
gSpuMgr.DmaSync();
gSpuMgr.DmaPut( dstAlignedDown, pBuf2, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
if ( gMemCpyHeader.blocking )
{
// Blocking requests get a 0 written to the mailbox once the copy has been put
gSpuMgr.WriteMailbox( 0 );
}
gSpuMgr.MemcpyUnlock();
}
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
//
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
// Returns non-zero when (src, dest, size) describe a transfer that satisfies the DMA restrictions
// documented on SpuMgr::DmaGetUNSAFE, and 0 otherwise:
//   - size is 1, 2, 4, 8 or a non-zero multiple of 16
//   - both addresses are non-null
//   - for sizes below 16 the low 4 bits of both addresses match and both are aligned to size
//   - for multiples of 16 both addresses are 16-byte aligned
// In _CERT builds the check is compiled out and the function always returns 1.
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if defined( _CERT )
    return 1;
#else // !_CERT
    uint32_t requiredAlign;

    if ( ( size >= 16 ) && ( ( size & 0xf ) == 0 ) )
    {
        // whole quadwords: 16-byte alignment is enough regardless of length
        requiredAlign = 16;
    }
    else if ( size == 1 || size == 2 || size == 4 || size == 8 )
    {
        // sub-quadword transfers must sit at the same offset inside their 16-byte line
        if ( ( src & 0xF ) != ( dest & 0xF ) )
        {
            return 0;
        }
        requiredAlign = size; // natural alignment
    }
    else
    {
        return 0; // bad size
    }

    if ( !src || !dest )
    {
        return 0;
    }

    return ( SPUMGR_IS_ALIGNED(src, requiredAlign) &&
             SPUMGR_IS_ALIGNED(dest, requiredAlign) ) ? 1 : 0;
#endif // _CERT
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// One-time SPU-side setup: restarts the decrementer, clears the transfer statistics and the malloc
// count, then reads two values from the mailbox into m_lockEA and m_memcpyLockEA.
// Always returns 0.
int SpuMgr::Init()
{
    // Event bit used for the decrementer in the event mask / acknowledge channels
    const unsigned int kDecrementerEvent = 0x20;

    // The decrementer may not have been started by default, so restart it here.
    // Step 1: take the current event mask and write it back with the decrementer event masked off.
    const unsigned int savedEventMask = spu_readch(SPU_RdEventStatMask);
    spu_writech(SPU_WrEventMask, savedEventMask & ~kDecrementerEvent);

    // Step 2: acknowledge any decrementer event that is already pending.
    spu_writech(SPU_WrEventAck, kDecrementerEvent);

    // Step 3: writing a value to the decrementer starts it; reuse whatever it currently reads.
    const unsigned int currentCount = spu_readch(SPU_RdDec);
    spu_writech(SPU_WrDec, currentCount);

    // Step 4: restore the mask with the decrementer event enabled.
    spu_writech(SPU_WrEventMask, savedEventMask | kDecrementerEvent);

    // Statistics start from zero
    ResetBytesTransferred();
    m_mallocCount = 0;

    // Effective addresses of the SPU locks are delivered through the mailbox, general lock first
    ReadMailbox( &m_lockEA );
    ReadMailbox( &m_memcpyLockEA );

    return 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Currently a no-op: the body is empty.
void SpuMgr::Term()
{
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGet
//
// DmaGet - alignment and size checking
// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)
// _DmaGet - handles badly aligned dma's, should be a private member really (doesn't handle small dma's)
//
// DMA restrictions
// An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,
// 8, and 16 bytes and multiples of 16 bytes
// Furthermore, if size is 1, 2, 4, or 8 bytes then lower 4 bits
// of LS and EA must match
//
// Note:
// Peak performance is achieved for transfers in which both the EA and
// the LSA are 128-byte aligned and the size of the transfer is a multiple
// of 128 bytes.
//--------------------------------------------------------------------------------------------------
// Starts a get (main memory -> local store) on the given tag without fixing up alignment; the
// arguments are only validated by debug assertions. The call does not wait for the transfer to
// complete.
//
// ls    - local-store destination
// ea    - main-memory source (must be non-zero and below 0xd0000000)
// size  - byte count; must satisfy DmaCheckAlignment together with ls and ea
// tagId - tag the transfer is issued on
//
// Requests larger than 0x4000 bytes are split into consecutive 0x4000-byte commands.
void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( ea < 0xd0000000 );
    DEBUG_ERROR( ea );
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // Capture the request size up front: the loop below counts `size` down to zero, so reading it
    // afterwards (as the statistics update used to) always added 0 to the counters.
    const uint32_t totalSize = size;

    // do the dma
    while (size)
    {
        uint32_t dmaSize = 0x4000;
        dmaSize = (size < dmaSize)? size: dmaSize;
        size -= dmaSize;

        // kick off dma
        spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD);
        m_numDMATransfers++;

        ls = (void*)((uint32_t)ls + dmaSize);
        ea += dmaSize;
    }

    // add up bytes transferred; no padding is added on this path, so both counters grow equally
    m_bytesRequested += totalSize;
    m_bytesTransferred += totalSize;
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::_DmaGet
//
// Internal function - do not call this directly
//--------------------------------------------------------------------------------------------------
// Get that tolerates arbitrary alignment of ls, ea and size.
//
// The request is widened to a 16-byte aligned window in main memory. If any widening was needed,
// or ls itself is not 16-byte aligned, the window is fetched into a temporary MemAlign'd buffer,
// the call blocks until the tag completes, and the requested bytes are copied out to ls.
// Otherwise the data is DMA'd straight into ls and the call returns without waiting.
//
// Internal function - do not call this directly
void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    const uint32_t kQuadMask = 0xF;
    const uint32_t kMaxDmaBytes = 0x4000;

    // Bytes between the 16-byte boundary below ea and ea itself
    const uint32_t headBytes = ea & kQuadMask;
    uint32_t srcEA = ea - headBytes;

    // Window size: leading pad plus request, rounded up to whole quadwords
    const uint32_t windowBytes = size + headBytes;
    const uint32_t paddedSize = ( windowBytes + kQuadMask ) & ~kQuadMask;

    const bool viaTemp = ( headBytes != 0 ) ||
                         ( ( windowBytes & kQuadMask ) != 0 ) ||
                         ( ( (uint32_t)ls & kQuadMask ) != 0 );

    char *pBounce = NULL;
    uint32_t dstLS = (uint32_t)ls;
    if ( viaTemp )
    {
        // NOTE(review): the MemAlign result is not checked before it is used as a DMA target
        pBounce = (char*)MemAlign(0x10, paddedSize);
        dstLS = (uint32_t)pBounce;
    }

    // statistics: what the caller asked for vs. what actually crosses the bus
    m_bytesRequested += size;
    m_bytesTransferred += paddedSize;

    // issue the window in commands of at most 0x4000 bytes
    for ( uint32_t remaining = paddedSize; remaining != 0; )
    {
        const uint32_t chunk = ( remaining < kMaxDmaBytes ) ? remaining : kMaxDmaBytes;
        remaining -= chunk;

        spu_mfcdma64( (void*)dstLS, 0, srcEA, chunk, tagId, MFC_GET_CMD);
        m_numDMATransfers++;

        dstLS += chunk;
        srcEA += chunk;
    }

    if ( !viaTemp )
    {
        return;
    }

    // the copy-out happens right here, so wait for this tag first
    DmaDone(1 << tagId);
    memcpy(ls, pBounce + headBytes, size);
    Free(pBounce);
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetSAFE
//
// DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are
// handled transparently by this function
//--------------------------------------------------------------------------------------------------
// Get that accepts any ls / ea / size combination.
//
// Transfers that already satisfy the DMA restrictions (see SpuMgr::DmaGetUNSAFE) are issued
// directly through DmaGetUNSAFE; everything else is routed through _DmaGet, which fixes up the
// alignment using a temporary buffer and blocks until the data has been copied out.
//
// ls    - local-store destination
// ea    - main-memory source, must be non-zero
// size  - number of bytes to fetch
// tagId - tag the transfer is issued on
void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( ea );

    const uint32_t lsLow = (uint32_t)ls & 0xF;
    const uint32_t eaLow = ea & 0xF;

    bool canDmaDirectly;
    if( size < 0x10 )
    {
        // if < 16B can only get 1, 2, 4 or 8B
        const bool validSmallSize = ( size==0x1 || size==0x2 || size==0x4 || size==0x8 );

        // The low 4 bits of ls and ea have to match. Each side is parenthesised explicitly because
        // `==` binds tighter than `&`; the previous unparenthesised comparison did not compare the
        // two offsets at all. The transfer must also be naturally aligned, which (given matching
        // low bits) only needs checking on one of the two addresses.
        canDmaDirectly = validSmallSize &&
                         ( lsLow == eaLow ) &&
                         ( ( ea & ( size - 1 ) ) == 0 );
    }
    else
    {
        canDmaDirectly = ( ( size & 0xF ) == 0 ) && // has to be multiple of 16B, &
                         ( lsLow == 0 ) &&          // ea and ls have to be 16B aligned
                         ( eaLow == 0 );
    }

    if( canDmaDirectly )
    {
        // alignment is okay just dma
        DmaGetUNSAFE(ls,ea,size,tagId);
    }
    else
    {
        // bad size or alignment: let _DmaGet fix it up
        _DmaGet(ls,ea,size,tagId);
    }
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPut
//--------------------------------------------------------------------------------------------------
// Starts a put (local store -> main memory) on the given tag. Arguments are validated by debug
// assertions only, and the call does not wait for completion.
//
// ea    - main-memory destination (non-zero and below 0xd0000000)
// ls    - local-store source (below 0x40000)
// size  - byte count; must satisfy DmaCheckAlignment together with ls and ea
// tagId - tag the transfer is issued on
void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea
    DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // a single command moves at most 0x4000 bytes, so larger puts are issued piecewise
    const uint32_t kMaxDmaBytes = 0x4000;

    for ( uint32_t remaining = size; remaining != 0; )
    {
        const uint32_t chunk = ( remaining < kMaxDmaBytes ) ? remaining : kMaxDmaBytes;
        remaining -= chunk;

        // initiate dma to ppu
        spu_mfcdma64( ls, 0, ea, chunk, tagId, MFC_PUT_CMD);

        ls = static_cast<char*>( ls ) + chunk;
        ea += chunk;
    }
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaSmallPut
//--------------------------------------------------------------------------------------------------
// Put (local store -> main memory) issued as a sequence of small commands. The element size is the
// largest of 8, 4, 2 or 1 bytes that divides size evenly, and one command of that size is issued
// per element. Arguments are validated by debug assertions only; the call does not wait for
// completion.
//
// ea    - main-memory destination (non-zero and below 0xd0000000)
// ls    - local-store source (below 0x40000)
// size  - total byte count
// tagId - tag the transfers are issued on
void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea
    DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // widest element that tiles the request exactly
    const uint32_t step = ( ( size % 8 ) == 0 ) ? 8 :
                          ( ( size % 4 ) == 0 ) ? 4 :
                          ( ( size % 2 ) == 0 ) ? 2 : 1;

    for ( ; size != 0; size -= step )
    {
        // initiate dma to ppu
        spu_mfcdma64( ls, 0, ea, step, tagId, MFC_PUT_CMD);

        ls = static_cast<char*>( ls ) + step;
        ea += step;
    }
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetList
//
// Gather data scattered around main mem, MFC will run through the list, and place the elements (based on ea address and size)
// contiguously in ls.
//
// NOTE: if an individual list element size is <16B, the data will still be dma'd but the following element will be placed
// on the next 16B boundary. So it is possible to get lots of small elements, but you will be left with gaps in ls.
//
// ls - ls address of where items will be placed (contiguously)
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// -ls and lsList must be 8B aligned
// -size must be a multiple of 8B (sizeof(DMAList))
// -no more than 2048 list elements
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Starts a list get on the given tag: the elements described by pLS_List are gathered into ls.
// Arguments are validated by debug assertions only; the call does not wait for completion.
//
// ls       - local-store address the gathered data is written to
// pLS_List - local-store address of the DMA list
// sizeList - size of the list in bytes (number of elements * sizeof(DMAList))
// tagId    - tag the transfer is issued on
void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId)
{
    DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 ); // the list itself must be 8B aligned in ls
    DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // the data address in ls must be 8B aligned as well
    DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B

    // no more than 2048 list elements; a list of exactly 2048 is legal, hence <= rather than <
    DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));

    // initiate dma list
    spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD );
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPutList
//
// Scatter data held contiguously in ls, to main mem
//
// ls - ls address of where items exist (contiguously) to be scattered back to main mem
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList))
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Starts a list put on the given tag: data starting at ls is scattered to the elements described
// by pLS_List. Arguments are validated by debug assertions only; the call does not wait for
// completion.
//
// ls       - local-store address of the data to scatter
// pLS_List - local-store address of the DMA list
// sizeList - size of the list in bytes (number of elements * sizeof(DMAList))
// tagId    - tag the transfer is issued on
void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId)
{
    DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 ); // the list itself must be 8B aligned in ls
    DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // the data address in ls must be 8B aligned as well
    DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B

    // no more than 2048 list elements; a list of exactly 2048 is legal, hence <= rather than <
    DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));

    // initiate dma list
    spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD );
}