482 lines
16 KiB
C++
Raw Permalink Normal View History

2021-07-24 21:11:47 -07:00
//========= Copyright © Valve Corporation, All rights reserved. ============//
#include "snd_ps3_mp3dec.h"
#include "vjobs/root.h"
#include "filesystem_engine.h"
#include "filesystem.h"
#include <cell/mstream.h>
#include "Mp3DecLibPpu.h"
#include "mp3declib.h"
// VJobs interface, defined in another translation unit; Mp3DecMgr registers itself with it in Init().
extern IVJobs * g_pVJobs;
// One decoder manager per MP3 decoder slot.
Mp3DecMgr g_mp3dec[NUMBER_OF_MP3_DECODER_SLOTS];
void Mp3DecMgr::Init()
{
g_pVJobs->Register( this );
}
// Disabled (#if 0) replacement for Assert: when enabled, a failed assertion prints the
// file, line and expression through Msg() and then breaks into the debugger.
#if 0 // def _DEBUG
#undef Assert
#define Assert(X) do{if( !(X) ) {Msg("Assertion failed\n%s:%d\n%s\n", __FILE__, __LINE__, #X); DebuggerBreak();};}while(0)
#endif
void Mp3DecMgr::Shutdown()
{
Finish();
g_pVJobs->Unregister( this );
}
void Mp3DecMgr::OnVjobsInit() // gets called after m_pRoot was created and assigned
{
COMPILE_TIME_ASSERT( !( ( JOBLET_COUNT - 1 ) & JOBLET_COUNT ) ); // JOBLET_COUNT must be a power of 2
V_memset( &m_jobWorker, 0, sizeof( m_jobWorker ) );
V_memset( m_joblets, 0, sizeof( m_joblets ) );
m_nMaxSpuWorkers = 1; // Use only one SPU at a time (that way each job is implicitly dependent on the previous jobs to be completed).
m_jobWorker.header = *m_pRoot->m_pJobMp3Dec;
m_jobWorker.header.sizeScratch = 114 * 1024 / 16;
m_jobWorker.header.sizeInOrInOut = job_mp3dec::IOBUFFER_SIZE;
job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
m_pWorkerParams = pParams;
m_nDecoderSize = 0;
m_nAllocatedNotKicked = 0;
int nError = cellMP3IntegratedDecoderGetRequiredSize( &m_nDecoderSize );
if( nError )
{
Warning( "cannot initialize mp3 decoding, error %d\n", nError );
}
pParams->m_eaDecoder = MemAlloc_AllocAligned( m_nDecoderSize, 128 );
pParams->m_eaJoblets = m_joblets;
cellMP3IntegratedDecoderInit( pParams->m_eaDecoder, m_nDecoderSize );
m_nDecoderCrc = CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize );
if( CommandLine()->FindParm( "-msftest" ) )
{
const char * pTestFiles[] = {
"amb_muffled_lo_mach_14.mp3",
"amb_muffled_lo_mach_15.mp3",
"amb_muffled_lo_mach_17.mp3",
"portal_4000_degrees_kelvin.msf",
"portal_android_hell.msf",
"portal_no_cake_for_you.msf",
"portal_party_escort.msf",
"portal_procedural_jiggle_bone.msf",
"portal_self_esteem_fund.msf",
"portal_still_alive.msf",
"portal_stop_what_you_are_doing.msf",
"portal_subject_name_here.msf",
"portal_taste_of_blood.msf",
"portal_you_cant_escape_you_know.msf",
"portal_youre_not_a_good_person.msf",
"error.mps",
"amb_metal_imp_warehouse_39.mps",
"aa2.msf",
"aa1.msf"
};
for( int i = 0; i < ARRAYSIZE( pTestFiles ); i++ )
{
const char * pTestFile = pTestFiles[i];
//g_pFileSystem->FileExists( pTestFile );
//StartMsfTest( pTestFile, 0 );
//StartMsfTest( pTestFile, "_skip1_noctx.wav", 3 );
StartMsfTest( pTestFile, "_dec.wav", 1 );
//StartMsfTest( pTestFile, "_ctx0.wav", 1 );
}
}
}
// Skips an ID3 tag version 2.x located at pMp3header and returns a pointer to the first
// byte after it.
//
// If the buffer does not start with "ID3", the data is assumed to be raw MP3 frames: the
// function scans forward to the first frame sync pattern (0xFF followed by a byte whose
// top three bits are set) and returns that address.
// NOTE: this scan has no end bound - the caller must guarantee that a sync pattern exists
// in the buffer.
//
// ID3v2 header layout (10 bytes): "ID3", version major, version revision, flags, and a
// 4-byte "sync-safe" size (7 significant bits per byte, most significant byte first).
// The size counts the tag data after the header; it includes neither the header nor the
// optional 10-byte footer that ID3v2.4 signals with flag bit 0x10.
uint8* SkipId3Tag( uint8 * pMp3header )
{
	if ( ( pMp3header[0] != 'I' ) || ( pMp3header[1] != 'D' ) || ( pMp3header[2] != '3' ) )
	{
		Msg( "ID3 tag v2.x not found\n" );
		while( *pMp3header != 0xFF || ( pMp3header[1] & 0xE0 ) != 0xE0 )
			++pMp3header; // sync up with mp3 bitstream
		return pMp3header; // ID3 tag not found, these are probably MP3 frames going right here
	}

	// decode the sync-safe size: 4 bytes, 7 bits each, big-endian
	uint32 size = 0;
	for( uint i = 0; i < 4; i++ )
	{
		size = ( size << 7 ) | ( uint32 )( pMp3header[i+6] & 0x7f );
	}

	// ID3v2.4 and later may append a 10-byte footer (a copy of the header) that is not part of 'size'
	const uint32 footerSize = ( pMp3header[3] >= 4 && ( pMp3header[5] & 0x10 ) ) ? 10 : 0;

	// skip 10 bytes of the header, the tag data, and the footer if there is one
	return pMp3header + 10 + size + footerSize;
}
// Debugging aid: walks the MP3 frames in [pMp3Frames, pMp3FramesEnd), asserting that every
// visited frame header (and the one following it) carries the sync pattern, and reports
// through Msg() how many of the visited frames were padded vs. unpadded.
// The walk stops as soon as the next header would not fit completely before pMp3FramesEnd.
void ValidateMp3( uint8 * pMp3Frames, uint8 * pMp3FramesEnd )
{
	// indexed by the frame's padding value: [0] = unpadded, [1] = padded
	uint nFrameCountByPadding[2] = { 0, 0 };

	uint8 *pCursor = pMp3Frames;
	while( pCursor < pMp3FramesEnd )
	{
		Mp3FrameHeader * pCurrent = ( Mp3FrameHeader * )pCursor;
		Assert( pCurrent->CheckSync() );

		uint8 *pFollowing = pCursor + pCurrent->CorrectFrameLength( pMp3FramesEnd );
		Mp3FrameHeader *pNextHeader = ( Mp3FrameHeader * )pFollowing;
		if( uintp( pNextHeader + 1 ) >= uintp( pMp3FramesEnd ) )
		{
			// the following header would reach the end of the data: stop (this frame is not counted)
			break;
		}
		Assert( pNextHeader->CheckSync() );

		nFrameCountByPadding[ pCurrent->GetPadding() ]++;
		pCursor = pFollowing;
	}

	Msg( "MP3 validation: %d padded, %d unpadded\n", nFrameCountByPadding[1], nFrameCountByPadding[0] );
}
// Decoder self-test: loads an MSF (or raw/ID3-tagged MP3) file, decodes it through the
// joblet queue and optionally writes the decoded PCM as a RIFF wave file to
// "/app_home/<pInputFile with its extension replaced by pExt>".
//
//   pInputFile - file to decode; opened with FSOPEN_NEVERINPACK. Silently returns if it cannot be opened.
//   pExt       - replacement for the extension (including the dot), e.g. "_dec.wav"
//   nMode      - bit 0 (value 1): write the output wave file
//                bit 1 (value 2): batch mode - keep up to JOBLET_COUNT joblets in flight;
//                                 otherwise frames are decoded one at a time from a private copy
//                value 4:         only run ValidateMp3() on the frames, then return
//
// Timing and bitrate statistics are printed through Msg() when no file is written.
void Mp3DecMgr::StartMsfTest( const char * pInputFile, const char *pExt, int nMode )
{
	FileHandle_t fh = g_pFileSystem->OpenEx( pInputFile, "rb", FSOPEN_NEVERINPACK );
	if( fh == FILESYSTEM_INVALID_HANDLE )
		return;
	CUtlBuffer msf;
	const bool bLoaded = g_pFileSystem->ReadToBuffer( fh, msf );
	g_pFileSystem->Close( fh ); // close the handle whether or not the read succeeded
	if( !bLoaded )
	{
		Warning("Cannot load test msf file\n");
		return;
	}

	uint8 * pMp3Frames = (uint8*)msf.Base(), *pMp3FramesEnd = pMp3Frames + msf.Size();
	{
		CellMSMSFHeader * pMsfHeader = (CellMSMSFHeader *)msf.Base();
		if( pMsfHeader->header[0] == 'M' && pMsfHeader->header[1] == 'S' && pMsfHeader->header[2] == 'F' )
		{
			if( pMsfHeader->compressionType != CELL_MS_MP3 )
			{
				Warning("Invalid compression type %d\n", pMsfHeader->compressionType );
			}
			// one of the samples comments that 0x10 is the bit responsible for -loop option at the time of compilation. Documentation states "bit 4" , so it means all the bits in documentation are little-endian.
			// See MSWrapResource.cpp : 173 (romaji) MSF fairu sakuseiji ni -loop wotsuketakadoukano handan
			Msg( "Testing %d-channel MP3 @%dHz, %d loops %s %s %s\n", pMsfHeader->channels, pMsfHeader->sampleRate, pMsfHeader->miscInfo & 0xF, pMsfHeader->miscInfo & 0x10 ? "-loop":"(no -loop)", pMsfHeader->miscInfo & 0x20 ? "VBR":"CBR", pMsfHeader->miscInfo & 0x40 ? "joint stereo":"" );
			// the MP3 frames follow the MSF header directly
			pMp3Frames = (uint8*)( pMsfHeader + 1 );
		}
		else
		{
			// not an MSF container: treat it as an MP3 file, possibly starting with an ID3 tag
			pMp3Frames = SkipId3Tag( (uint8*)msf.Base() );
		}
	}

	// Build the output file name: replace the extension of the input name with pExt.
	char outputFile[256];
	V_snprintf( outputFile, sizeof( outputFile ), "/app_home/%s", pInputFile );
	char *pExtStart = V_strrchr( outputFile, '.' );
	if( !pExtStart )
	{
		// the input name has no extension: append pExt at the end instead of writing through NULL
		pExtStart = outputFile;
		while( *pExtStart )
			++pExtStart;
	}
	// only the space remaining after pExtStart may be written, not the size of the whole buffer
	V_strncpy( pExtStart, pExt, (int)( sizeof( outputFile ) - ( pExtStart - outputFile ) ) );

	FILE *fOut = ( nMode & 1 ) ? fopen( outputFile, "wb" ) : NULL;
	RiffWavHeader hdr;
	if( fOut )
	{
		// reserve room for the header; it is rewritten with the real values once decoding is done
		fwrite( &hdr, 1, sizeof( hdr ), fOut );
	}
	else
	{
		V_strcpy( outputFile, "<null>" );
	}

	while( GetWorkerJobParams()->m_nWorkers > 1 )
		sys_timer_usleep(100);
	m_nMaxSpuWorkers = 1; // so that context is serialized

	Mp3DecContext * pMp3Context = (Mp3DecContext * )MemAlloc_AllocAligned( sizeof( Mp3DecContext ), 128 );
	if( pMp3Context ) pMp3Context->Init();

	Msg("Decompressing %s into %s\n", pInputFile, outputFile );

	uint nTotalSamples = 0;
	CUtlVector<int16> arrWave;
	uint nChannelFlags = Mp3DecJoblet::FLAG_STEREO;

	ValidateMp3( pMp3Frames, pMp3FramesEnd );
	if( nMode == 4 )
	{
		// validation only: release what was acquired above before leaving
		if( pMp3Context )
			MemAlloc_FreeAligned( pMp3Context );
		if( fOut )
			fclose( fOut );
		return;
	}

	Mp3FrameHeader * pMp3FrameHeader = ( Mp3FrameHeader *)pMp3Frames;
	uint nSamplingRate = pMp3FrameHeader->GetFrameSamplingRate();
	float flBitrateSum = 0;
	uint nBitrateFrames = 0;
	uint nTickStart = __mftb();
	uint nBatchFrames = 1;

	EnterWorkerLock();

	const uint nMaxSkipFrames = 1;
	uint nSkipFrames = 0;
	uint8 * pPreviousFrame[nMaxSkipFrames+1];

	while( pMp3Frames < pMp3FramesEnd )
	{
		if( nMode & 2 )
		{
			// Batch mode: allocate and kick up to JOBLET_COUNT joblets, then wait for all of them.
			const uint nMaxParallelJoblets = JOBLET_COUNT;
			// each joblet gets its own 0x901 * nBatchFrames sample slot in the wave buffer
			arrWave.SetCount( nBatchFrames * nMaxParallelJoblets * 0x901 );
			Mp3DecJoblet *pDec[nMaxParallelJoblets];
			uint nDecCount = 0;
			for( uint i = 0; i < JOBLET_COUNT; ++i )
			{
				Assert( !m_joblets[i].IsAllocated() );
			}
			for( nDecCount = 0; nDecCount < nMaxParallelJoblets; ++nDecCount )
			{
				pPreviousFrame[0] = pMp3Frames;
				if( pMp3Frames + 4 >= pMp3FramesEnd )
					break;

				// gather up to nBatchFrames consecutive frames for this joblet
				uint nFrameLength = 0, nBatchedFrames = 0;
				while( nBatchedFrames < nBatchFrames )
				{
					Mp3FrameHeader* pFrame = ( Mp3FrameHeader* )( pMp3Frames + nFrameLength );
					// check the bounds before looking at the header
					if( pMp3Frames + nFrameLength > pMp3FramesEnd || !pFrame->CheckSync() )
						break;
					nFrameLength += pFrame->CorrectFrameLength( pMp3FramesEnd );
					flBitrateSum += pFrame->GetBitrateKbps();
					nBitrateFrames ++;
					nBatchedFrames ++;
				}
				if( nFrameLength == 0 )
					break;

				pDec[nDecCount] = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN | Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY );
				// without a context, start decoding from an earlier frame and skip the samples it produces
				pDec[nDecCount]->m_eaMp3 = pMp3Context ? pMp3Frames : pPreviousFrame[nSkipFrames];
				pDec[nDecCount]->m_eaMp3End = pMp3Frames + nFrameLength;
				pDec[nDecCount]->m_eaWave = arrWave.Base() + nDecCount * 0x901 * nBatchFrames;
				pDec[nDecCount]->m_eaWaveEnd = arrWave.Base() + nDecCount * 0x901 * nBatchFrames + 0x900 * nBatchFrames;
				pDec[nDecCount]->m_eaContext = pMp3Context;
				pDec[nDecCount]->m_nSkipSamples = pMp3Context ? 0 : nSkipFrames * 0x480;
				pMp3Frames += nFrameLength;
				KickPending();

				// shift the history of previous frame starts
				for( uint i = nMaxSkipFrames; i-->0; )
					pPreviousFrame[i+1] = pPreviousFrame[i];
				nSkipFrames = MIN( nMaxSkipFrames, nSkipFrames + 1 );
			}
			if( nDecCount == 0 )
				break;// finished

			for( uint i = 0 ; i < nDecCount; ++i )
			{
				Wait( pDec[i] );
				Assert( pDec[i]->m_nFlags & pDec[i]->FLAG_DECODE_COMPLETE );
				Assert( pDec[i]->m_eaWavePut == pDec[i]->m_eaWaveEnd || pDec[i]->m_eaWavePut == pDec[i]->m_eaWave || i + 1 == nDecCount );
				uint nSamplesDecoded = pDec[i]->m_eaWavePut - pDec[i]->m_eaWave;
				Assert( nSamplesDecoded <= 0x900 * nBatchFrames );
				nTotalSamples += nSamplesDecoded;
				if( fOut )
				{
					fwrite( pDec[i]->m_eaWave, ( uintp( pDec[i]->m_eaWavePut ) - uintp( pDec[i]->m_eaWave ) ) & -2, 1, fOut );
				}
				DeleteDecode( pDec[i] );
			}
			for( uint i = 0; i < JOBLET_COUNT; ++i )
			{
				Assert( !m_joblets[i].IsAllocated() );
			}
		}
		else
		{
			// Single-frame mode: decode one frame at a time from a private copy of the frame.
			Mp3DecJoblet *pDec = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN /*| Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY*/ );
			arrWave.SetCount( 0x900 );
			uint nFrameSize = ((Mp3FrameHeader*)pMp3Frames)->CorrectFrameLength( pMp3FramesEnd );
			uint8 * pFrameCopy = new uint8[ nFrameSize ];
			V_memcpy( pFrameCopy, pMp3Frames, nFrameSize );
			Msg("Decoding %u-byte frame @%p..", nFrameSize, pMp3Frames );
			pDec->m_eaMp3 = pFrameCopy;
			pDec->m_eaMp3End = pFrameCopy + nFrameSize;
			pDec->m_eaWave = arrWave.Base();
			pDec->m_eaWaveEnd = arrWave.Base() + arrWave.Count();
			pDec->m_eaContext = pMp3Context;
			KickPending();
			Wait( pDec );

			nChannelFlags = pDec->m_nFlags & Mp3DecJoblet::FLAGS_MONO_OR_STEREO; // choose whichever (mono or stereo) the job decoded
			uint nSamplesDecoded = pDec->m_eaWavePut - pDec->m_eaWave;
			// the flags may come back without a mono/stereo bit (e.g. on a failed decode): don't divide by zero
			Msg( "%d chan, %d samples\n", nChannelFlags, nChannelFlags ? nSamplesDecoded / nChannelFlags : 0 );
			nTotalSamples += nSamplesDecoded;
			if( fOut )
			{
				fwrite( arrWave.Base(), ( uintp( pDec->m_eaWavePut ) - uintp( pDec->m_eaWave ) ) & -2, 1, fOut );
			}
			pMp3Frames += pDec->m_eaMp3Get - pDec->m_eaMp3;

			// Read the results BEFORE releasing the joblet: DeleteDecode() zeroes m_nFlags (so the
			// error flag would be lost) and makes the joblet available for reuse.
			const bool bDecodeError = ( pDec->m_nFlags & pDec->FLAG_DECODE_ERROR ) != 0;
			const bool bNothingDecoded = pDec->m_eaWavePut <= pDec->m_eaWave;
			DeleteDecode( pDec );
			delete[]pFrameCopy;

			if( bDecodeError )
			{
				Warning("Mp3 Decoder Error\n");
				break;
			}
			if( bNothingDecoded )
			{
				break; // nothing was decoded
			}
		}
	}

	float flBitrate = nBitrateFrames ? flBitrateSum / nBitrateFrames : 0;
	LeaveWorkerLock();
	if( pMp3Context )
		MemAlloc_FreeAligned( pMp3Context );

	const char * pszSampleCh = "mono";
	uint nChannelCount = 1;
	if( nChannelFlags & Mp3DecJoblet::FLAG_STEREO )
	{
		pszSampleCh = "stereo";
		nChannelCount = 2;
	}
	// nTotalSamples counted individual int16 values so far; convert to per-channel sample count
	nTotalSamples /= nChannelCount;
	float flSeconds = (nTotalSamples) / float( nSamplingRate );
	uint nTicksTotal = __mftb() - nTickStart;
	if( fOut )
		Msg( "Writing %dHz %.1f second Riff Wave File, %d %s samples\n", nSamplingRate, flSeconds, nTotalSamples, pszSampleCh );
	else
	{
		Msg( "%d %s samples @%dHz @%.1f kbps = %.1f seconds in %.2f ms, ratio = %.2f%%\n", nTotalSamples, pszSampleCh, nSamplingRate, flBitrate, flSeconds, nTicksTotal / 79800.0f, 100 * ( nTicksTotal / 79800000.0f ) / ( flSeconds ) );
	}

	hdr.Init( nTotalSamples, nChannelCount, 16, nSamplingRate );
	if( fOut )
	{
		// go back and overwrite the placeholder header with the final one
		fseek( fOut, 0, SEEK_SET );
		fwrite( &hdr, 1, sizeof( hdr ), fOut );
		fclose( fOut );
	}
}
void Mp3DecMgr::OnVjobsShutdown() // gets called before m_pRoot is about to be destructed and NULL'ed
{
Finish();
job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
if( m_nDecoderCrc != CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize ) )
{
Warning( "MP3 Decoder is corrupted; please tell Sergiy\n" );
}
MemAlloc_FreeAligned( pParams->m_eaDecoder );
}
// Allocates a joblet from the joblet ring buffer.
//   nFlags - flags stored into the new joblet; FLAG_ALLOCATED is OR'ed in.
// Returns a zero-initialized joblet (apart from m_nFlags), or NULL when every candidate
// slot from m_nAllocatedNotKicked up to JOBLET_COUNT turned out to be still allocated.
// The returned joblet is counted in m_nAllocatedNotKicked; it is not submitted until
// KickPending() is called.
// May sleep in 60-microsecond steps while waiting for m_nGet to advance.
Mp3DecJoblet * Mp3DecMgr::NewDecode( uint nFlags )
{
job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
// there are JOBLET_COUNT joblets in the ring buffer. The first m_nAllocatedNotKicked (counting from m_nGet index)
// are already taken (allocated) and we cannot wait for them or allocate them because they aren't even kicked yet
// So somebody later will kick them, but for now we have to let them be.
// Cycle through the remaining joblets and find one that's free and allocate it (return a pointer to it)
int nSleepCounter = 0;
// i and m_nAllocatedNotKicked advance together: every pass either returns or increments both
for( uint i = m_nAllocatedNotKicked; i < JOBLET_COUNT; ++i )
{
// let's try to see if this joblet with this index is available for allocation
uint nTryAllocateIndex = pParams->m_nPut + i;
// unsigned subtraction: the distance stays correct when the counters wrap around
while( nTryAllocateIndex - pParams->m_nGet >= JOBLET_COUNT )
{
// this joblet is in previous ring of the ring buffer. SPU is working on it. Perhaps it's free,
// but even if it is, we need to let SPU realize that and advance m_nGet pointer.
// this joblet logically is not allocated yet, but it occupies the same space in memory as one of the joblets previously allocated
// in the previous ring of the joblet ring buffer.
// there are probably workers working on this joblet, but by this line they may have exited. If they did
// then the queue must be empty (put == get)
Assert( pParams->m_nWorkers || pParams->m_nPut == pParams->m_nGet );
// at all times, put and get must be within this distance (the size of the ring buffer)
Assert( pParams->m_nPut - pParams->m_nGet <= JOBLET_COUNT );
// wait for SPU to advance get pointer
sys_timer_usleep( 60 );
++nSleepCounter;
}
// if this joblet is free, we can now use it because SPU is past this point
// the mask wraps the running index into the ring (JOBLET_COUNT is a power of 2)
Mp3DecJoblet *pNextJoblet = &m_joblets[ nTryAllocateIndex & ( JOBLET_COUNT - 1 ) ];
#ifdef _DEBUG
// snapshot of the joblet; it is not read anywhere else in this function - presumably kept for inspection in a debugger
Mp3DecJoblet jobletState;
__sync(); // try to flush pending DMA's, to increase the probability of atomic copy (still not guaranteed, but it's for debugging only)
V_memcpy( &jobletState, pNextJoblet, sizeof( jobletState ) );
#endif
if( !pNextJoblet->IsAllocated() )
{
// we found a joblet that is not allocated and is not worked by SPU. Return it.
V_memset( pNextJoblet, 0, sizeof( *pNextJoblet ) );
pNextJoblet->m_nFlags = nFlags | Mp3DecJoblet::FLAG_ALLOCATED;
m_nAllocatedNotKicked++; // we'll need to kick this joblet
return pNextJoblet;
}
else
{
// we found a joblet that spu finished working on, but it's not free. We must skip it.
// it is still counted, so the next KickPending() advances m_nPut past this slot as well
m_nAllocatedNotKicked++;
}
}
// NOTE(review): this point is only reached when no joblet could be allocated, so a long wait
// that ends in a successful allocation is never reported - confirm that this is intended.
if ( nSleepCounter >= 8 )
{
// If we had to wait more than 0.5 ms, let's print something...
// each sleep is 60 microseconds, so nSleepCounter * 0.060 is the wait in milliseconds
Warning( " Mp3DecMgr::NewDecode() waited for more than %f\n", (float)nSleepCounter * 0.060f );
}
return NULL;
}
// kick ALL pending allocated not kicked jobs
void Mp3DecMgr::KickPending()
{
if( !m_nAllocatedNotKicked )
return;
job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
__lwsync(); // order the previous writes with submitting this joblet for processing
uint nNewPut = cellAtomicAdd32( &pParams->m_nPut, m_nAllocatedNotKicked ) + m_nAllocatedNotKicked;
m_nAllocatedNotKicked = 0;
Assert( nNewPut == pParams->m_nPut );
__lwsync(); // order joblet submission with starting another job
uint nWorkersNeeded = MIN( m_nMaxSpuWorkers, ( nNewPut - pParams->m_nGet ) / 8 + 1 );
while( pParams->m_nWorkers < nWorkersNeeded )
{
cellAtomicIncr32( ( uint32* ) &pParams->m_nWorkers );
// spawn another worker
m_pRoot->m_queuePortSound.pushJob( &m_jobWorker.header, sizeof( m_jobWorker ), 0, 0 );
}
}
// Releases a joblet obtained from NewDecode(). Waits for the joblet first, then clears all
// of its flags, which also clears FLAG_ALLOCATED and so lets NewDecode() hand it out again.
// Anything the caller needs from m_nFlags must be read before calling this.
void Mp3DecMgr::DeleteDecode( Mp3DecJoblet *pJoblet )
{
	Wait( pJoblet );
	Assert( pJoblet->IsComplete() && pJoblet->IsAllocated() );

	// it's free now, even if it's in the list of joblets to process
	pJoblet->m_nFlags = 0;
}