1
0
mirror of https://github.com/alliedmodders/hl2sdk.git synced 2025-01-03 16:13:22 +08:00
hl2sdk/utils/phonemeextractor/phonemeextractor_ims.cpp
2013-06-26 15:22:04 -07:00

1075 lines
24 KiB
C++

//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#include <stdio.h>
#include <stdarg.h>
#include <memory.h>
#include <windows.h>
#include <mmsystem.h>
#include <mmreg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "phonemeextractor/PhonemeExtractor.h"
#include "ims_helper/ims_helper.h"
#include "tier0/dbg.h"
#include "sentence.h"
#include "PhonemeConverter.h"
#include "tier1/strtools.h"
#define TEXTLESS_WORDNAME "[Textless]"
static IImsHelper *talkback = NULL;
//-----------------------------------------------------------------------------
// Purpose: Expose the interface
//-----------------------------------------------------------------------------
class CPhonemeExtractorLipSinc : public IPhonemeExtractor
{
public:
virtual PE_APITYPE GetAPIType() const
{
return SPEECH_API_LIPSINC;
}
// Used for menus, etc
virtual char const *GetName() const
{
return "IMS (LipSinc)";
}
SR_RESULT Extract(
const char *wavfile,
int numsamples,
void (*pfnPrint)( const char *fmt, ... ),
CSentence& inwords,
CSentence& outwords );
CPhonemeExtractorLipSinc( void );
~CPhonemeExtractorLipSinc( void );
enum
{
MAX_WORD_LENGTH = 128,
};
private:
class CAnalyzedWord
{
public:
char buffer[ MAX_WORD_LENGTH ];
double starttime;
double endtime;
};
class CAnalyzedPhoneme
{
public:
char phoneme[ 32 ];
double starttime;
double endtime;
};
bool InitLipSinc( void );
void ShutdownLipSinc( void );
void DescribeError( TALKBACK_ERR err );
void Printf( char const *fmt, ... );
bool CheckSoundFile( char const *filename );
bool GetInitialized( void );
void SetInitialized( bool init );
void (*m_pfnPrint)( const char *fmt, ... );
char const *ConstructInputSentence( CSentence& inwords );
bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );
char const *ApplyTBWordRules( char const *word );
void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );
int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );
int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );
int ComputeByteFromTime( float time );
bool m_bInitialized;
float m_flSampleCount;
float m_flDuration;
float m_flSamplesPerSecond;
int m_nBytesPerSample;
HMODULE m_hHelper;
};
CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void )
{
m_hHelper = (HMODULE)0;
m_pfnPrint = NULL;
m_bInitialized = false;
m_flSampleCount = 0.0f;
m_flDuration = 0.0f;
m_flSamplesPerSecond = 0.0f;
m_nBytesPerSample = 0;
}
CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
{
if ( GetInitialized() )
{
ShutdownLipSinc();
}
}
bool CPhonemeExtractorLipSinc::GetInitialized( void )
{
return m_bInitialized;
}
void CPhonemeExtractorLipSinc::SetInitialized( bool init )
{
m_bInitialized = init;
}
int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
{
if ( !m_flDuration )
return 0;
float frac = time / m_flDuration;
float sampleNumber = frac * m_flSampleCount;
int bytenumber = sampleNumber * m_nBytesPerSample;
return bytenumber;
}
void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
{
Assert( m_pfnPrint );
// Get the error description.
char errorDesc[256] = "";
if ( err != TALKBACK_NOERR )
{
talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
}
// Report or log the error...
(*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc );
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *fmt -
// .. -
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
{
Assert( m_pfnPrint );
char string[ 4096 ];
va_list argptr;
va_start( argptr, fmt );
vsprintf( string, fmt, argptr );
va_end( argptr );
(*m_pfnPrint)( "%s", string );
}
bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
{
TALKBACK_SOUND_FILE_METRICS fm;
memset( &fm, 0, sizeof( fm ) );
fm.m_size = sizeof( fm );
TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return false;
}
if ( fm.m_canBeAnalyzed )
{
Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
filename,
fm.m_duration,
fm.m_sampleRate,
fm.m_bitsPerSample,
fm.m_channelCount );
}
m_flDuration = fm.m_duration;
if ( m_flDuration > 0 )
{
m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
}
else
{
m_flSamplesPerSecond = 0.0f;
}
m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
m_flSampleCount /= m_nBytesPerSample;
m_nBytesPerSample /= fm.m_channelCount;
return fm.m_canBeAnalyzed ? true : false;
}
typedef IImsHelper *(*pfnImsHelper)(void);
//-----------------------------------------------------------------------------
// Purpose:
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool CPhonemeExtractorLipSinc::InitLipSinc( void )
{
if ( GetInitialized() )
{
return true;
}
m_hHelper = LoadLibrary( "ims_helper.dll" );
if ( !m_hHelper )
{
return false;
}
pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
if ( !factory )
{
FreeLibrary( m_hHelper );
return false;
}
talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
if ( !talkback )
{
FreeLibrary( m_hHelper );
return false;
}
char szExeName[ MAX_PATH ];
szExeName[0] = 0;
GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
char szBaseDir[ MAX_PATH ];
Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
Q_StripTrailingSlash( szBaseDir );
Q_strlower( szBaseDir );
char coreDataDir[ 512 ];
Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
szBaseDir );
Q_FixSlashes( coreDataDir );
char szCheck[ 512 ];
Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
struct __stat64 buf;
if ( _stat64( szCheck, &buf ) != 0 )
{
Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
szBaseDir );
Q_FixSlashes( coreDataDir );
Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
if ( _stat64( szCheck, &buf ) != 0 )
{
Error( "Unable to find talkback data files in %s.", coreDataDir );
}
}
TALKBACK_ERR err;
err = talkback->TalkBackStartupLibrary( coreDataDir );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
FreeLibrary( m_hHelper );
return false;
}
long verMajor = 0;
long verMinor = 0;
long verRevision = 0;
err = talkback->TalkBackGetVersion(
&verMajor,
&verMinor,
&verRevision);
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
FreeLibrary( m_hHelper );
return false;
}
Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision );
m_bInitialized = true;
return true;
}
//-----------------------------------------------------------------------------
// Purpose:
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
{
// HACK HACK: This seems to crash on exit sometimes
__try
{
talkback->TalkBackShutdownLibrary();
FreeLibrary( m_hHelper );
}
__except(EXCEPTION_EXECUTE_HANDLER )
{
OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
}
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : inwords -
// Output : char const
//-----------------------------------------------------------------------------
char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
{
static char sentence[ 16384 ];
sentence[ 0 ] = 0;
int last = inwords.m_Words.Size() - 1;
for ( int i = 0 ; i <= last; i++ )
{
CWordTag *w = inwords.m_Words[ i ];
strcat( sentence, w->GetWord() );
if ( i != last )
{
strcat( sentence, " " );
}
}
if ( inwords.m_Words.Count() == 1 &&
!Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
{
sentence[ 0 ] = 0;
}
return sentence;
}
bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
{
*ppAnalysis = NULL;
TALKBACK_ANALYSIS_SETTINGS settings;
memset( &settings, 0, sizeof( settings ) );
// Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
// structure.
settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
// Default value: 30 (frames per second).
settings.fFrameRate = 100;
// Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
//
// Default value: 0 (normal analysis).
settings.fOptimizeForFlipbook = 0;
// Set this to -1 to seed the random number generator with the current time.
// Any other number will be used directly for the random number seed, which
// is useful if you want repeatable speech gestures. This value does not
// influence lip-synching at all.
//
// Default value: -1 (use current time).
settings.fRandomSeed = -1;
// Path to the configuration (.INI) file with phoneme-to-speech-target
// mapping. Set this to NULL to use the default mapping.
//
// Default value: NULL (use default mapping).
settings.fConfigFile = NULL;
char const *text = ConstructInputSentence( inwords );
Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME );
TALKBACK_ERR err = talkback->TalkBackGetAnalysis(
ppAnalysis,
wavfile,
text,
&settings );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return false;
}
Printf( "Analysis successful...\n" );
return true;
}
typedef struct
{
TALKBACK_PHONEME phoneme;
char const *string;
} TBPHONEMES_t;
static TBPHONEMES_t g_TBPhonemeList[]=
{
{ TALKBACK_PHONEME_IY, "iy" },
{ TALKBACK_PHONEME_IH, "ih" },
{ TALKBACK_PHONEME_EH, "eh" },
{ TALKBACK_PHONEME_EY, "ey" },
{ TALKBACK_PHONEME_AE, "ae" },
{ TALKBACK_PHONEME_AA, "aa" },
{ TALKBACK_PHONEME_AW, "aw" },
{ TALKBACK_PHONEME_AY, "ay" },
{ TALKBACK_PHONEME_AH, "ah" },
{ TALKBACK_PHONEME_AO, "ao" },
{ TALKBACK_PHONEME_OY, "oy" },
{ TALKBACK_PHONEME_OW, "ow" },
{ TALKBACK_PHONEME_UH, "uh" },
{ TALKBACK_PHONEME_UW, "uw" },
{ TALKBACK_PHONEME_ER, "er" },
{ TALKBACK_PHONEME_AX, "ax" },
{ TALKBACK_PHONEME_S, "s" },
{ TALKBACK_PHONEME_SH, "sh" },
{ TALKBACK_PHONEME_Z, "z" },
{ TALKBACK_PHONEME_ZH, "zh" },
{ TALKBACK_PHONEME_F, "f" },
{ TALKBACK_PHONEME_TH, "th" },
{ TALKBACK_PHONEME_V, "v" },
{ TALKBACK_PHONEME_DH, "dh" },
{ TALKBACK_PHONEME_M, "m" },
{ TALKBACK_PHONEME_N, "n" },
{ TALKBACK_PHONEME_NG, "ng" },
{ TALKBACK_PHONEME_L, "l" },
{ TALKBACK_PHONEME_R, "r" },
{ TALKBACK_PHONEME_W, "w" },
{ TALKBACK_PHONEME_Y, "y" },
{ TALKBACK_PHONEME_HH, "hh" },
{ TALKBACK_PHONEME_B, "b" },
{ TALKBACK_PHONEME_D, "d" },
{ TALKBACK_PHONEME_JH, "jh" },
{ TALKBACK_PHONEME_G, "g" },
{ TALKBACK_PHONEME_P, "p" },
{ TALKBACK_PHONEME_T, "t" },
{ TALKBACK_PHONEME_K, "k" },
{ TALKBACK_PHONEME_CH, "ch" },
{ TALKBACK_PHONEME_SIL, "<sil>" },
{ -1, NULL }
};
char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
{
if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
{
return "Bogus";
}
TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ];
return item->string;
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *analysis -
// time -
// start -
// Output : int
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
{
long count;
TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return -1;
}
if ( count <= 0L )
return -1;
// Bogus
if ( count >= 100000L )
return -1;
for ( int i = 0; i < (int)count; i++ )
{
TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
continue;
}
double t;
if ( start )
{
err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t );
}
else
{
err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t );
}
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
continue;
}
if ( t == time )
{
return i;
}
}
return -1;
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *analysis -
// starttime -
// Output : int
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
{
return GetPhonemeIndexAtWord( analysis, starttime, true );
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *analysis -
// endtime -
// Output : int
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
{
return GetPhonemeIndexAtWord( analysis, endtime, false );
}
CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
{
static CAnalyzedPhoneme p;
memset( &p, 0, sizeof( p ) );
TALKBACK_PHONEME tb;
TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
strcpy( p.phoneme, TBPhonemeToString( tb ) );
err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
return &p;
}
CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
{
static CAnalyzedWord w;
memset( &w, 0, sizeof( w ) );
long chars = sizeof( w.buffer );
TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return NULL;
}
return &w;
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *w1 -
// *w2 -
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool FuzzyWordMatch( char const *w1, char const *w2 )
{
int len1 = strlen( w1 );
int len2 = strlen( w2 );
int minlen = min( len1, len2 );
// Found a match
if ( !strnicmp( w1, w2, minlen ) )
return true;
int letterdiff = abs( len1 - len2 );
// More than three letters different, don't bother
if ( letterdiff > 5 )
return false;
// Compute a "delta"
char *p1 = (char *)w1;
char *p2 = (char *)w2;
CUtlVector <char> word1;
CUtlVector <char> word2;
while ( *p1 )
{
if ( V_isalpha( *p1 ) )
{
word1.AddToTail( *p1 );
}
p1++;
}
while ( *p2 )
{
if ( V_isalpha( *p2 ) )
{
word2.AddToTail( *p2 );
}
p2++;
}
int i;
for ( i = 0; i < word1.Size(); i++ )
{
char c = word1[ i ];
// See if c is in word 2, if so subtract it out
int idx = word2.Find( c );
if ( idx != word2.InvalidIndex() )
{
word2.Remove( idx );
}
}
if ( word2.Size() <= letterdiff )
return true;
word2.RemoveAll();
while ( *p2 )
{
if ( V_isalpha( *p2 ) )
{
word2.AddToTail( *p2 );
}
p2++;
}
for ( i = 0; i < word2.Size(); i++ )
{
char c = word2[ i ];
// See if c is in word 2, if so subtract it out
int idx = word1.Find( c );
if ( idx != word1.InvalidIndex() )
{
word1.Remove( idx );
}
}
if ( word1.Size() <= letterdiff )
return true;
return false;
}
//-----------------------------------------------------------------------------
// Purpose: For foreign language stuff, if inwords is empty, process anyway...
// Input : *analysis -
// outwords -
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
{
long count;
TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return;
}
CWordTag *newWord = new CWordTag;
newWord->SetWord( TEXTLESS_WORDNAME );
float starttime = 0.0f;
float endtime = 1.0f;
for ( int i = 0; i < count; ++i )
{
// Get phoneme and timing info
CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
if ( !ph )
continue;
CPhonemeTag *ptag = new CPhonemeTag;
if ( i == 0 || ( ph->starttime < starttime ) )
{
starttime = ph->starttime;
}
if ( i == 0 || ( ph->endtime > endtime ) )
{
endtime = ph->endtime;
}
ptag->SetStartTime( ph->starttime );
ptag->SetEndTime( ph->endtime );
ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
ptag->SetTag( ph->phoneme );
ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
newWord->m_Phonemes.AddToTail( ptag );
}
newWord->m_flStartTime = starttime;
newWord->m_flEndTime = endtime;
newWord->m_uiStartByte = ComputeByteFromTime( starttime );
newWord->m_uiEndByte = ComputeByteFromTime( endtime );
outwords.Reset();
outwords.AddWordTag( newWord );
outwords.SetTextFromWords();
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *analysis -
// inwords -
// outwords -
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
{
long count;
TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
if ( err != TALKBACK_NOERR )
{
DescribeError( err );
return;
}
if ( count <= 0L )
{
if ( inwords.m_Words.Count() == 0 ||
!Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
{
ProcessWordsTextless( analysis, outwords );
}
return;
}
// Bogus
if ( count >= 100000L )
return;
int inwordpos = 0;
int awordpos = 0;
outwords.Reset();
char previous[ 256 ];
previous[ 0 ] = 0;
while ( inwordpos < inwords.m_Words.Size() )
{
CWordTag *in = inwords.m_Words[ inwordpos ];
if ( awordpos >= count )
{
// Just copy the rest over without phonemes
CWordTag *copy = new CWordTag( *in );
outwords.AddWordTag( copy );
inwordpos++;
continue;
}
// Should never fail
CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
if ( !w )
{
return;
}
if ( !stricmp( w->buffer, "<SIL>" ) )
{
awordpos++;
continue;
}
char const *check = ApplyTBWordRules( in->GetWord() );
if ( !FuzzyWordMatch( check, w->buffer ) )
{
bool advance_input = true;
if ( previous[ 0 ] )
{
if ( FuzzyWordMatch( previous, w->buffer ) )
{
advance_input = false;
}
}
if ( advance_input )
{
inwordpos++;
}
awordpos++;
continue;
}
strcpy( previous, check );
CWordTag *newWord = new CWordTag;
newWord->SetWord( in->GetWord() );
newWord->m_flStartTime = w->starttime;
newWord->m_flEndTime = w->endtime;
newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
int phonemestart, phonemeend;
phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime );
phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime );
if ( phonemestart >= 0 && phonemeend >= 0 )
{
for ( ; phonemestart <= phonemeend; phonemestart++ )
{
// Get phoneme and timing info
CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
if ( !ph )
continue;
CPhonemeTag *ptag = new CPhonemeTag;
ptag->SetStartTime( ph->starttime );
ptag->SetEndTime( ph->endtime );
ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
ptag->SetTag( ph->phoneme );
ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
newWord->m_Phonemes.AddToTail( ptag );
}
}
outwords.AddWordTag( newWord );
inwordpos++;
awordpos++;
}
}
char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
{
static char outword[ 256 ];
char const *in = word;
char *out = outword;
while ( *in && ( ( out - outword ) <= 255 ) )
{
if ( *in == '\t' ||
*in == ' ' ||
*in == '\n' ||
*in == '-' ||
*in == '.' ||
*in == ',' ||
*in == ';' ||
*in == '?' ||
*in == '"' ||
*in == ':' ||
*in == '(' ||
*in == ')' )
{
in++;
*out++ = ' ';
continue;
}
if ( !V_isprint( *in ) )
{
in++;
continue;
}
if ( *in >= 128 )
{
in++;
continue;
}
// Skip numbers
if ( *in >= '0' && *in <= '9' )
{
in++;
continue;
}
// Convert all letters to upper case
if ( *in >= 'a' && *in <= 'z' )
{
*out++ = ( *in++ ) - 'a' + 'A';
continue;
}
if ( *in >= 'A' && *in <= 'Z' )
{
*out++ = *in++;
continue;
}
if ( *in == '\'' )
{
*out++ = *in++;
continue;
}
in++;
}
*out = 0;
return outword;
}
//-----------------------------------------------------------------------------
// Purpose: Given a wavfile and a list of inwords, determines the word/phonene
// sample counts for the sentce
// Output : SR_RESULT
//-----------------------------------------------------------------------------
SR_RESULT CPhonemeExtractorLipSinc::Extract(
const char *wavfile,
int numsamples,
void (*pfnPrint)( const char *fmt, ... ),
CSentence& inwords,
CSentence& outwords )
{
// g_enableTalkBackDebuggingOutput = 1;
m_pfnPrint = pfnPrint;
if ( !InitLipSinc() )
{
return SR_RESULT_ERROR;
}
m_flSampleCount = numsamples;
if ( !CheckSoundFile( wavfile ) )
{
FreeLibrary( m_hHelper );
return SR_RESULT_ERROR;
}
TALKBACK_ANALYSIS *analysis = NULL;
if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
{
FreeLibrary( m_hHelper );
return SR_RESULT_FAILED;
}
if ( strlen( inwords.GetText() ) <= 0 )
{
inwords.SetTextFromWords();
}
outwords = inwords;
// Examine data
ProcessWords( analysis, inwords, outwords );
if ( analysis )
{
talkback->TalkBackFreeAnalysis( &analysis );
}
return SR_RESULT_SUCCESS;
}
EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );