141 lines
3.8 KiB
C++
141 lines
3.8 KiB
C++
//====== Copyright © 1996-2006, Valve Corporation, All rights reserved. =======//
|
|
//
|
|
// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
|
|
// for high speed processing in tools.
|
|
//
|
|
// $NoKeywords: $
|
|
//
|
|
//=============================================================================//
|
|
|
|
#include "basetypes.h"
|
|
#include "mathlib/mathlib.h"
|
|
#include "mathlib/simdvectormatrix.h"
|
|
#include "mathlib/ssemath.h"
|
|
#include "tier0/dbg.h"
|
|
|
|
// NOTE: This has to be the last file included!
|
|
#include "tier0/memdbgon.h"
|
|
|
|
|
|
void CSIMDVectorMatrix::CreateFromCSOAAttributes( CSOAContainer const *pSrc,
|
|
int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 )
|
|
{
|
|
SetSize( pSrc->NumCols(), pSrc->NumRows() );
|
|
|
|
FourVectors *p_write_ptr = m_pData;
|
|
int n_vectors_per_source_line = pSrc->NumQuadsPerRow();
|
|
for( int y = 0; y < pSrc->NumRows(); y++ )
|
|
{
|
|
fltx4 const * data_in0 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx0, y ) );
|
|
fltx4 const * data_in1 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx1, y ) );
|
|
fltx4 const * data_in2 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx2, y ) );
|
|
|
|
fltx4 *data_out = reinterpret_cast < fltx4 *> ( p_write_ptr );
|
|
// copy full input blocks
|
|
for( int x = 0; x < n_vectors_per_source_line; x++ )
|
|
{
|
|
*(data_out++) = (* data_in0++ );
|
|
*(data_out++) = (* data_in1++ );
|
|
*(data_out++) = (* data_in2++ );
|
|
}
|
|
// advance ptrs to next line
|
|
p_write_ptr += m_nPaddedWidth;
|
|
}
|
|
}
|
|
|
|
void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData( int srcwidth, int srcheight,
|
|
float const * srcdata )
|
|
{
|
|
Assert( srcwidth && srcheight && srcdata );
|
|
SetSize( srcwidth, srcheight );
|
|
|
|
FourVectors * p_write_ptr = m_pData;
|
|
int n_vectors_per_source_line = ( srcwidth >> 2 );
|
|
int ntrailing_pixels_per_source_line = ( srcwidth & 3 );
|
|
for( int y = 0; y < srcheight; y++ )
|
|
{
|
|
float const * data_in = srcdata;
|
|
float * data_out = reinterpret_cast < float *> ( p_write_ptr );
|
|
// copy full input blocks
|
|
for( int x = 0; x < n_vectors_per_source_line; x++ )
|
|
{
|
|
for( int c = 0; c < 3; c++ )
|
|
{
|
|
data_out[0]= data_in[c]; // x0
|
|
data_out[1]= data_in[4 + c]; // x1
|
|
data_out[2]= data_in[8 + c]; // x2
|
|
data_out[3]= data_in[12 + c]; // x3
|
|
data_out += 4;
|
|
}
|
|
data_in += 16;
|
|
}
|
|
// now, copy trailing data and pad with copies
|
|
if ( ntrailing_pixels_per_source_line )
|
|
{
|
|
for( int c = 0; c < 3; c++ )
|
|
{
|
|
for( int cp = 0; cp < 4; cp++ )
|
|
{
|
|
int real_cp = MIN( cp, ntrailing_pixels_per_source_line - 1 );
|
|
data_out[4 * c + cp]= data_in[c + 4 * real_cp];
|
|
}
|
|
}
|
|
}
|
|
// advance ptrs to next line
|
|
p_write_ptr += m_nPaddedWidth;
|
|
srcdata += 4 * srcwidth;
|
|
}
|
|
}
|
|
|
|
void CSIMDVectorMatrix::RaiseToPower( float power )
|
|
{
|
|
int nv = NVectors();
|
|
if ( nv )
|
|
{
|
|
int fixed_point_exp = ( int ) ( 4.0 * power );
|
|
FourVectors * src = m_pData;
|
|
do
|
|
{
|
|
src->x = Pow_FixedPoint_Exponent_SIMD( src->x, fixed_point_exp );
|
|
src->y = Pow_FixedPoint_Exponent_SIMD( src->y, fixed_point_exp );
|
|
src->z = Pow_FixedPoint_Exponent_SIMD( src->z, fixed_point_exp );
|
|
src++;
|
|
} while (-- nv );
|
|
}
|
|
}
|
|
|
|
CSIMDVectorMatrix & CSIMDVectorMatrix::operator += ( CSIMDVectorMatrix const & src )
|
|
{
|
|
Assert( m_nWidth == src.m_nWidth );
|
|
Assert( m_nHeight == src.m_nHeight );
|
|
int nv = NVectors();
|
|
if ( nv )
|
|
{
|
|
FourVectors * srcv = src.m_pData;
|
|
FourVectors * destv = m_pData;
|
|
do // !! speed !! inline more iters
|
|
{
|
|
* ( destv++ ) += * ( srcv++ );
|
|
} while (-- nv );
|
|
}
|
|
return * this;
|
|
}
|
|
|
|
CSIMDVectorMatrix & CSIMDVectorMatrix::operator *= ( Vector const & src )
|
|
{
|
|
int nv = NVectors();
|
|
if ( nv )
|
|
{
|
|
FourVectors scalevalue;
|
|
scalevalue.DuplicateVector( src );
|
|
FourVectors * destv = m_pData;
|
|
do // !! speed !! inline more iters
|
|
{
|
|
destv->VProduct( scalevalue );
|
|
destv++;
|
|
} while (-- nv );
|
|
}
|
|
return * this;
|
|
}
|
|
|