csgo-2018-source/mathlib/simdvectormatrix.cpp
2021-07-24 21:11:47 -07:00

141 lines
3.8 KiB
C++

//====== Copyright © 1996-2006, Valve Corporation, All rights reserved. =======//
//
// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
// for high speed processing in tools.
//
// $NoKeywords: $
//
//=============================================================================//
#include "basetypes.h"
#include "mathlib/mathlib.h"
#include "mathlib/simdvectormatrix.h"
#include "mathlib/ssemath.h"
#include "tier0/dbg.h"
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
//-----------------------------------------------------------------------------
// Purpose: Resize this matrix to the dimensions of pSrc and fill it by
//          interleaving three attribute planes of the container.
// Input  : pSrc      - source container; its NumCols()/NumRows() set our size
//          nAttrIdx0 - attribute whose row data is written first in each triple
//          nAttrIdx1 - attribute written second
//          nAttrIdx2 - attribute written third
// Note   : The output is addressed as a flat fltx4 stream, three values per
//          source quad. NOTE(review): this presumes FourVectors is laid out
//          as exactly three consecutive fltx4 members (x, y, z) - confirm
//          against ssemath.h.
//-----------------------------------------------------------------------------
void CSIMDVectorMatrix::CreateFromCSOAAttributes( CSOAContainer const *pSrc,
	int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 )
{
	SetSize( pSrc->NumCols(), pSrc->NumRows() );

	FourVectors *pDestRow = m_pData;
	int const nQuadsPerRow = pSrc->NumQuadsPerRow();

	// Each pass handles one source row; the destination advances by the padded row stride.
	for ( int nRow = 0; nRow < pSrc->NumRows(); ++nRow, pDestRow += m_nPaddedWidth )
	{
		fltx4 const *pPlane0 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx0, nRow ) );
		fltx4 const *pPlane1 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx1, nRow ) );
		fltx4 const *pPlane2 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx2, nRow ) );
		fltx4 *pOut = reinterpret_cast<fltx4 *>( pDestRow );

		// Interleave one quad from each plane into consecutive output slots.
		for ( int nQuad = 0; nQuad < nQuadsPerRow; ++nQuad )
		{
			pOut[0] = pPlane0[nQuad];
			pOut[1] = pPlane1[nQuad];
			pOut[2] = pPlane2[nQuad];
			pOut += 3;
		}
	}
}
void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData( int srcwidth, int srcheight,
float const * srcdata )
{
Assert( srcwidth && srcheight && srcdata );
SetSize( srcwidth, srcheight );
FourVectors * p_write_ptr = m_pData;
int n_vectors_per_source_line = ( srcwidth >> 2 );
int ntrailing_pixels_per_source_line = ( srcwidth & 3 );
for( int y = 0; y < srcheight; y++ )
{
float const * data_in = srcdata;
float * data_out = reinterpret_cast < float *> ( p_write_ptr );
// copy full input blocks
for( int x = 0; x < n_vectors_per_source_line; x++ )
{
for( int c = 0; c < 3; c++ )
{
data_out[0]= data_in[c]; // x0
data_out[1]= data_in[4 + c]; // x1
data_out[2]= data_in[8 + c]; // x2
data_out[3]= data_in[12 + c]; // x3
data_out += 4;
}
data_in += 16;
}
// now, copy trailing data and pad with copies
if ( ntrailing_pixels_per_source_line )
{
for( int c = 0; c < 3; c++ )
{
for( int cp = 0; cp < 4; cp++ )
{
int real_cp = MIN( cp, ntrailing_pixels_per_source_line - 1 );
data_out[4 * c + cp]= data_in[c + 4 * real_cp];
}
}
}
// advance ptrs to next line
p_write_ptr += m_nPaddedWidth;
srcdata += 4 * srcwidth;
}
}
void CSIMDVectorMatrix::RaiseToPower( float power )
{
int nv = NVectors();
if ( nv )
{
int fixed_point_exp = ( int ) ( 4.0 * power );
FourVectors * src = m_pData;
do
{
src->x = Pow_FixedPoint_Exponent_SIMD( src->x, fixed_point_exp );
src->y = Pow_FixedPoint_Exponent_SIMD( src->y, fixed_point_exp );
src->z = Pow_FixedPoint_Exponent_SIMD( src->z, fixed_point_exp );
src++;
} while (-- nv );
}
}
//-----------------------------------------------------------------------------
// Purpose: Element-wise accumulate another matrix into this one.
// Input  : src - matrix to add; asserted to have the same width and height
// Output : reference to this matrix, to allow chaining
//-----------------------------------------------------------------------------
CSIMDVectorMatrix & CSIMDVectorMatrix::operator += ( CSIMDVectorMatrix const & src )
{
	Assert( m_nWidth == src.m_nWidth );
	Assert( m_nHeight == src.m_nHeight );

	FourVectors const *pAddend = src.m_pData;
	FourVectors *pAccum = m_pData;

	// Walk both element arrays in lockstep, NVectors() elements in total.
	for ( int nRemaining = NVectors(); nRemaining != 0; --nRemaining )
	{
		*pAccum += *pAddend;
		++pAccum;
		++pAddend;
	}
	return *this;
}
//-----------------------------------------------------------------------------
// Purpose: Scale every element of the matrix by a single Vector, in place.
// Input  : src - scale vector; it is duplicated into a FourVectors and
//                applied to each element through VProduct
// Output : reference to this matrix, to allow chaining
//-----------------------------------------------------------------------------
CSIMDVectorMatrix & CSIMDVectorMatrix::operator *= ( Vector const & src )
{
	int nRemaining = NVectors();
	if ( !nRemaining )
		return *this;	// empty matrix: nothing to scale

	// Build the duplicated scale factor once, outside the loop.
	FourVectors vecScale;
	vecScale.DuplicateVector( src );

	for ( FourVectors *pVec = m_pData; nRemaining != 0; --nRemaining, ++pVec )
	{
		pVec->VProduct( vecScale );
	}
	return *this;
}