// 2010-07-22 01:46:14 -05:00
//====== Copyright (c) 1996-2007, Valve Corporation, All rights reserved. =======//
//
// Purpose:
//
// $NoKeywords: $
//
// A Fixed-allocation class for maintaining a 1d or 2d or 3d array of data in a structure-of-arrays
// (SOA) sse-friendly manner.
// =============================================================================//
# ifndef UTLSOACONTAINER_H
# define UTLSOACONTAINER_H
# ifdef _WIN32
# pragma once
# endif
# include "tier0/platform.h"
# include "tier0/dbg.h"
# include "tier0/threadtools.h"
# include "tier1/utlmemory.h"
# include "tier1/utlblockmemory.h"
# include "mathlib/ssemath.h"
// strided pointers. gives you a class that acts like a pointer, but the ++ and += operators do the
// right thing
/// A pointer-like wrapper whose ++ and += operators advance by a fixed stride rather than by a
/// single element. The stride is stored in units of T, not bytes.
template<class T> class CStridedPtr
{
protected:
	T *m_pData;			// element currently pointed at
	size_t m_nStride;	// step applied by ++ / +=, in units of sizeof( T )

public:
	/// Wrap pData, stepping nByteStride bytes per increment. The byte stride is converted to a
	/// whole number of T elements with integer division, so any remainder is dropped.
	/// (Constructors use the injected class name; the `CStridedPtr<T>( ... )` spelling is no
	/// longer accepted as a constructor declarator in C++20.)
	FORCEINLINE CStridedPtr( void *pData, size_t nByteStride )
	{
		m_pData = reinterpret_cast<T *>( pData );
		m_nStride = nByteStride / sizeof( T );
	}

	/// Default construction leaves both members uninitialized.
	FORCEINLINE CStridedPtr( void ) {}

	T *operator->( void ) const
	{
		return m_pData;
	}

	T &operator*( void ) const
	{
		return *m_pData;
	}

	/// Implicit conversion to the raw pointer. const-qualified (like the other accessors and
	/// like CStridedConstPtr's conversion) so that it also works on const objects.
	FORCEINLINE operator T *( void ) const
	{
		return m_pData;
	}

	/// Pre-increment: advance by one stride.
	FORCEINLINE CStridedPtr &operator++( void )
	{
		m_pData += m_nStride;
		return *this;
	}

	/// Advance by nNumElements strides.
	FORCEINLINE void operator+=( size_t nNumElements )
	{
		m_pData += nNumElements * m_nStride;
	}

	/// Stride in units of T (not bytes).
	FORCEINLINE size_t Stride( void ) const
	{
		return m_nStride;
	}
};
/// Read-only counterpart of CStridedPtr: behaves like a `const T *` whose ++ and += operators
/// step by a fixed stride (kept in units of T).
template<class T> class CStridedConstPtr
{
protected:
	const T *m_pData;	// element currently pointed at
	size_t m_nStride;	// step applied by ++ / +=, in units of sizeof( T )

public:
	/// Wrap pData, stepping nByteStride bytes per increment (integer-divided by sizeof( T )).
	FORCEINLINE CStridedConstPtr( void const *pData, size_t nByteStride )
		: m_pData( reinterpret_cast<T const *>( pData ) ),
		  m_nStride( nByteStride / sizeof( T ) )
	{
	}

	/// Default construction leaves both members uninitialized.
	FORCEINLINE CStridedConstPtr( void ) {}

	const T *operator->( void ) const { return m_pData; }

	const T &operator*( void ) const { return *m_pData; }

	/// Implicit conversion to the raw const pointer.
	FORCEINLINE operator const T *( void ) const { return m_pData; }

	/// Pre-increment: advance by one stride.
	FORCEINLINE CStridedConstPtr &operator++( void )
	{
		m_pData += m_nStride;
		return *this;
	}

	/// Advance by nNumElements strides.
	FORCEINLINE void operator+=( size_t nNumElements )
	{
		m_pData += nNumElements * m_nStride;
	}

	/// Stride in units of T (not bytes).
	FORCEINLINE size_t Stride( void ) const { return m_nStride; }
};
// allowed field data types. if you change these values, you need to change the tables in the .cpp file
// Data type tag stored per attribute slot of a CSOAContainer. Only ATTRDATATYPE_NONE and
// ATTRDATATYPE_FLOAT are given explicit values; the remaining enumerators count up from FLOAT.
enum EAttributeDataType
{
ATTRDATATYPE_NONE = - 1 , // pad and varargs ender; also marks an attribute slot with no type set
ATTRDATATYPE_FLOAT = 0 , // a float attribute
ATTRDATATYPE_4V , // vector data type, stored as class FourVectors
ATTRDATATYPE_INT , // integer. not especially sse-able on all architectures.
ATTRDATATYPE_POINTER , // a pointer.
ATTRDATATYPE_COUNT , // number of real data types above (not itself a data type)
} ;
// Maximum number of attributes per container. Sizes every per-attribute array in CSOAContainer;
// the container also tracks allocated attributes with one bit each in a uint32 mask.
# define MAX_SOA_FIELDS 32
// forward declarations for the k-means quantization helpers defined later in this header
class KMeansQuantizedValue ;
class IKMeansErrorMetric ;
// pointer to a function taking one SIMD value and returning a SIMD value
typedef fltx4 ( * UNARYSIMDFUNCTION ) ( fltx4 const & ) ;
// pointer to a function combining two SIMD values; used as a template argument by the
// per-attribute apply/reduce helpers
typedef fltx4 ( * BINARYSIMDFUNCTION ) ( fltx4 const & , fltx4 const & ) ;
class CSOAAttributeReference ;
/// Threading mode for a container. Normally set automatically based upon dimensions, but
/// controllable via SetThreadMode.
// Values 1 and 2 are distinct bits and 3 is their combination.
enum SOAThreadMode_t
{
SOATHREADMODE_NONE = 0 , // no threading
SOATHREADMODE_BYROWS = 1 , // split work by rows
SOATHREADMODE_BYSLICES = 2 , // split work by slices
SOATHREADMODE_BYROWS_AND_SLICES = 3 , // BYROWS | BYSLICES
SOATHREADMODE_AUTO = - 1 , // compute based upon dimensions
} ;
// Fixed-allocation container holding up to MAX_SOA_FIELDS attributes over a
// columns x rows x slices grid, laid out structure-of-arrays style. Each attribute has its own
// base pointer, data type and byte strides (per item, per row, per slice).
class CSOAContainer
{
friend class CSOAAttributeReference ;
public :
// Constructor, destructor
CSOAContainer ( void ) ; // an empty one with no attributes
// Varargs constructor: dimensions followed by a variable argument list (see the usage example
// further down; ATTRDATATYPE_NONE is documented as the "varargs ender").
CSOAContainer ( int nCols , int nRows , int nSlices , . . . ) ;
~ CSOAContainer ( void ) ;
// !!!!! UPDATE SERIALIZATION CODE WHENEVER THE STRUCTURE OF CSOAContainer CHANGES !!!!!
// To avoid dependency on datamodel, serialization is implemented in utlsoacontainer_serialization.cpp, in dmxloader.lib
//bool Serialize( CDmxElement *pRootElement );
//bool Unserialize( const CDmxElement *pRootElement );
// Set the data type for an attribute. If you set the data type, but tell it not to allocate,
// the data type will be set but writes will assert, and reads will give you back zeros. if
// AllocateData hasn't been called yet, this will set up for AllocateData to reserve space for
// this attribute. If you have already called AllocateData, but wish to add an attribute, you
// can also use this, which will result in separate memory being allocated for this attribute.
void SetAttributeType ( int nAttrIdx , EAttributeDataType nDataType , bool bAllocateMemory = true ) ;
EAttributeDataType GetAttributeType ( int nAttrIdx ) const ;
// Set the attribute type for a field, if that field is not already present (potentially
// allocating memory). You can use this, for instance, to make sure an already loaded image has
// an alpha channel.
void EnsureDataType ( int nAttrIdx , EAttributeDataType nDataType ) ;
// set back to un-initted state, freeing memory
void Purge ( void ) ;
// Allocate, purge data
void AllocateData ( int nNCols , int nNRows , int nSlices = 1 ) ; // actually allocate the memory and set the pointers up
void PurgeData ( void ) ;
// Did the container allocate memory for this attribute?
bool HasAllocatedMemory ( int nAttrIdx ) const ;
// Usage example for the varargs constructor declared above. call like
// #define ATTR_RED 0
// #define ATTR_GREEN 1
// #define ATTR_BLUE 2
// CSOAContainer myimage( 256, 256, ATTR_RED, ATTRDATATYPE_FLOAT, ATTR_GREEN, ATTRDATATYPE_FLOAT,
// ATTR_BLUE, ATTRDATATYPE_FLOAT, -1 );
// NOTE(review): the declared constructor takes ( nCols, nRows, nSlices, ... ); this example has
// no nSlices argument and looks out of date -- confirm the intended call form.
int NumCols ( void ) const ;
int NumRows ( void ) const ;
int NumSlices ( void ) const ;
// debug check (via Assert) that nAttrIdx is in range and has the given data type
void AssertDataType ( int nAttrIdx , EAttributeDataType nDataType ) const ;
// # of groups of 4 elements per row
int NumQuadsPerRow ( void ) const ;
int Count ( void ) const ; // for 1d data
int NumElements ( void ) const ;
// how much to step to go from the end of one row to the start of the next one. Basically, how
// many bytes to add at the end of a row when iterating over the whole 2d array with ++
size_t RowToRowStep ( int nAttrIdx ) const ;
// typed pointer to the start of a row of an attribute
template < class T > T * RowPtr ( int nAttributeIdx , int nRowNumber , int nSliceNumber = 0 ) const ;
// untyped read-only pointer to the start of a row of an attribute
void const * ConstRowPtr ( int nAttributeIdx , int nRowNumber , int nSliceNumber = 0 ) const ;
// typed pointer to a single element of a non-4V attribute
template < class T > T * ElementPointer ( int nAttributeIdx , int nX = 0 , int nY = 0 , int nZ = 0 ) const ;
// pointer into a 4V attribute for element ( nX, nY, nZ )
FourVectors * ElementPointer4V ( int nAttributeIdx , int nX = 0 , int nY = 0 , int nZ = 0 ) const ;
// byte stride from one datum of the attribute to the next
size_t ItemByteStride ( int nAttributeIdx ) const ;
// reference to the float at ( nX, nY, nZ ) of a float attribute (type is Assert-checked)
FORCEINLINE float & FloatValue ( int nAttrIdx , int nX , int nY , int nZ ) const
{
AssertDataType ( nAttrIdx , ATTRDATATYPE_FLOAT ) ;
return RowPtr < float > ( nAttrIdx , nY , nZ ) [ nX ] ;
}
// return a reference to an attribute, which can have operations performed on it. For instance,
// this is valid code to zero out the red component of a whole image:
// myImage[FBM_ATTR_RED] = 0.;
CSOAAttributeReference operator [ ] ( int nAttrIdx ) ;
// this is just an alias for readability w/ ptrs. instead of (*p)[FBM_ATTR_RED], you can do p->Attr( FBM_ATTR_RED );
FORCEINLINE CSOAAttributeReference Attr ( int nAttrIdx ) ;
// copy the attribute data from another soacontainer. must be compatible geometry.
void CopyAttrFrom ( CSOAContainer const & other , int nDestAttributeIdx , int nSrcAttributeIndex = - 1 ) ;
// copy the attribute data from another attribute. must be compatible data format
void CopyAttrToAttr ( int nSrcAttributeIndex , int nDestAttributeIndex ) ;
// copy a subvolume of attribute data from one container to another.
void CopyRegionFrom ( CSOAContainer const & src , int nSrcAttr , int nDestAttr ,
int nSrcMinX , int nSrcMaxX , int nSrcMinY , int nSrcMaxY , int nSrcMinZ , int nSrcMaxZ ,
int nDestX , int nDestY , int nDestZ ) ;
// copy all fields from a region of src to this.
void CopyRegionFrom ( CSOAContainer const & src ,
int nSrcMinX , int nSrcMaxX , int nSrcMinY , int nSrcMaxY , int nSrcMinZ , int nSrcMaxZ ,
int nDestX , int nDestY , int nDestZ ) ;
// move all the data from one csoacontainer to another, leaving the source empty. this is just
// a pointer copy.
// NOTE(review): `other` is taken by value, so the object that gets reset is the parameter copy,
// not the caller's container -- confirm whether a reference parameter was intended.
FORCEINLINE void MoveDataFrom ( CSOAContainer other ) ;
// arithmetic and data filling functions. All SIMD and hopefully fast
/// set all elements of a float attribute to random #s
void RandomizeAttribute ( int nAttr , float flMin , float flMax ) const ;
/// this.attr = vec
void FillAttr ( int nAttr , Vector const & vecValue ) ;
/// this.attr = float
void FillAttr ( int nAttr , float flValue ) ;
/// this.nDestAttr *= src.nSrcAttr
void MulAttr ( CSOAContainer const & src , int nSrcAttr , int nDestAttr ) ;
/// Returns the result of repeatedly combining attr values with the initial value using the specified function.
/// For instance, SumAttributeValue is just ReduceAttr<AddSIMD>( attr, FOUR_ZEROS );
template < BINARYSIMDFUNCTION fn > float ReduceAttr ( int nSrcAttr , fltx4 const & fl4InitialValue ) const ;
/// this.attr = fn( attr, arg1 )
template < BINARYSIMDFUNCTION fn > void ApplyBinaryFunctionToAttr ( int nDestAttr , fltx4 const & flFnArg1 ) ;
/// this.attr = fn1( fn2( attr, arg2 ), arg1 )
template < BINARYSIMDFUNCTION fn1 , BINARYSIMDFUNCTION fn2 > void ApplyTwoComposedBinaryFunctionsToAttr ( int nDestAttr , fltx4 const & flFnArg1 , fltx4 const & flFnArg2 ) ;
/// this.nDestAttr *= flValue
void MulAttr ( int nDestAttr , float flScale )
{
ApplyBinaryFunctionToAttr < MulSIMD > ( nDestAttr , ReplicateX4 ( flScale ) ) ;
}
/// this.nDestAttr += flAddend
void AddToAttr ( int nDestAttr , float flAddend )
{
ApplyBinaryFunctionToAttr < AddSIMD > ( nDestAttr , ReplicateX4 ( flAddend ) ) ;
}
/// this.attr = max( this.attr, flMinValue )
void MaxAttr ( int nDestAttr , float flMinValue )
{
ApplyBinaryFunctionToAttr < MaxSIMD > ( nDestAttr , ReplicateX4 ( flMinValue ) ) ;
}
/// this.attr = min( this.attr, flMaxValue )
void MinAttr ( int nDestAttr , float flMaxValue )
{
ApplyBinaryFunctionToAttr < MinSIMD > ( nDestAttr , ReplicateX4 ( flMaxValue ) ) ;
}
/// this.attr = min( max( this.attr, flMinValue ), flMaxValue )
void ClampAttr ( int nDestAttr , float flMinValue , float flMaxValue )
{
// fn1 = MinSIMD with arg1 = max value, fn2 = MaxSIMD with arg2 = min value
ApplyTwoComposedBinaryFunctionsToAttr < MinSIMD , MaxSIMD > ( nDestAttr , ReplicateX4 ( flMaxValue ) , ReplicateX4 ( flMinValue ) ) ;
}
/// this.attr = normalize( this.attr )
void NormalizeAttr ( int nAttr ) ;
/// fill a 2d rectangle with values interpolated from 4 corner values.
void FillAttrWithInterpolatedValues ( int nAttr , float flValue00 , float flValue10 , float flValue01 , float flValue11 ) const ;
void FillAttrWithInterpolatedValues ( int nAttr , Vector flValue00 , Vector flValue10 ,
Vector const & flValue01 , Vector const & flValue11 ) const ;
/// grab 3 scalar attributes from one csoaa and fill in a fourvector attr in.
void PackScalarAttributesToVectorAttribute ( CSOAContainer * pInput ,
int nVecAttributeOut ,
int nScalarAttributeX ,
int nScalarAttributeY ,
int nScalarAttributeZ ) ;
/// grab the 3 components of a vector attribute and store in 3 scalar attributes.
void UnPackVectorAttributeToScalarAttributes ( CSOAContainer * pInput ,
int nVecAttributeIn ,
int nScalarAttributeX ,
int nScalarAttributeY ,
int nScalarAttributeZ ) ;
/// this.attrout = src.attrin * vec (component by component )
void MultiplyVectorAttribute ( CSOAContainer * pInput , int nAttributeIn , Vector const & vecScalar , int nAttributeOut ) ;
/// Given an soa container of a different dimension, resize one attribute from it to fit this
/// table's geometry. point sampling only
void ResampleAttribute ( CSOAContainer & pInput , int nAttr ) ;
/// sum of all floats in an attribute
float SumAttributeValue ( int nAttr ) const ;
/// sum(attr) / ( w * h * d )
float AverageFloatAttributeValue ( int nAttr ) const ;
/// maximum float value in a float attr
float MaxAttributeValue ( int nAttr ) const ;
/// minimum float value in a float attr
float MinAttributeValue ( int nAttr ) const ;
/// scalartargetattribute += w*exp( vecdir dot ndirection)
void AddGaussianSRBF ( float flWeight , Vector vecDir , int nDirectionAttribute , int nScalarTargetAttribute ) ;
/// vec3targetattribute += w*exp( vecdir dot ndirection)
void AddGaussianSRBF ( Vector vecWeight , Vector vecDir , int nDirectionAttribute ,
int nVectorTargetAttribute ) ;
/// find the largest value of a vector attribute
void FindLargestMagnitudeVector ( int nAttr , int * nx , int * ny , int * nz ) ;
void KMeansQuantization ( int const * pFieldIndices , int nNumFields ,
KMeansQuantizedValue * pOutValues ,
int nNumResultsDesired , IKMeansErrorMetric * pErrorCalculator ,
int nFieldToStoreIndexInto , int nNumIterations ,
int nChannelToReceiveErrorSignal = - 1 ) ;
// Calculate the signed distance, in voxels, between all voxels and a surface boundary defined
// by nSrcField being >0. Voxels with nSrcField <0 will end up with negative distances. Voxels
// with nSrcField == 0 will get 0, and nSrcField >0 will yield positive distances. Note the
// min/max x/y/z fields don't reflect the range to be written, but rather represent the bounds
// of updated voxels that you want your distance field modified to take into account. This
// volume will be bloated based upon the nMaxDistance parameter and simd padding. A
// brute-force algorithm is used, but it is threaded and simd'd. Large "nMaxDistance" values
// applied to large images can take a long time, as the execution time per output pixel is
// proportional to maxdistance^2. The rect argument, if passed, will be modified to be the
// entire rectangle modified by the operation.
void GenerateDistanceField ( int nSrcField , int nDestField ,
int nMaxDistance ,
Rect3D_t * pRect = NULL ) ;
void SetThreadMode ( SOAThreadMode_t eThreadMode ) ;
protected :
int m_nColumns ; // # of columns created with
int m_nRows ; // # of rows created with
int m_nSlices ; // # of slices created with
int m_nPaddedColumns ; // # of columns rounded up for sse
int m_nNumQuadsPerRow ; // # of groups of 4 elements per row
uint8 * m_pDataMemory ; // the actual data memory
uint8 * m_pAttributePtrs [ MAX_SOA_FIELDS ] ; // base address of each attribute's data
EAttributeDataType m_nDataType [ MAX_SOA_FIELDS ] ; // data type of each attribute slot
size_t m_nStrideInBytes [ MAX_SOA_FIELDS ] ; // stride from one field datum to another
size_t m_nRowStrideInBytes [ MAX_SOA_FIELDS ] ; // stride from one row datum to another per field
size_t m_nSliceStrideInBytes [ MAX_SOA_FIELDS ] ; // stride from one slice datum to another per field
uint32 m_nFieldPresentMask ; // bit n set => attribute n has allocated memory (see HasAllocatedMemory)
uint8 * m_pConstantDataMemory ;
uint8 * m_pSeparateDataMemory [ MAX_SOA_FIELDS ] ; // for fields allocated separately from the main allocation
SOAThreadMode_t m_eThreadMode ; // set thread mode
// Reset every member to the empty state. Does not free anything.
FORCEINLINE void Init ( void )
{
// every byte set to 0xff -- presumably yields ATTRDATATYPE_NONE ( -1 ) for each slot
memset ( m_nDataType , 0xff , sizeof ( m_nDataType ) ) ;
memset ( m_pSeparateDataMemory , 0 , sizeof ( m_pSeparateDataMemory ) ) ;
// debug builds only: fill pointers and strides with 0xFF bytes. In non-debug builds these
// four arrays are left untouched here.
# ifdef _DEBUG
memset ( m_pAttributePtrs , 0xFF , sizeof ( m_pAttributePtrs ) ) ;
memset ( m_nStrideInBytes , 0xFF , sizeof ( m_nStrideInBytes ) ) ;
memset ( m_nRowStrideInBytes , 0xFF , sizeof ( m_nRowStrideInBytes ) ) ;
memset ( m_nSliceStrideInBytes , 0xFF , sizeof ( m_nSliceStrideInBytes ) ) ;
# endif
m_pConstantDataMemory = NULL ;
m_pDataMemory = 0 ;
m_nNumQuadsPerRow = 0 ;
m_nColumns = m_nPaddedColumns = m_nRows = m_nSlices = 0 ;
m_nFieldPresentMask = 0 ;
m_eThreadMode = SOATHREADMODE_NONE ;
}
void UpdateDistanceRow ( int nSearchRadius , int nMinX , int nMaxX , int nY , int nZ ,
int nSrcField , int nDestField ) ;
// parallel helper functions. These do the work, and all take a row/column range as their first arguments.
void CopyAttrFromPartial ( int nStartRow , int nNumRows , int nStartSlice , int nEndSlice , CSOAContainer const * pOther , int nDestAttributeIndex , int nSrcAttributeIndex ) ;
void FillAttrPartial ( int nStartRow , int nNumRows , int nStartSlice , int nEndSlice , int nAttr , fltx4 fl4Value ) ;
// Allocation utility funcs (NOTE: all allocs are multiples of 16, and are aligned allocs)
size_t DataMemorySize ( void ) const ; // total bytes of data memory to allocate at m_pDataMemory (if all attributes were allocated in a single block)
size_t ConstantMemorySize ( void ) const ; // total bytes of constant memory to allocate at m_pConstantDataMemory (if all constant attributes were allocated in a single block)
size_t AttributeMemorySize ( int nAttrIndex ) const ; // total bytes of data memory allocated to a single attribute (constant or otherwise)
void AllocateDataMemory ( void ) ;
void AllocateConstantMemory ( void ) ;
} ;
// repeated forward declaration (the name is already declared before CSOAContainer); harmless
class CSOAAttributeReference ;
// define binary op class to allow this construct without temps:
// dest( FBM_ATTR_RED ) = src( FBM_ATTR_BLUE ) + src( FBM_ATTR_GREEN )
2012-05-21 02:49:35 -05:00
template < BINARYSIMDFUNCTION fn , class Ref > class CSOAAttributeReferenceBinaryOp
2010-07-22 01:46:14 -05:00
{
public :
2012-05-21 02:49:35 -05:00
Ref m_opA ;
Ref m_opB ;
2010-07-22 01:46:14 -05:00
2012-05-21 02:49:35 -05:00
CSOAAttributeReferenceBinaryOp ( Ref const & a , Ref const & b )
2010-07-22 01:46:14 -05:00
{
a . CopyTo ( m_opA ) ;
b . CopyTo ( m_opB ) ;
}
} ;
// Declares, inside CSOAAttributeReference, a const member `operator opname` that takes another
// attribute reference and returns a CSOAAttributeReferenceBinaryOp tagged with `fnname`.
// No arithmetic happens here; the returned node only records the two operands.
#define DEFINE_OP( opname, fnname )																		\
	FORCEINLINE CSOAAttributeReferenceBinaryOp<fnname, CSOAAttributeReference>							\
	operator opname( CSOAAttributeReference const &other ) const										\
	{																									\
		return CSOAAttributeReferenceBinaryOp<fnname, CSOAAttributeReference>( *this, other );			\
	}
class CSOAAttributeReference
{
friend class CSOAContainer ;
class CSOAContainer * m_pContainer ;
int m_nAttributeID ;
public :
FORCEINLINE void operator * = ( float flScale ) const
{
m_pContainer - > MulAttr ( m_nAttributeID , flScale ) ;
}
FORCEINLINE void operator + = ( float flAddend ) const
{
m_pContainer - > AddToAttr ( m_nAttributeID , flAddend ) ;
}
FORCEINLINE void operator - = ( float flAddend ) const
{
m_pContainer - > AddToAttr ( m_nAttributeID , - flAddend ) ;
}
FORCEINLINE void operator = ( float flValue ) const
{
m_pContainer - > FillAttr ( m_nAttributeID , flValue ) ;
}
FORCEINLINE void operator = ( CSOAAttributeReference const & other ) const
{
m_pContainer - > CopyAttrFrom ( * other . m_pContainer , m_nAttributeID , other . m_nAttributeID ) ;
}
// these operator overloads let you do
// dst[ATT1] = src1[ATT] + src2[ATT] with no temporaries generated
DEFINE_OP ( + , AddSIMD ) ;
DEFINE_OP ( * , MulSIMD ) ;
DEFINE_OP ( - , SubSIMD ) ;
DEFINE_OP ( / , DivSIMD ) ;
2012-05-21 02:49:35 -05:00
template < BINARYSIMDFUNCTION fn > FORCEINLINE void operator = ( CSOAAttributeReferenceBinaryOp < fn , CSOAAttributeReference > const & op ) ;
2010-07-22 01:46:14 -05:00
FORCEINLINE void CopyTo ( CSOAAttributeReference & other ) const ; // since operator= is over-ridden
} ;
2012-05-21 02:49:35 -05:00
template < BINARYSIMDFUNCTION fn > FORCEINLINE void CSOAAttributeReference : : operator = ( CSOAAttributeReferenceBinaryOp < fn , CSOAAttributeReference > const & op )
2010-07-22 01:46:14 -05:00
{
m_pContainer - > AssertDataType ( m_nAttributeID , ATTRDATATYPE_FLOAT ) ;
fltx4 * pOut = m_pContainer - > RowPtr < fltx4 > ( m_nAttributeID , 0 ) ;
2012-05-21 02:49:35 -05:00
fltx4 * pInA = op . m_opA . m_pContainer - > template RowPtr < fltx4 > ( op . m_opA . m_nAttributeID , 0 ) ;
fltx4 * pInB = op . m_opB . m_pContainer - > template RowPtr < fltx4 > ( op . m_opB . m_nAttributeID , 0 ) ;
2010-07-22 01:46:14 -05:00
size_t nRowToRowStride = m_pContainer - > RowToRowStep ( m_nAttributeID ) / sizeof ( fltx4 ) ;
int nRowCtr = m_pContainer - > NumRows ( ) * m_pContainer - > NumSlices ( ) ;
do
{
int nColCtr = m_pContainer - > NumQuadsPerRow ( ) ;
do
{
* ( pOut + + ) = fn ( * ( pInA + + ) , * ( pInB + + ) ) ;
} while ( - - nColCtr ) ;
pOut + = nRowToRowStride ;
pInA + = nRowToRowStride ;
pInB + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
}
/// Make `other` refer to the same container and attribute as this handle. No attribute data
/// is touched.
FORCEINLINE void CSOAAttributeReference::CopyTo( CSOAAttributeReference &other ) const
{
	other.m_nAttributeID = m_nAttributeID;
	other.m_pContainer = m_pContainer;
}
/// Build a handle to attribute nAttrIdx of this container. The index is not validated here.
FORCEINLINE CSOAAttributeReference CSOAContainer::operator[]( int nAttrIdx )
{
	CSOAAttributeReference attrRef;
	attrRef.m_nAttributeID = nAttrIdx;
	attrRef.m_pContainer = this;
	return attrRef;
}
/// Named alias for operator[], convenient when working through a pointer to the container.
FORCEINLINE CSOAAttributeReference CSOAContainer::Attr( int nAttrIdx )
{
	return operator[]( nAttrIdx );
}
template < BINARYSIMDFUNCTION fn1 , BINARYSIMDFUNCTION fn2 > void CSOAContainer : : ApplyTwoComposedBinaryFunctionsToAttr ( int nDestAttr , fltx4 const & fl4FnArg1 , fltx4 const & fl4FnArg2 )
{
if ( m_nDataType [ nDestAttr ] = = ATTRDATATYPE_4V )
{
FourVectors * pOut = RowPtr < FourVectors > ( nDestAttr , 0 ) ;
size_t nRowToRowStride = RowToRowStep ( nDestAttr ) / sizeof ( FourVectors ) ;
int nRowCtr = NumRows ( ) * NumSlices ( ) ;
do
{
int nColCtr = NumQuadsPerRow ( ) ;
do
{
pOut - > x = fn1 ( fn2 ( pOut - > x , fl4FnArg2 ) , fl4FnArg1 ) ;
pOut - > y = fn1 ( fn2 ( pOut - > y , fl4FnArg2 ) , fl4FnArg1 ) ;
pOut - > z = fn1 ( fn2 ( pOut - > z , fl4FnArg2 ) , fl4FnArg1 ) ;
} while ( - - nColCtr ) ;
pOut + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
}
else
{
AssertDataType ( nDestAttr , ATTRDATATYPE_FLOAT ) ;
fltx4 * pOut = RowPtr < fltx4 > ( nDestAttr , 0 ) ;
size_t nRowToRowStride = RowToRowStep ( nDestAttr ) / sizeof ( fltx4 ) ;
int nRowCtr = NumRows ( ) * NumSlices ( ) ;
do
{
int nColCtr = NumQuadsPerRow ( ) ;
do
{
2012-05-21 02:49:35 -05:00
* ( pOut ) = fn1 ( fn2 ( * pOut , fl4FnArg2 ) , fl4FnArg1 ) ;
pOut + + ;
2010-07-22 01:46:14 -05:00
} while ( - - nColCtr ) ;
pOut + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
}
}
template < BINARYSIMDFUNCTION fn > void CSOAContainer : : ApplyBinaryFunctionToAttr ( int nDestAttr , fltx4 const & fl4FnArg1 )
{
if ( m_nDataType [ nDestAttr ] = = ATTRDATATYPE_4V )
{
FourVectors * pOut = RowPtr < FourVectors > ( nDestAttr , 0 ) ;
size_t nRowToRowStride = RowToRowStep ( nDestAttr ) / sizeof ( FourVectors ) ;
int nRowCtr = NumRows ( ) * NumSlices ( ) ;
do
{
int nColCtr = NumQuadsPerRow ( ) ;
do
{
pOut - > x = fn ( pOut - > x , fl4FnArg1 ) ;
pOut - > y = fn ( pOut - > y , fl4FnArg1 ) ;
pOut - > z = fn ( pOut - > z , fl4FnArg1 ) ;
} while ( - - nColCtr ) ;
pOut + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
}
else
{
AssertDataType ( nDestAttr , ATTRDATATYPE_FLOAT ) ;
fltx4 * pOut = RowPtr < fltx4 > ( nDestAttr , 0 ) ;
size_t nRowToRowStride = RowToRowStep ( nDestAttr ) / sizeof ( fltx4 ) ;
int nRowCtr = NumRows ( ) * NumSlices ( ) ;
do
{
int nColCtr = NumQuadsPerRow ( ) ;
do
{
2012-05-21 02:49:35 -05:00
* ( pOut ) = fn ( * pOut , fl4FnArg1 ) ;
pOut + + ;
2010-07-22 01:46:14 -05:00
} while ( - - nColCtr ) ;
pOut + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
}
}
template < BINARYSIMDFUNCTION fn > float CSOAContainer : : ReduceAttr ( int nSrcAttr , fltx4 const & fl4InitialValue ) const
{
AssertDataType ( nSrcAttr , ATTRDATATYPE_FLOAT ) ;
fltx4 fl4Result = fl4InitialValue ;
fltx4 const * pIn = RowPtr < fltx4 > ( nSrcAttr , 0 ) ;
size_t nRowToRowStride = RowToRowStep ( nSrcAttr ) / sizeof ( fltx4 ) ;
int nRowCtr = NumRows ( ) * NumSlices ( ) ;
fltx4 fl4LastColumnMask = LoadAlignedSIMD ( g_SIMD_SkipTailMask [ NumCols ( ) & 3 ] ) ;
do
{
for ( int i = 0 ; i < NumQuadsPerRow ( ) - 1 ; i + + )
{
fl4Result = fn ( fl4Result , * ( pIn + + ) ) ;
}
// handle the last column in case its not a multiple of 4 wide
fl4Result = MaskedAssign ( fl4LastColumnMask , fn ( fl4Result , * ( pIn + + ) ) , fl4Result ) ;
pIn + = nRowToRowStride ;
} while ( - - nRowCtr ) ;
// now, combine the subfields
fl4Result = fn (
fn ( fl4Result , SplatYSIMD ( fl4Result ) ) ,
fn ( SplatZSIMD ( fl4Result ) , SplatWSIMD ( fl4Result ) ) ) ;
return SubFloat ( fl4Result , 0 ) ;
}
# define QUANTIZER_NJOBS 1 // # of simultaneous subjobs to execute for kmeans quantizer
// kmeans quantization classes
// the array of quantized values returned by quantization
/// One entry of the palette produced by k-means quantization: per-field values stored
/// replicated across SIMD lanes, plus per-job accumulators.
class KMeansQuantizedValue
{
public:
	FourVectors m_vecValuePosition;										// replicated
	fltx4 m_fl4Values[MAX_SOA_FIELDS];									// replicated
	float m_flValueAccumulators[QUANTIZER_NJOBS][MAX_SOA_FIELDS];
	float m_flWeightAccumulators[QUANTIZER_NJOBS];

	/// Scalar value of field n (lane 0 of the replicated SIMD value). Now const-qualified so
	/// it can be called through the `KMeansQuantizedValue const &` that
	/// IKMeansErrorMetric::CalculateError receives; it never modified the object.
	FORCEINLINE float operator()( int n ) const
	{
		return SubFloat( m_fl4Values[n], 0 );
	}
};
/// Bundle of per-field pointers identifying one SIMD sample (one fltx4 per field).
class KMeansSampleDescriptor
{
public:
	fltx4 *m_pInputValues[MAX_SOA_FIELDS];	// one pointer per field index

	/// SIMD value of field nField for this sample. The pointer is dereferenced unchecked.
	FORCEINLINE fltx4 const &operator()( int nField ) const
	{
		fltx4 const *pFieldValue = m_pInputValues[nField];
		return *pFieldValue;
	}
};
// Callback interface passed to CSOAContainer::KMeansQuantization. Only CalculateError must be
// implemented; the two Post* hooks default to doing nothing.
// NOTE(review): the class has virtual functions but no virtual destructor. Deleting an
// implementation through an IKMeansErrorMetric pointer would be undefined behavior -- confirm
// no caller does that before relying on it.
class IKMeansErrorMetric
{
public :
// Pure virtual. Receives a sample, its positions, a quantized value to compare against, and
// an output pointer for the error (one fltx4).
virtual void CalculateError ( KMeansSampleDescriptor const & sampleAddresses ,
FourVectors const & v4SamplePositions ,
KMeansQuantizedValue const & valueToCompareAgainst ,
fltx4 * pfl4ErrOut ) = 0 ;
// for things like normalization, etc
virtual void PostAdjustQuantizedValue ( KMeansQuantizedValue & valueToAdjust )
{
}
// for global fixup after each adjustment step
virtual void PostStep ( int const * pFieldIndices , int nNumFields ,
KMeansQuantizedValue * pValues , int nNumQuantizedValues ,
int nIndexField , CSOAContainer & data )
{
}
} ;
/// Default constructor: an empty container with no attributes and no memory. All of the
/// state reset is delegated to Init().
FORCEINLINE CSOAContainer::CSOAContainer( void )
{
	Init();
}
//-----------------------------------------------------------------------------
// Did the container allocate memory for this attribute?
// Tests bit nAttrIdx of m_nFieldPresentMask. The shift is done on an unsigned
// value: with a signed `1`, attribute 31 would shift into the sign bit of int.
//-----------------------------------------------------------------------------
FORCEINLINE bool CSOAContainer::HasAllocatedMemory( int nAttrIdx ) const
{
	return ( m_nFieldPresentMask & ( 1u << nAttrIdx ) ) != 0;
}
/// Data type recorded for attribute nAttrIdx. The index is range-checked by Assert only.
FORCEINLINE EAttributeDataType CSOAContainer::GetAttributeType( int nAttrIdx ) const
{
	Assert( ( nAttrIdx >= 0 ) && ( nAttrIdx < MAX_SOA_FIELDS ) );
	EAttributeDataType const nType = m_nDataType[nAttrIdx];
	return nType;
}
/// If the attribute has no allocated memory yet, set its type via SetAttributeType (with that
/// function's default allocation behavior). An attribute that already has memory is left
/// untouched; its existing type is not compared against nDataType.
FORCEINLINE void CSOAContainer::EnsureDataType( int nAttrIdx, EAttributeDataType nDataType )
{
	if ( HasAllocatedMemory( nAttrIdx ) )
		return;

	SetAttributeType( nAttrIdx, nDataType );
}
/// Number of rows (the Y extent).
FORCEINLINE int CSOAContainer::NumRows( void ) const
{
	return m_nRows;
}
/// Number of columns (the X extent), not including any SIMD padding.
FORCEINLINE int CSOAContainer::NumCols( void ) const
{
	return m_nColumns;
}
/// Number of slices (the Z extent).
FORCEINLINE int CSOAContainer::NumSlices( void ) const
{
	return m_nSlices;
}
/// Debug check: nAttrIdx must lie in [0, MAX_SOA_FIELDS) and the attribute must have exactly
/// the type nDataType. Consists solely of Assert calls.
FORCEINLINE void CSOAContainer::AssertDataType( int nAttrIdx, EAttributeDataType nDataType ) const
{
	// index bounds first, so the array read below is only reached for a valid index
	Assert( nAttrIdx >= 0 );
	Assert( nAttrIdx < MAX_SOA_FIELDS );

	Assert( m_nDataType[nAttrIdx] == nDataType );
}
/// Number of 4-element groups (quads) that make up one row.
FORCEINLINE int CSOAContainer::NumQuadsPerRow( void ) const
{
	return m_nNumQuadsPerRow;
}
/// Element count for 1d data: simply the number of columns.
FORCEINLINE int CSOAContainer::Count( void ) const
{
	return m_nColumns;
}
/// Total number of elements: columns * rows * slices (unpadded column count).
FORCEINLINE int CSOAContainer::NumElements( void ) const
{
	return m_nColumns * m_nRows * m_nSlices;
}
/// How much to step to go from the end of one row to the start of the next one, i.e. how many
/// bytes to add at the end of a row when iterating over the whole 2d array with ++.
/// This implementation always reports 0 and ignores nAttrIdx.
FORCEINLINE size_t CSOAContainer::RowToRowStep( int nAttrIdx ) const
{
	size_t const nExtraBytesBetweenRows = 0;
	return nExtraBytesBetweenRows;
}
/// Typed pointer to the first element of row nRowNumber in slice nSliceNumber of an attribute:
/// attribute base + row * row stride + slice * slice stride (strides in bytes).
///
/// Debug checks: the row and attribute index are upper-bounded, the attribute must have a
/// type, and an attribute without allocated memory may only be addressed at row 0 / slice 0.
///
/// Cleanups: the address expression had a doubled `+` (binary plus followed by a unary plus),
/// and the present-mask test shifted a signed `1`, which hits the sign bit for attribute 31.
template<class T> FORCEINLINE T *CSOAContainer::RowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
{
	Assert( nRowNumber < m_nRows );
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
	Assert( ( m_nFieldPresentMask & ( 1u << nAttributeIdx ) ) || ( ( nRowNumber == 0 ) && ( nSliceNumber == 0 ) ) );
	return reinterpret_cast<T *>(
		m_pAttributePtrs[nAttributeIdx]
		+ nRowNumber * m_nRowStrideInBytes[nAttributeIdx]
		+ nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx] );
}
/// Untyped read-only pointer to the first byte of row nRowNumber in slice nSliceNumber of an
/// attribute. Unlike RowPtr, there is no check that the attribute has allocated memory.
FORCEINLINE void const *CSOAContainer::ConstRowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
{
	Assert( nRowNumber < m_nRows );
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );

	// start at the attribute base, then apply the row and slice byte offsets in turn
	uint8 const *pRow = m_pAttributePtrs[nAttributeIdx];
	pRow = pRow + nRowNumber * m_nRowStrideInBytes[nAttributeIdx];
	pRow = pRow + nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx];
	return pRow;
}
/// Typed pointer to element ( nX, nY, nZ ) of a non-4V attribute: base + x * item stride
/// + y * row stride + z * slice stride (all strides in bytes). 4V attributes are rejected by
/// Assert; use ElementPointer4V for those.
template<class T> FORCEINLINE T *CSOAContainer::ElementPointer( int nAttributeIdx, int nX, int nY, int nZ ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( nX < m_nColumns );
	Assert( nY < m_nRows );
	Assert( nZ < m_nSlices );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_4V );

	uint8 *pElement = m_pAttributePtrs[nAttributeIdx];
	pElement = pElement + nX * m_nStrideInBytes[nAttributeIdx];
	pElement = pElement + nY * m_nRowStrideInBytes[nAttributeIdx];
	pElement = pElement + nZ * m_nSliceStrideInBytes[nAttributeIdx];
	return reinterpret_cast<T *>( pElement );
}
/// Pointer into a 4V attribute for element ( nX, nY, nZ ). The address is computed for the
/// group of four columns containing nX and then offset by 4 bytes per position inside that
/// group ( nX & 3 ).
/// NOTE(review): the 4-byte step presumably is sizeof( float ), i.e. the returned pointer is
/// deliberately offset so that lane 0 of each component is element nX -- confirm against
/// callers.
FORCEINLINE FourVectors *CSOAContainer::ElementPointer4V( int nAttributeIdx, int nX, int nY, int nZ ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( nX < m_nColumns );
	Assert( nY < m_nRows );
	Assert( nZ < m_nSlices );
	Assert( m_nDataType[nAttributeIdx] == ATTRDATATYPE_4V );

	int const nGroupOfFour = nX / 4;	// which group of 4 columns
	int const nWithinGroup = nX & 3;	// position inside that group

	uint8 *pRet = m_pAttributePtrs[nAttributeIdx]
		+ nGroupOfFour * 4 * m_nStrideInBytes[nAttributeIdx]
		+ nY * m_nRowStrideInBytes[nAttributeIdx]
		+ nZ * m_nSliceStrideInBytes[nAttributeIdx];
	pRet += 4 * nWithinGroup;
	return reinterpret_cast<FourVectors *>( pRet );
}
/// Byte stride from one datum of the attribute to the next. The attribute must have a type
/// set (Assert-checked).
FORCEINLINE size_t CSOAContainer::ItemByteStride( int nAttributeIdx ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );

	size_t const nStride = m_nStrideInBytes[nAttributeIdx];
	return nStride;
}
/// Take over the data of `other` with a member-wise copy (pointers are copied, no data is
/// duplicated), then reset `other` with Init().
///
/// NOTE(review): `other` is passed BY VALUE, so the object reset by Init() is the parameter
/// copy, not the caller's container. The caller's container therefore keeps its pointers and
/// both containers end up referring to the same memory. The stated intent ("leaving the
/// source empty") would need a reference parameter here and in the in-class declaration.
/// Whatever this container held before the call is overwritten without being released here.
FORCEINLINE void CSOAContainer::MoveDataFrom( CSOAContainer other )
{
	operator=( other );
	other.Init();
}
/// Read-only strided iterator over the fltx4 data of one attribute row (slice 0), stepping
/// by the attribute's item byte stride.
class CFltX4AttributeIterator : public CStridedConstPtr<fltx4>
{
public:
	// The constructor used to be private (class default access), which made the type
	// impossible to instantiate; it is now public.
	FORCEINLINE CFltX4AttributeIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
		: CStridedConstPtr<fltx4>( pContainer->ConstRowPtr( nAttribute, nRowNumber ),
								   pContainer->ItemByteStride( nAttribute ) )
	{
	}
};
/// Writable strided iterator over the fltx4 data of one attribute row (slice 0), stepping by
/// the attribute's item byte stride. The container is taken by const pointer because RowPtr
/// is a const member that returns a non-const pointer.
class CFltX4AttributeWriteIterator : public CStridedPtr<fltx4>
{
public:
	// The constructor used to be private (class default access), which made the type
	// impossible to instantiate; it is now public.
	FORCEINLINE CFltX4AttributeWriteIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
		: CStridedPtr<fltx4>( pContainer->RowPtr<uint8>( nAttribute, nRowNumber ),
							  pContainer->ItemByteStride( nAttribute ) )
	{
	}
};
/// FourVectors overload: applies the fltx4 CompressSIMD to the x, y and z components of the
/// two inputs independently.
FORCEINLINE FourVectors CompressSIMD( FourVectors const &a, FourVectors const &b )
{
	FourVectors v4Packed;
	v4Packed.x = CompressSIMD( a.x, b.x );
	v4Packed.y = CompressSIMD( a.y, b.y );
	v4Packed.z = CompressSIMD( a.z, b.z );
	return v4Packed;
}
/// FourVectors overload: applies the fltx4 Compress4SIMD to the x, y and z components of the
/// four inputs independently.
FORCEINLINE FourVectors Compress4SIMD( FourVectors const &a, FourVectors const &b,
									   FourVectors const &c, FourVectors const &d )
{
	FourVectors v4Packed;
	v4Packed.x = Compress4SIMD( a.x, b.x, c.x, d.x );
	v4Packed.y = Compress4SIMD( a.y, b.y, c.y, d.y );
	v4Packed.z = Compress4SIMD( a.z, b.z, c.z, d.z );
	return v4Packed;
}
# endif