// Mirror of https://github.com/alliedmodders/hl2sdk.git
// (synced 2025-01-05 17:13:36 +08:00; 898 lines, 30 KiB, C++)
//====== Copyright © 1996-2007, Valve Corporation, All rights reserved. =======//
//
// Purpose:
//
// $NoKeywords: $
//
// A fixed-allocation class for maintaining a 1d, 2d, or 3d array of data in a
// structure-of-arrays (SOA), SSE-friendly manner.
// =============================================================================//
|
|
|
|
#ifndef UTLSOACONTAINER_H
|
|
#define UTLSOACONTAINER_H
|
|
|
|
#ifdef _WIN32
|
|
#pragma once
|
|
#endif
|
|
|
|
|
|
#include "tier0/platform.h"
|
|
#include "tier0/dbg.h"
|
|
#include "tier0/threadtools.h"
|
|
#include "tier1/utlmemory.h"
|
|
#include "tier1/utlblockmemory.h"
|
|
#include "mathlib/ssemath.h"
|
|
|
|
|
|
|
|
|
|
// Strided pointers: classes that act like a pointer, but whose ++ and += operators step by a
// caller-supplied stride rather than by a single element.

// CStridedPtr: mutable strided pointer to elements of type T.
template<class T> class CStridedPtr
{
protected:
	T *m_pData;			// element currently pointed at
	size_t m_nStride;	// step between successive elements, in units of T (not bytes)

public:
	// pData       - address of the first element.
	// nByteStride - distance between successive elements, in bytes. It is divided by
	//               sizeof( T ), so any remainder is discarded.
	FORCEINLINE CStridedPtr( void *pData, size_t nByteStride )
		: m_pData( reinterpret_cast<T *>( pData ) ),
		  m_nStride( nByteStride / sizeof( T ) )
	{
	}

	// A default-constructed strided pointer is null with a zero stride, so it never holds
	// indeterminate values.
	FORCEINLINE CStridedPtr( void )
		: m_pData( NULL ),
		  m_nStride( 0 )
	{
	}

	T *operator->( void ) const
	{
		return m_pData;
	}

	T &operator*( void ) const
	{
		return *m_pData;
	}

	// Implicit conversion to the raw pointer. const-qualified so that it is also usable on a
	// const CStridedPtr, like operator-> and operator*.
	FORCEINLINE operator T *( void ) const
	{
		return m_pData;
	}

	// Advance by one stride.
	FORCEINLINE CStridedPtr &operator++( void )
	{
		m_pData += m_nStride;
		return *this;
	}

	// Advance by nNumElements strides.
	FORCEINLINE void operator+=( size_t nNumElements )
	{
		m_pData += nNumElements * m_nStride;
	}

	// Stride in units of T.
	FORCEINLINE size_t Stride( void ) const
	{
		return m_nStride;
	}
};
|
|
|
|
template<class T> class CStridedConstPtr
|
|
{
|
|
protected:
|
|
const T *m_pData;
|
|
size_t m_nStride;
|
|
|
|
public:
|
|
FORCEINLINE CStridedConstPtr<T>( void const *pData, size_t nByteStride )
|
|
{
|
|
m_pData = reinterpret_cast<T const *>( pData );
|
|
m_nStride = nByteStride / sizeof( T );
|
|
}
|
|
|
|
FORCEINLINE CStridedConstPtr<T>( void ) {}
|
|
|
|
const T *operator->(void) const
|
|
{
|
|
return m_pData;
|
|
}
|
|
|
|
const T & operator*(void) const
|
|
{
|
|
return *m_pData;
|
|
}
|
|
|
|
FORCEINLINE operator const T *(void) const
|
|
{
|
|
return m_pData;
|
|
}
|
|
|
|
FORCEINLINE CStridedConstPtr<T> &operator++(void)
|
|
{
|
|
m_pData += m_nStride;
|
|
return *this;
|
|
}
|
|
FORCEINLINE void operator+=( size_t nNumElements )
|
|
{
|
|
m_pData += nNumElements*m_nStride;
|
|
}
|
|
FORCEINLINE size_t Stride( void ) const
|
|
{
|
|
return m_nStride;
|
|
|
|
}
|
|
};
|
|
|
|
// Allowed field data types. NOTE: if you change these values, you need to change the tables in
// the .cpp file as well. The values are spelled out explicitly to make that coupling visible.
enum EAttributeDataType
{
	ATTRDATATYPE_NONE = -1,		// pad and varargs ender
	ATTRDATATYPE_FLOAT = 0,		// a float attribute
	ATTRDATATYPE_4V = 1,		// vector data type, stored as class FourVectors
	ATTRDATATYPE_INT = 2,		// integer. not especially sse-able on all architectures.
	ATTRDATATYPE_POINTER = 3,	// a pointer.

	ATTRDATATYPE_COUNT = 4,		// number of entries above, not counting ATTRDATATYPE_NONE
};
|
|
|
|
#define MAX_SOA_FIELDS 32
|
|
|
|
class KMeansQuantizedValue;
|
|
class IKMeansErrorMetric;
|
|
|
|
typedef fltx4 (*UNARYSIMDFUNCTION)( fltx4 const & );
|
|
typedef fltx4 (*BINARYSIMDFUNCTION)( fltx4 const &, fltx4 const & );
|
|
|
|
class CSOAAttributeReference;
|
|
|
|
|
|
|
|
/// Mode of threading for a container. Normally set automatically based upon dimensions, but
/// controllable via SetThreadMode.
enum SOAThreadMode_t
{
	SOATHREADMODE_NONE = 0,
	SOATHREADMODE_BYROWS = 1,
	SOATHREADMODE_BYSLICES = 2,
	SOATHREADMODE_BYROWS_AND_SLICES = 3,	// numerically BYROWS | BYSLICES

	SOATHREADMODE_AUTO = -1,				// compute based upon dimensions
};
|
|
|
|
|
|
class CSOAContainer
|
|
{
|
|
friend class CSOAAttributeReference;
|
|
|
|
public:
|
|
// Constructor, destructor
|
|
CSOAContainer( void ); // an empty one with no attributes
|
|
CSOAContainer( int nCols, int nRows, int nSlices, ... );
|
|
~CSOAContainer( void );
|
|
|
|
// !!!!! UPDATE SERIALIZATION CODE WHENEVER THE STRUCTURE OF CSOAContainer CHANGES !!!!!
|
|
// To avoid dependency on datamodel, serialization is implemented in utlsoacontainer_serialization.cpp, in dmxloader.lib
|
|
//bool Serialize( CDmxElement *pRootElement );
|
|
//bool Unserialize( const CDmxElement *pRootElement );
|
|
|
|
|
|
// Set the data type for an attribute. If you set the data type, but tell it not to allocate,
|
|
// the data type will be set but writes will assert, and reads will give you back zeros. if
|
|
// AllocateData hasn't been called yet, this will set up for AllocateData to reserve space for
|
|
// this attribute. If you have already called AllocateData, but wish to add an attribute, you
|
|
// can also use this, which will result in separate memory being allocated for this attribute.
|
|
void SetAttributeType( int nAttrIdx, EAttributeDataType nDataType, bool bAllocateMemory = true );
|
|
EAttributeDataType GetAttributeType( int nAttrIdx ) const;
|
|
|
|
|
|
// Set the attribute type for a field, if that field is not already present (potentially
|
|
// allocating memory). You can use this, for instance, to make sure an already loaded image has
|
|
// an alpha channel.
|
|
void EnsureDataType( int nAttrIdx, EAttributeDataType nDataType );
|
|
|
|
// set back to un-initted state, freeing memory
|
|
void Purge( void );
|
|
|
|
// Allocate, purge data
|
|
void AllocateData( int nNCols, int nNRows, int nSlices = 1 ); // actually allocate the memory and set the pointers up
|
|
void PurgeData( void );
|
|
|
|
// Did the container allocate memory for this attribute?
|
|
bool HasAllocatedMemory( int nAttrIdx ) const;
|
|
|
|
// easy constructor for 2d using varargs. call like
|
|
// #define ATTR_RED 0
|
|
// #define ATTR_GREEN 1
|
|
// #define ATTR_BLUE 2
|
|
// CSOAContainer myimage( 256, 256, ATTR_RED, ATTRDATATYPE_FLOAT, ATTR_GREEN, ATTRDATATYPE_FLOAT,
|
|
// ATTR_BLUE, ATTRDATATYPE_FLOAT, -1 );
|
|
|
|
int NumCols( void ) const;
|
|
int NumRows( void ) const;
|
|
int NumSlices( void ) const;
|
|
void AssertDataType( int nAttrIdx, EAttributeDataType nDataType ) const;
|
|
|
|
// # of groups of 4 elements per row
|
|
int NumQuadsPerRow( void ) const;
|
|
int Count( void ) const; // for 1d data
|
|
int NumElements( void ) const;
|
|
|
|
// how much to step to go from the end of one row to the start of the next one. Basically, how
|
|
// many bytes to add at the end of a row when iterating over the whole 2d array with ++
|
|
size_t RowToRowStep( int nAttrIdx ) const;
|
|
template<class T> T *RowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber = 0 ) const;
|
|
void const *ConstRowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber = 0 ) const;
|
|
template<class T> T *ElementPointer( int nAttributeIdx, int nX = 0, int nY = 0, int nZ = 0 ) const;
|
|
FourVectors *ElementPointer4V( int nAttributeIdx, int nX = 0, int nY = 0, int nZ = 0 ) const;
|
|
size_t ItemByteStride( int nAttributeIdx ) const;
|
|
|
|
FORCEINLINE float &FloatValue( int nAttrIdx, int nX, int nY, int nZ ) const
|
|
{
|
|
AssertDataType( nAttrIdx, ATTRDATATYPE_FLOAT );
|
|
return RowPtr<float>( nAttrIdx, nY, nZ )[nX];
|
|
}
|
|
|
|
// return a reference to an attribute, which can have operations performed on it. For instance,
|
|
// this is valid code to zero out the red component of a whole image:
|
|
// myImage[FBM_ATTR_RED] = 0.;
|
|
CSOAAttributeReference operator[]( int nAttrIdx );
|
|
|
|
// this is just an alias for readbaility w/ ptrs. instead of (*p)[FBM_ATTR_RED], you can do p->Attr( FBM_ATTR_RED );
|
|
FORCEINLINE CSOAAttributeReference Attr( int nAttrIdx );
|
|
|
|
// copy the attribute data from another soacontainer. must be compatible geometry.
|
|
void CopyAttrFrom( CSOAContainer const &other, int nDestAttributeIdx, int nSrcAttributeIndex = -1 );
|
|
|
|
// copy the attribute data from another attribute. must be compatible data format
|
|
void CopyAttrToAttr( int nSrcAttributeIndex, int nDestAttributeIndex);
|
|
|
|
// copy a subvolume of attribute data from one container to another.
|
|
void CopyRegionFrom( CSOAContainer const &src, int nSrcAttr, int nDestAttr,
|
|
int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
|
|
int nDestX, int nDestY, int nDestZ );
|
|
|
|
// copy all fields from a region of src to this.
|
|
void CopyRegionFrom( CSOAContainer const &src,
|
|
int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
|
|
int nDestX, int nDestY, int nDestZ );
|
|
|
|
// move all the data from one csoacontainer to another, leaving the source empty. this is just
|
|
// a pointer copy.
|
|
FORCEINLINE void MoveDataFrom( CSOAContainer other );
|
|
|
|
// arithmetic and data filling functions. All SIMD and hopefully fast
|
|
|
|
/// set all elements of a float attribute to random #s
|
|
void RandomizeAttribute( int nAttr, float flMin, float flMax ) const;
|
|
|
|
/// this.attr = vec
|
|
void FillAttr( int nAttr, Vector const &vecValue );
|
|
|
|
/// this.attr = float
|
|
void FillAttr( int nAttr, float flValue );
|
|
|
|
/// this.nDestAttr *= src.nSrcAttr
|
|
void MulAttr( CSOAContainer const &src, int nSrcAttr, int nDestAttr );
|
|
|
|
/// Returns the result of repeatedly combining attr values with the initial value using the specified function.
|
|
/// For instance, SumAttributeValue is just ReduceAttr<AddSIMD>( attr, FOUR_ZEROS );
|
|
template<BINARYSIMDFUNCTION fn> float ReduceAttr( int nSrcAttr, fltx4 const &fl4InitialValue ) const;
|
|
|
|
template<BINARYSIMDFUNCTION fn> void ApplyBinaryFunctionToAttr( int nDestAttr, fltx4 const &flFnArg1 );
|
|
|
|
/// this.attr = fn1( fn2( attr, arg2 ), arg1 )
|
|
template<BINARYSIMDFUNCTION fn1, BINARYSIMDFUNCTION fn2> void ApplyTwoComposedBinaryFunctionsToAttr( int nDestAttr, fltx4 const &flFnArg1, fltx4 const &flFnArg2 );
|
|
|
|
/// this.nDestAttr *= flValue
|
|
void MulAttr( int nDestAttr, float flScale )
|
|
{
|
|
ApplyBinaryFunctionToAttr<MulSIMD>( nDestAttr, ReplicateX4( flScale ) );
|
|
}
|
|
|
|
void AddToAttr( int nDestAttr, float flAddend )
|
|
{
|
|
ApplyBinaryFunctionToAttr<AddSIMD>( nDestAttr, ReplicateX4( flAddend ) );
|
|
}
|
|
|
|
// this.attr = max( this.attr, flminvalue )
|
|
void MaxAttr( int nDestAttr, float flMinValue )
|
|
{
|
|
ApplyBinaryFunctionToAttr<MaxSIMD>( nDestAttr, ReplicateX4( flMinValue ) );
|
|
}
|
|
|
|
/// this.attr = min( this.attr, flminvalue )
|
|
void MinAttr( int nDestAttr, float flMaxValue )
|
|
{
|
|
ApplyBinaryFunctionToAttr<MinSIMD>( nDestAttr, ReplicateX4( flMaxValue ) );
|
|
}
|
|
|
|
void ClampAttr( int nDestAttr, float flMinValue, float flMaxValue )
|
|
{
|
|
ApplyTwoComposedBinaryFunctionsToAttr<MinSIMD, MaxSIMD>( nDestAttr, ReplicateX4( flMaxValue ), ReplicateX4( flMinValue ) );
|
|
}
|
|
|
|
/// this.attr = normalize( this.attr )
|
|
void NormalizeAttr( int nAttr );
|
|
|
|
/// fill 2d a rectangle with values interpolated from 4 corner values.
|
|
void FillAttrWithInterpolatedValues( int nAttr, float flValue00, float flValue10, float flValue01, float flValue11 ) const;
|
|
void FillAttrWithInterpolatedValues( int nAttr, Vector flValue00, Vector flValue10,
|
|
Vector const &flValue01, Vector const &flValue11 ) const;
|
|
|
|
/// grab 3 scalar attributes from one csoaa and fill in a fourvector attr in.
|
|
void PackScalarAttributesToVectorAttribute( CSOAContainer *pInput,
|
|
int nVecAttributeOut,
|
|
int nScalarAttributeX,
|
|
int nScalarAttributeY,
|
|
int nScalarAttributeZ );
|
|
|
|
/// grab the 3 components of a vector attribute and store in 3 scalar attributes.
|
|
void UnPackVectorAttributeToScalarAttributes( CSOAContainer *pInput,
|
|
int nVecAttributeIn,
|
|
int nScalarAttributeX,
|
|
int nScalarAttributeY,
|
|
int nScalarAttributeZ );
|
|
|
|
/// this.attrout = src.attrin * vec (component by component )
|
|
void MultiplyVectorAttribute( CSOAContainer *pInput, int nAttributeIn, Vector const &vecScalar, int nAttributeOut );
|
|
|
|
/// Given an soa container of a different dimension, resize one attribute from it to fit this
|
|
/// table's geometry. point sampling only
|
|
void ResampleAttribute( CSOAContainer &pInput, int nAttr );
|
|
|
|
/// sum of all floats in an attribute
|
|
float SumAttributeValue( int nAttr ) const;
|
|
|
|
/// sum(attr) / ( w * h * d )
|
|
float AverageFloatAttributeValue( int nAttr ) const;
|
|
|
|
/// maximum float value in a float attr
|
|
float MaxAttributeValue( int nAttr ) const;
|
|
|
|
/// minimum float value in a float attr
|
|
float MinAttributeValue( int nAttr ) const;
|
|
|
|
|
|
/// scalartargetattribute += w*exp( vecdir dot ndirection)
|
|
void AddGaussianSRBF( float flWeight, Vector vecDir, int nDirectionAttribute, int nScalarTargetAttribute );
|
|
|
|
/// vec3targetattribute += w*exp( vecdir dot ndirection)
|
|
void AddGaussianSRBF( Vector vecWeight, Vector vecDir, int nDirectionAttribute,
|
|
int nVectorTargetAttribute );
|
|
|
|
|
|
/// find the largest value of a vector attribute
|
|
void FindLargestMagnitudeVector( int nAttr, int *nx, int *ny, int *nz );
|
|
|
|
void KMeansQuantization( int const *pFieldIndices, int nNumFields,
|
|
KMeansQuantizedValue *pOutValues,
|
|
int nNumResultsDesired, IKMeansErrorMetric *pErrorCalculator,
|
|
int nFieldToStoreIndexInto, int nNumIterations,
|
|
int nChannelToReceiveErrorSignal = -1 );
|
|
|
|
// Calculate the signed distance, in voxels, between all voxels and a surface boundary defined
|
|
// by nSrcField being >0. Voxels with nSrcField <0 will end up with negative distances. Voxels
|
|
// with nSrcField == 0 will get 0, and nSrcField >0 will yield positive distances. Note the
|
|
// min/max x/y/z fields don't reflect the range to be written, but rather represent the bounds
|
|
// of updated voxels that you want your distance field modified to take into account. This
|
|
// volume will be bloated based upon the nMaxDistance parameter and simd padding. A
|
|
// brute-force algorithm is used, but it is threaded and simd'd. Large "nMaxDistance" values
|
|
// applied to large images can take a long time, as the execution time per output pixel is
|
|
// proportional to maxdistance^2. The rect argument, if passed, will be modified to be the
|
|
// entire rectangle modified by the operation.
|
|
void GenerateDistanceField( int nSrcField, int nDestField,
|
|
int nMaxDistance,
|
|
Rect3D_t *pRect = NULL );
|
|
|
|
void SetThreadMode( SOAThreadMode_t eThreadMode );
|
|
|
|
protected:
|
|
int m_nColumns; // # of rows and columns created with
|
|
int m_nRows;
|
|
int m_nSlices;
|
|
int m_nPaddedColumns; // # of columns rounded up for sse
|
|
int m_nNumQuadsPerRow; // # of groups of 4 elements per row
|
|
uint8 *m_pDataMemory; // the actual data memory
|
|
uint8 *m_pAttributePtrs[MAX_SOA_FIELDS];
|
|
EAttributeDataType m_nDataType[MAX_SOA_FIELDS];
|
|
size_t m_nStrideInBytes[MAX_SOA_FIELDS]; // stride from one field datum to another
|
|
size_t m_nRowStrideInBytes[MAX_SOA_FIELDS]; // stride from one row datum to another per field
|
|
size_t m_nSliceStrideInBytes[MAX_SOA_FIELDS]; // stride from one slice datum to another per field
|
|
uint32 m_nFieldPresentMask;
|
|
uint8 *m_pConstantDataMemory;
|
|
uint8 *m_pSeparateDataMemory[MAX_SOA_FIELDS]; // for fields allocated separately from the main allocation
|
|
SOAThreadMode_t m_eThreadMode; // set thread mode
|
|
|
|
FORCEINLINE void Init( void )
|
|
{
|
|
memset( m_nDataType, 0xff, sizeof( m_nDataType ) );
|
|
memset( m_pSeparateDataMemory, 0, sizeof( m_pSeparateDataMemory ) );
|
|
|
|
#ifdef _DEBUG
|
|
memset( m_pAttributePtrs, 0xFF, sizeof( m_pAttributePtrs ) );
|
|
memset( m_nStrideInBytes, 0xFF, sizeof( m_nStrideInBytes ) );
|
|
memset( m_nRowStrideInBytes, 0xFF, sizeof( m_nRowStrideInBytes ) );
|
|
memset( m_nSliceStrideInBytes, 0xFF, sizeof( m_nSliceStrideInBytes ) );
|
|
#endif
|
|
|
|
m_pConstantDataMemory = NULL;
|
|
m_pDataMemory = 0;
|
|
m_nNumQuadsPerRow = 0;
|
|
m_nColumns = m_nPaddedColumns = m_nRows = m_nSlices = 0;
|
|
m_nFieldPresentMask = 0;
|
|
m_eThreadMode = SOATHREADMODE_NONE;
|
|
}
|
|
|
|
void UpdateDistanceRow( int nSearchRadius, int nMinX, int nMaxX, int nY, int nZ,
|
|
int nSrcField, int nDestField );
|
|
|
|
// parallel helper functions. These do the work, and all take a row/column range as their first arguments.
|
|
void CopyAttrFromPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, CSOAContainer const *pOther, int nDestAttributeIndex, int nSrcAttributeIndex );
|
|
void FillAttrPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, int nAttr, fltx4 fl4Value );
|
|
|
|
// Allocation utility funcs (NOTE: all allocs are multiples of 16, and are aligned allocs)
|
|
size_t DataMemorySize( void ) const; // total bytes of data memory to allocate at m_pDataMemory (if all attributes were allocated in a single block)
|
|
size_t ConstantMemorySize( void ) const; // total bytes of constant memory to allocate at m_pConstantDataMemory (if all constant attributes were allocated in a single block)
|
|
size_t AttributeMemorySize( int nAttrIndex ) const; // total bytes of data memory allocated to a single attribute (constant or otherwise)
|
|
void AllocateDataMemory( void );
|
|
void AllocateConstantMemory( void );
|
|
};
|
|
|
|
class CSOAAttributeReference;
|
|
|
|
// define binary op class to allow this construct without temps:
|
|
// dest( FBM_ATTR_RED ) = src( FBM_ATTR_BLUE ) + src( FBM_ATTR_GREEN )
|
|
template<BINARYSIMDFUNCTION fn, class Ref> class CSOAAttributeReferenceBinaryOp
|
|
{
|
|
public:
|
|
Ref m_opA;
|
|
Ref m_opB;
|
|
|
|
CSOAAttributeReferenceBinaryOp( Ref const &a, Ref const & b )
|
|
{
|
|
a.CopyTo( m_opA );
|
|
b.CopyTo( m_opB );
|
|
}
|
|
|
|
};
|
|
|
|
// Defines, inside CSOAAttributeReference, a binary operator 'opname' that packages *this and the
// right-hand reference into a CSOAAttributeReferenceBinaryOp tagged with SIMD function 'fnname'.
#define DEFINE_OP( opname, fnname ) \
	FORCEINLINE CSOAAttributeReferenceBinaryOp<fnname, CSOAAttributeReference> operator opname( CSOAAttributeReference const &other ) const \
	{ \
		return CSOAAttributeReferenceBinaryOp<fnname, CSOAAttributeReference>( *this, other ); \
	}
|
|
|
|
class CSOAAttributeReference
|
|
{
|
|
friend class CSOAContainer;
|
|
|
|
class CSOAContainer *m_pContainer;
|
|
int m_nAttributeID;
|
|
|
|
public:
|
|
FORCEINLINE void operator *=( float flScale ) const
|
|
{
|
|
m_pContainer->MulAttr( m_nAttributeID, flScale );
|
|
}
|
|
FORCEINLINE void operator +=( float flAddend ) const
|
|
{
|
|
m_pContainer->AddToAttr( m_nAttributeID, flAddend );
|
|
}
|
|
FORCEINLINE void operator -=( float flAddend ) const
|
|
{
|
|
m_pContainer->AddToAttr( m_nAttributeID, -flAddend );
|
|
}
|
|
FORCEINLINE void operator =( float flValue ) const
|
|
{
|
|
m_pContainer->FillAttr( m_nAttributeID, flValue );
|
|
}
|
|
|
|
FORCEINLINE void operator =( CSOAAttributeReference const &other ) const
|
|
{
|
|
m_pContainer->CopyAttrFrom( *other.m_pContainer, m_nAttributeID, other.m_nAttributeID );
|
|
}
|
|
|
|
// these operator overloads let you do
|
|
// dst[ATT1] = src1[ATT] + src2[ATT] with no temporaries generated
|
|
DEFINE_OP( +, AddSIMD );
|
|
DEFINE_OP( *, MulSIMD );
|
|
DEFINE_OP( -, SubSIMD );
|
|
DEFINE_OP( /, DivSIMD );
|
|
|
|
template<BINARYSIMDFUNCTION fn> FORCEINLINE void operator =( CSOAAttributeReferenceBinaryOp<fn, CSOAAttributeReference> const &op );
|
|
|
|
FORCEINLINE void CopyTo( CSOAAttributeReference &other ) const; // since operator= is over-ridden
|
|
};
|
|
|
|
|
|
// Evaluates a deferred binary op: this.attr = fn( opA.attr, opB.attr ), four floats at a time.
// All three attributes must be float attributes. The destination container's row/quad counts
// drive the iteration for all three pointers, so the operands are expected to share its geometry.
template<BINARYSIMDFUNCTION fn> FORCEINLINE void CSOAAttributeReference::operator =( CSOAAttributeReferenceBinaryOp<fn, CSOAAttributeReference> const &op )
{
	m_pContainer->AssertDataType( m_nAttributeID, ATTRDATATYPE_FLOAT );
	// the sources are read as fltx4 too, so they must be float attributes as well
	op.m_opA.m_pContainer->AssertDataType( op.m_opA.m_nAttributeID, ATTRDATATYPE_FLOAT );
	op.m_opB.m_pContainer->AssertDataType( op.m_opB.m_nAttributeID, ATTRDATATYPE_FLOAT );

	int const nNumRows = m_pContainer->NumRows() * m_pContainer->NumSlices();
	int const nQuadsPerRow = m_pContainer->NumQuadsPerRow();
	if ( ( nNumRows <= 0 ) || ( nQuadsPerRow <= 0 ) )
	{
		// nothing to do for an empty container (the counted do/while loops used previously
		// would have underflowed their counters here)
		return;
	}

	fltx4 *pOut = m_pContainer->RowPtr<fltx4>( m_nAttributeID, 0 );
	fltx4 *pInA = op.m_opA.m_pContainer->template RowPtr<fltx4>( op.m_opA.m_nAttributeID, 0 );
	fltx4 *pInB = op.m_opB.m_pContainer->template RowPtr<fltx4>( op.m_opB.m_nAttributeID, 0 );
	size_t const nRowToRowStride = m_pContainer->RowToRowStep( m_nAttributeID ) / sizeof( fltx4 );

	for ( int nRow = 0; nRow < nNumRows; nRow++ )
	{
		for ( int nQuad = 0; nQuad < nQuadsPerRow; nQuad++ )
		{
			*( pOut++ ) = fn( *( pInA++ ), *( pInB++ ) );
		}
		// skip any padding between the end of this row and the start of the next
		pOut += nRowToRowStride;
		pInA += nRowToRowStride;
		pInB += nRowToRowStride;
	}
}
|
|
|
|
// Makes 'other' refer to the same container and attribute as this reference. Needed because
// operator= on references copies attribute data rather than the reference itself.
FORCEINLINE void CSOAAttributeReference::CopyTo( CSOAAttributeReference &other ) const
{
	other.m_pContainer = m_pContainer;
	other.m_nAttributeID = m_nAttributeID;
}
|
|
|
|
|
|
|
|
// Builds a reference to attribute nAttrIdx of this container. The index is stored as-is; no
// range or type checking happens here.
FORCEINLINE CSOAAttributeReference CSOAContainer::operator[]( int nAttrIdx )
{
	CSOAAttributeReference attrRef;
	attrRef.m_pContainer = this;
	attrRef.m_nAttributeID = nAttrIdx;
	return attrRef;
}
|
|
|
|
// Named alias for operator[], for readability when working through a pointer:
// p->Attr( idx ) instead of (*p)[idx].
FORCEINLINE CSOAAttributeReference CSOAContainer::Attr( int nAttrIdx )
{
	return operator[]( nAttrIdx );
}
|
|
|
|
template<BINARYSIMDFUNCTION fn1, BINARYSIMDFUNCTION fn2> void CSOAContainer::ApplyTwoComposedBinaryFunctionsToAttr( int nDestAttr, fltx4 const &fl4FnArg1, fltx4 const &fl4FnArg2 )
|
|
{
|
|
if ( m_nDataType[nDestAttr] == ATTRDATATYPE_4V )
|
|
{
|
|
FourVectors *pOut = RowPtr<FourVectors>( nDestAttr, 0 );
|
|
size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( FourVectors );
|
|
int nRowCtr = NumRows() * NumSlices();
|
|
do
|
|
{
|
|
int nColCtr = NumQuadsPerRow();
|
|
do
|
|
{
|
|
pOut->x = fn1( fn2( pOut->x, fl4FnArg2 ), fl4FnArg1 );
|
|
pOut->y = fn1( fn2( pOut->y, fl4FnArg2 ), fl4FnArg1 );
|
|
pOut->z = fn1( fn2( pOut->z, fl4FnArg2 ), fl4FnArg1 );
|
|
} while ( --nColCtr );
|
|
pOut += nRowToRowStride;
|
|
} while ( --nRowCtr );
|
|
}
|
|
else
|
|
{
|
|
AssertDataType( nDestAttr, ATTRDATATYPE_FLOAT );
|
|
fltx4 *pOut = RowPtr<fltx4>( nDestAttr, 0 );
|
|
size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( fltx4 );
|
|
int nRowCtr = NumRows() * NumSlices();
|
|
do
|
|
{
|
|
int nColCtr = NumQuadsPerRow();
|
|
do
|
|
{
|
|
*( pOut ) = fn1( fn2( *pOut, fl4FnArg2 ), fl4FnArg1 );
|
|
pOut++;
|
|
} while ( --nColCtr );
|
|
pOut += nRowToRowStride;
|
|
} while ( --nRowCtr );
|
|
}
|
|
}
|
|
|
|
template<BINARYSIMDFUNCTION fn> void CSOAContainer::ApplyBinaryFunctionToAttr( int nDestAttr, fltx4 const &fl4FnArg1 )
|
|
{
|
|
if ( m_nDataType[nDestAttr] == ATTRDATATYPE_4V )
|
|
{
|
|
FourVectors *pOut = RowPtr<FourVectors>( nDestAttr, 0 );
|
|
size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( FourVectors );
|
|
int nRowCtr = NumRows() * NumSlices();
|
|
do
|
|
{
|
|
int nColCtr = NumQuadsPerRow();
|
|
do
|
|
{
|
|
pOut->x = fn( pOut->x, fl4FnArg1 );
|
|
pOut->y = fn( pOut->y, fl4FnArg1 );
|
|
pOut->z = fn( pOut->z, fl4FnArg1 );
|
|
} while ( --nColCtr );
|
|
pOut += nRowToRowStride;
|
|
} while ( --nRowCtr );
|
|
}
|
|
else
|
|
{
|
|
AssertDataType( nDestAttr, ATTRDATATYPE_FLOAT );
|
|
fltx4 *pOut = RowPtr<fltx4>( nDestAttr, 0 );
|
|
size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( fltx4 );
|
|
int nRowCtr = NumRows() * NumSlices();
|
|
do
|
|
{
|
|
int nColCtr = NumQuadsPerRow();
|
|
do
|
|
{
|
|
*(pOut) = fn( *pOut, fl4FnArg1 );
|
|
pOut++;
|
|
} while ( --nColCtr );
|
|
pOut += nRowToRowStride;
|
|
} while ( --nRowCtr );
|
|
}
|
|
}
|
|
|
|
template<BINARYSIMDFUNCTION fn> float CSOAContainer::ReduceAttr( int nSrcAttr, fltx4 const &fl4InitialValue ) const
|
|
{
|
|
AssertDataType( nSrcAttr, ATTRDATATYPE_FLOAT );
|
|
fltx4 fl4Result = fl4InitialValue;
|
|
fltx4 const *pIn = RowPtr<fltx4>( nSrcAttr, 0 );
|
|
size_t nRowToRowStride = RowToRowStep( nSrcAttr ) / sizeof( fltx4 );
|
|
int nRowCtr = NumRows() * NumSlices();
|
|
fltx4 fl4LastColumnMask = LoadAlignedSIMD( g_SIMD_SkipTailMask[NumCols() & 3 ] );
|
|
do
|
|
{
|
|
for( int i = 0; i < NumQuadsPerRow() - 1; i++ )
|
|
{
|
|
fl4Result = fn( fl4Result, *( pIn++ ) );
|
|
}
|
|
// handle the last column in case its not a multiple of 4 wide
|
|
fl4Result = MaskedAssign( fl4LastColumnMask, fn( fl4Result, *( pIn++ ) ), fl4Result );
|
|
pIn += nRowToRowStride;
|
|
} while ( --nRowCtr );
|
|
// now, combine the subfields
|
|
fl4Result = fn(
|
|
fn( fl4Result, SplatYSIMD( fl4Result ) ),
|
|
fn( SplatZSIMD( fl4Result ), SplatWSIMD( fl4Result ) ) );
|
|
return SubFloat( fl4Result, 0 );
|
|
}
|
|
|
|
|
|
|
|
#define QUANTIZER_NJOBS 1 // # of simultaneous subjobs to execute for kmeans quantizer
|
|
|
|
// kmeans quantization classes
|
|
// the array of quantized values returned by quantization
|
|
class KMeansQuantizedValue
|
|
{
|
|
public:
|
|
FourVectors m_vecValuePosition; // replicated
|
|
fltx4 m_fl4Values[MAX_SOA_FIELDS]; // replicated
|
|
|
|
float m_flValueAccumulators[QUANTIZER_NJOBS][MAX_SOA_FIELDS];
|
|
float m_flWeightAccumulators[QUANTIZER_NJOBS];
|
|
|
|
FORCEINLINE float operator()( int n )
|
|
{
|
|
return SubFloat( m_fl4Values[n], 0 );
|
|
}
|
|
|
|
};
|
|
|
|
class KMeansSampleDescriptor
|
|
{
|
|
public:
|
|
fltx4 *m_pInputValues[MAX_SOA_FIELDS];
|
|
|
|
FORCEINLINE fltx4 const & operator()( int nField ) const
|
|
{
|
|
return *m_pInputValues[nField];
|
|
}
|
|
|
|
};
|
|
|
|
class IKMeansErrorMetric
|
|
{
|
|
public:
|
|
virtual void CalculateError( KMeansSampleDescriptor const &sampleAddresses,
|
|
FourVectors const &v4SamplePositions,
|
|
KMeansQuantizedValue const &valueToCompareAgainst,
|
|
fltx4 *pfl4ErrOut ) =0;
|
|
|
|
// for things like normalization, etc
|
|
virtual void PostAdjustQuantizedValue( KMeansQuantizedValue &valueToAdjust )
|
|
{
|
|
}
|
|
|
|
// for global fixup after each adjustment step
|
|
virtual void PostStep( int const *pFieldIndices, int nNumFields,
|
|
KMeansQuantizedValue *pValues, int nNumQuantizedValues,
|
|
int nIndexField, CSOAContainer &data )
|
|
{
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Creates an empty container with no attributes and no allocated memory.
FORCEINLINE CSOAContainer::CSOAContainer( void )
{
	Init();
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Did the container allocate memory for this attribute?
|
|
//-----------------------------------------------------------------------------
|
|
FORCEINLINE bool CSOAContainer::HasAllocatedMemory( int nAttrIdx ) const
|
|
{
|
|
return ( m_nFieldPresentMask & ( 1 << nAttrIdx ) ) != 0;
|
|
}
|
|
|
|
|
|
|
|
// Data type recorded for attribute nAttrIdx (ATTRDATATYPE_NONE after Init). The index is only
// range-checked by a debug Assert.
FORCEINLINE EAttributeDataType CSOAContainer::GetAttributeType( int nAttrIdx ) const
{
	Assert( ( nAttrIdx >= 0 ) && ( nAttrIdx < MAX_SOA_FIELDS ) );
	return m_nDataType[nAttrIdx];
}
|
|
|
|
// Gives attribute nAttrIdx the type nDataType unless the container already holds memory for
// that attribute, in which case it is left untouched (even if its type differs).
FORCEINLINE void CSOAContainer::EnsureDataType( int nAttrIdx, EAttributeDataType nDataType )
{
	if ( HasAllocatedMemory( nAttrIdx ) )
		return;

	SetAttributeType( nAttrIdx, nDataType );
}
|
|
|
|
// Number of rows (Y extent) of the container
FORCEINLINE int CSOAContainer::NumRows( void ) const
{
	return m_nRows;
}
|
|
|
|
// Number of columns (X extent) of the container, not including any SSE padding
FORCEINLINE int CSOAContainer::NumCols( void ) const
{
	return m_nColumns;
}
|
|
// Number of slices (Z extent) of the container
FORCEINLINE int CSOAContainer::NumSlices( void ) const
{
	return m_nSlices;
}
|
|
|
|
|
|
// Debug check that nAttrIdx is a valid attribute index and that the attribute has type
// nDataType. Everything here goes through Assert.
FORCEINLINE void CSOAContainer::AssertDataType( int nAttrIdx, EAttributeDataType nDataType ) const
{
	// validate the index first; the type lookup below indexes with it
	Assert( nAttrIdx >= 0 );
	Assert( nAttrIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[ nAttrIdx ] == nDataType );
}
|
|
|
|
|
|
// # of groups of 4 elements per row
|
|
FORCEINLINE int CSOAContainer::NumQuadsPerRow( void ) const
|
|
{
|
|
return m_nNumQuadsPerRow;
|
|
}
|
|
|
|
// Element count for 1d data; simply the number of columns
FORCEINLINE int CSOAContainer::Count( void ) const
{
	return NumCols();
}
|
|
|
|
// Total number of elements: columns * rows * slices (padding not included)
FORCEINLINE int CSOAContainer::NumElements( void ) const
{
	return NumCols() * NumRows() * NumSlices();
}
|
|
|
|
|
|
// how much to step to go from the end of one row to the start of the next one. Basically, how
|
|
// many bytes to add at the end of a row when iterating over the whole 2d array with ++
|
|
FORCEINLINE size_t CSOAContainer::RowToRowStep( int nAttrIdx ) const
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// Address of the first element of row nRowNumber in slice nSliceNumber of an attribute, cast to
// T *. For an attribute the container holds no memory for, only row 0 / slice 0 may be requested
// (debug Assert).
template<class T> FORCEINLINE T *CSOAContainer::RowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
{
	Assert( nRowNumber < m_nRows );
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
	// unsigned literal: the mask is a uint32 and index 31 must not shift into an int's sign bit
	Assert( ( m_nFieldPresentMask & ( 1u << nAttributeIdx ) ) || ( ( nRowNumber == 0 ) && ( nSliceNumber == 0 ) ) );

	uint8 *pRow = m_pAttributePtrs[nAttributeIdx];
	pRow += nRowNumber * m_nRowStrideInBytes[nAttributeIdx];
	pRow += nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx];
	return reinterpret_cast<T *>( pRow );
}
|
|
|
|
// Untyped, read-only address of the first element of row nRowNumber in slice nSliceNumber of an
// attribute.
FORCEINLINE void const *CSOAContainer::ConstRowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
{
	Assert( nRowNumber < m_nRows );
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );

	uint8 const *pRow = m_pAttributePtrs[nAttributeIdx];
	pRow += nRowNumber * m_nRowStrideInBytes[nAttributeIdx];
	pRow += nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx];
	return pRow;
}
|
|
|
|
|
|
// Address of element ( nX, nY, nZ ) of a non-4V attribute, cast to T *. FourVectors attributes
// must go through ElementPointer4V instead (debug Assert).
template<class T> FORCEINLINE T *CSOAContainer::ElementPointer( int nAttributeIdx, int nX, int nY, int nZ ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( nX < m_nColumns );
	Assert( nY < m_nRows );
	Assert( nZ < m_nSlices );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_4V );

	uint8 *pElement = m_pAttributePtrs[nAttributeIdx];
	pElement += nX * m_nStrideInBytes[nAttributeIdx];
	pElement += nY * m_nRowStrideInBytes[nAttributeIdx];
	pElement += nZ * m_nSliceStrideInBytes[nAttributeIdx];
	return reinterpret_cast<T *>( pElement );
}
|
|
|
|
// Pointer used to reach element ( nX, nY, nZ ) of a FourVectors attribute. The address is that
// of the quad containing nX, advanced by 4 bytes for each step of nX within the quad.
// NOTE(review): presumably this makes lane 0 of x/y/z of the returned FourVectors alias the
// requested element; the result is not 16-byte aligned when ( nX & 3 ) != 0 - confirm callers
// only use scalar lane access on it.
FORCEINLINE FourVectors *CSOAContainer::ElementPointer4V( int nAttributeIdx, int nX, int nY, int nZ ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( nX < m_nColumns );
	Assert( nY < m_nRows );
	Assert( nZ < m_nSlices );
	Assert( m_nDataType[nAttributeIdx] == ATTRDATATYPE_4V );

	int nXIdx = nX / 4;								// index of the quad that holds nX
	uint8 *pRet = m_pAttributePtrs[nAttributeIdx];
	pRet += nXIdx * 4 * m_nStrideInBytes[nAttributeIdx];
	pRet += nY * m_nRowStrideInBytes[nAttributeIdx];
	pRet += nZ * m_nSliceStrideInBytes[nAttributeIdx];
	pRet += 4 * ( nX & 3 );							// 4 bytes per position within the quad
	return reinterpret_cast<FourVectors *>( pRet );
}
|
|
// Stride, in bytes, from one datum of the attribute to the next
FORCEINLINE size_t CSOAContainer::ItemByteStride( int nAttributeIdx ) const
{
	Assert( nAttributeIdx < MAX_SOA_FIELDS );
	Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
	return m_nStrideInBytes[ nAttributeIdx ];
}
|
|
|
|
// move all the data from one csoacontainer to another, leaving the source empty.
|
|
// this is just a pointer copy.
|
|
FORCEINLINE void CSOAContainer::MoveDataFrom( CSOAContainer other )
|
|
{
|
|
(*this) = other;
|
|
other.Init();
|
|
}
|
|
|
|
|
|
|
|
|
|
// Read-only strided iterator over the fltx4 data of one attribute, starting at row nRowNumber
// of slice 0 and stepping by the attribute's item byte stride.
class CFltX4AttributeIterator : public CStridedConstPtr<fltx4>
{
public:
	// The constructor must be public: with the class's default (private) access the iterator
	// could never be instantiated.
	FORCEINLINE CFltX4AttributeIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
		: CStridedConstPtr<fltx4>( pContainer->ConstRowPtr( nAttribute, nRowNumber ),
								   pContainer->ItemByteStride( nAttribute ) )
	{
	}
};
|
|
|
|
// Writable strided iterator over the fltx4 data of one attribute, starting at row nRowNumber
// of slice 0 and stepping by the attribute's item byte stride.
class CFltX4AttributeWriteIterator : public CStridedPtr<fltx4>
{
public:
	// The constructor must be public: with the class's default (private) access the iterator
	// could never be instantiated.
	FORCEINLINE CFltX4AttributeWriteIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
		: CStridedPtr<fltx4>( pContainer->RowPtr<uint8>( nAttribute, nRowNumber ),
							  pContainer->ItemByteStride( nAttribute ) )
	{
	}
};
|
|
|
|
// FourVectors overload of CompressSIMD: applies the fltx4 CompressSIMD to the x, y and z
// components of a and b independently.
FORCEINLINE FourVectors CompressSIMD( FourVectors const &a, FourVectors const &b )
{
	FourVectors v4Result;
	v4Result.x = CompressSIMD( a.x, b.x );
	v4Result.y = CompressSIMD( a.y, b.y );
	v4Result.z = CompressSIMD( a.z, b.z );
	return v4Result;
}
|
|
|
|
// FourVectors overload of Compress4SIMD: applies the fltx4 Compress4SIMD to the x, y and z
// components of a, b, c and d independently.
FORCEINLINE FourVectors Compress4SIMD( FourVectors const &a, FourVectors const &b,
									   FourVectors const &c, FourVectors const &d )
{
	FourVectors v4Result;
	v4Result.x = Compress4SIMD( a.x, b.x, c.x, d.x );
	v4Result.y = Compress4SIMD( a.y, b.y, c.y, d.y );
	v4Result.z = Compress4SIMD( a.z, b.z, c.z, d.z );
	return v4Result;
}
|
|
|
|
|
|
|
|
#endif
|