From 9a1ab79372797d2dfa645653332bb5ca5e887873 Mon Sep 17 00:00:00 2001
From: nillerusr <nillerusr@gmail.com>
Date: Tue, 21 Jun 2022 22:15:14 +0300
Subject: [PATCH] amd64: fix mempool, utlbuffer align

---
 materialsystem/stdshaders/commandbuilder.h |  2 +-
 public/dt_send.cpp                         |  8 ++++----
 public/tier1/mempool.h                     |  4 ++--
 public/tier1/utlbuffer.h                   |  7 ++++---
 tier1/mempool.cpp                          |  7 ++++++-
 togl/linuxwin/dx9asmtogl2.cpp              | 21 +++++++++++++++------
 6 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/materialsystem/stdshaders/commandbuilder.h b/materialsystem/stdshaders/commandbuilder.h
index e40ee0a5..4aca5d59 100644
--- a/materialsystem/stdshaders/commandbuilder.h
+++ b/materialsystem/stdshaders/commandbuilder.h
@@ -49,7 +49,7 @@ public:
 	template<class T> FORCEINLINE void Put( T const &nValue )
 	{
 		EnsureCapacity( sizeof( T ) );
-		*( reinterpret_cast<T *>( m_pDataOut ) ) = nValue;
+		memcpy( m_pDataOut, &nValue, sizeof(T) );
 		m_pDataOut += sizeof( nValue );
 #ifdef DBGFLAG_ASSERT
 		m_nNumBytesRemaining -= sizeof( nValue );
diff --git a/public/dt_send.cpp b/public/dt_send.cpp
index 211bc107..65af606d 100644
--- a/public/dt_send.cpp
+++ b/public/dt_send.cpp
@@ -317,18 +317,18 @@ void* SendProxy_SendLocalDataTable( const SendProp *pProp, const void *pStruct,
 // ---------------------------------------------------------------------- //
 float AssignRangeMultiplier( int nBits, double range )
 {
-	unsigned long iHighValue;
+	uint32 iHighValue;
 	if ( nBits == 32 )
 		iHighValue = 0xFFFFFFFE;
 	else
-		iHighValue = ((1 << (unsigned long)nBits) - 1);
+		iHighValue = (((uint32)1 << nBits) - 1);
 
 	float fHighLowMul = iHighValue / range;
 	if ( CloseEnough( range, 0 ) )
 		fHighLowMul = iHighValue;
 	
 	// If the precision is messing us up, then adjust it so it won't.
-	if ( (unsigned long)(fHighLowMul * range) > iHighValue ||
+	if ( (uint32)(fHighLowMul * range) > iHighValue ||
 		 (fHighLowMul * range) > (double)iHighValue )
 	{
 		// Squeeze it down smaller and smaller until it's going to produce an integer
@@ -338,7 +338,7 @@ float AssignRangeMultiplier( int nBits, double range )
 		for ( i=0; i < ARRAYSIZE( multipliers ); i++ )
 		{
 			fHighLowMul = (float)( iHighValue / range ) * multipliers[i];
-			if ( (unsigned long)(fHighLowMul * range) > iHighValue ||
+			if ( (uint32)(fHighLowMul * range) > iHighValue ||
 				(fHighLowMul * range) > (double)iHighValue )
 			{
 			}
diff --git a/public/tier1/mempool.h b/public/tier1/mempool.h
index e01bc9ef..14f143c7 100644
--- a/public/tier1/mempool.h
+++ b/public/tier1/mempool.h
@@ -432,7 +432,7 @@ inline void CClassMemoryPool<T>::Clear()
 		static   CUtlMemoryPool   s_Allocator
     
 #define DEFINE_FIXEDSIZE_ALLOCATOR( _class, _initsize, _grow )					\
-	CUtlMemoryPool   _class::s_Allocator(sizeof(_class), _initsize, _grow, #_class " pool")
+	CUtlMemoryPool   _class::s_Allocator(sizeof(_class), _initsize, _grow, #_class " pool", alignof(_class))
 
 #define DEFINE_FIXEDSIZE_ALLOCATOR_ALIGNED( _class, _initsize, _grow, _alignment )		\
 	CUtlMemoryPool   _class::s_Allocator(sizeof(_class), _initsize, _grow, #_class " pool", _alignment )
@@ -447,7 +447,7 @@ inline void CClassMemoryPool<T>::Clear()
 		static   CMemoryPoolMT   s_Allocator
 
 #define DEFINE_FIXEDSIZE_ALLOCATOR_MT( _class, _initsize, _grow )					\
-	CMemoryPoolMT   _class::s_Allocator(sizeof(_class), _initsize, _grow, #_class " pool")
+	CMemoryPoolMT   _class::s_Allocator(sizeof(_class), _initsize, _grow, #_class " pool", alignof(_class))
 
 //-----------------------------------------------------------------------------
 // Macros that make it simple to make a class use a fixed-size allocator
diff --git a/public/tier1/utlbuffer.h b/public/tier1/utlbuffer.h
index 59909410..9c95082f 100644
--- a/public/tier1/utlbuffer.h
+++ b/public/tier1/utlbuffer.h
@@ -672,7 +672,7 @@ inline void CUtlBuffer::GetObject( T *dest )
 	{
 		if ( !m_Byteswap.IsSwappingBytes() || ( sizeof( T ) == 1 ) )
 		{
-			*dest = *(T *)PeekGet();
+			memcpy( dest, PeekGet(), sizeof( T ) );
 		}
 		else
 		{
@@ -704,6 +704,7 @@ inline void CUtlBuffer::GetTypeBin( T &dest )
 	{
 		if ( !m_Byteswap.IsSwappingBytes() || ( sizeof( T ) == 1 ) )
 		{
-			dest = *(T *)PeekGet();
+			// Copy bytewise: the read cursor is not guaranteed to be aligned for T.
+			memcpy( &dest, PeekGet(), sizeof( T ) );
 		}
 		else
@@ -1050,7 +1051,7 @@ inline void CUtlBuffer::PutObject( T *src )
 	{
 		if ( !m_Byteswap.IsSwappingBytes() || ( sizeof( T ) == 1 ) )
 		{
-			*(T *)PeekPut() = *src;
+			memcpy( PeekPut(), src, sizeof( T ) );
 		}
 		else
 		{
@@ -1079,7 +1080,7 @@ inline void CUtlBuffer::PutTypeBin( T src )
 	{
 		if ( !m_Byteswap.IsSwappingBytes() || ( sizeof( T ) == 1 ) )
 		{
-			*(T *)PeekPut() = src;
+			memcpy( PeekPut(), &src, sizeof( T ) );
 		}
 		else
 		{
diff --git a/tier1/mempool.cpp b/tier1/mempool.cpp
index 9cb9e223..fcfb22f7 100644
--- a/tier1/mempool.cpp
+++ b/tier1/mempool.cpp
@@ -41,7 +41,12 @@ CUtlMemoryPool::CUtlMemoryPool( int blockSize, int numElements, int growMode, co
 	}
 #endif
 
-	m_nAlignment = ( nAlignment != 0 ) ? nAlignment : 1;
+#ifdef PLATFORM_64BITS
+	m_nAlignment = ( nAlignment != 0 ) ? nAlignment : 8;
+#else
+	m_nAlignment = ( nAlignment != 0 ) ? nAlignment : 4;
+#endif
+
 	Assert( IsPowerOfTwo( m_nAlignment ) );
 	m_BlockSize = blockSize < sizeof(void*) ? sizeof(void*) : blockSize;
 	m_BlockSize = AlignValue( m_BlockSize, m_nAlignment );
diff --git a/togl/linuxwin/dx9asmtogl2.cpp b/togl/linuxwin/dx9asmtogl2.cpp
index 22be8fc9..281df0b4 100644
--- a/togl/linuxwin/dx9asmtogl2.cpp
+++ b/togl/linuxwin/dx9asmtogl2.cpp
@@ -2092,23 +2092,32 @@ static uint PrintDoubleInt( char *pBuf, uint nBufSize, double f, uint nMinChars
 
 		if ( bAnyDigitsLeft )
 		{
-			uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; 
-			n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1 - 2) = reinterpret_cast<const uint16*>(pDigits)[n]; 
+			uint n = remainder % 100U; remainder /= 100U;
+			memcpy( reinterpret_cast<uint16*>(pDst - 1), &(reinterpret_cast<const uint16*>(pDigits)[n]), sizeof(uint16) );
+			n = remainder % 100U; remainder /= 100U;
+			memcpy( reinterpret_cast<uint16*>(pDst - 3), &(reinterpret_cast<const uint16*>(pDigits)[n]), sizeof(uint16) );
 			Assert( remainder < 100U );
-			*reinterpret_cast<uint16*>(pDst - 1 - 4) = reinterpret_cast<const uint16*>(pDigits)[remainder]; 
+			memcpy( reinterpret_cast<uint16*>(pDst - 5), &(reinterpret_cast<const uint16*>(pDigits)[remainder]), sizeof(uint16) );
 			pDst -= 6;
 		}
 		else
 		{
-			uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
+			uint n = remainder % 100U; remainder /= 100U;
+			memcpy( reinterpret_cast<uint16*>(pDst - 1), &(reinterpret_cast<const uint16*>(pDigits)[n]), sizeof(uint16) );
+			--pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
+
 			if ( remainder )
 			{
-				n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
+				n = remainder % 100U; remainder /= 100U;
+				memcpy( reinterpret_cast<uint16*>(pDst - 1), &(reinterpret_cast<const uint16*>(pDigits)[n]), sizeof(uint16) );
+
+				--pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
 
 				if ( remainder )
 				{
 					Assert( remainder < 100U );
-					*reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[remainder]; --pDst; if ( remainder >= 10 ) --pDst;
+					memcpy( reinterpret_cast<uint16*>(pDst - 1), &(reinterpret_cast<const uint16*>(pDigits)[remainder]), sizeof(uint16) );
+					--pDst; if ( remainder >= 10 ) --pDst;
 				}
 			}
 		}