660 lines
20 KiB
C++
660 lines
20 KiB
C++
//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//
|
|
//
|
|
// Purpose: Support for mapping from a quad mesh to Bicubic Patches, as a means
|
|
// of rendering approximate Catmull-Clark subdivision surfaces
|
|
//
|
|
//===========================================================================//
|
|
|
|
#include "studio.h"
|
|
#include "studiorendercontext.h"
|
|
#include "materialsystem/imaterialsystem.h"
|
|
#include "materialsystem/imaterial.h"
|
|
#include "materialsystem/imaterialvar.h"
|
|
#include "materialsystem/itexture.h"
|
|
#include "materialsystem/imesh.h"
|
|
#include "mathlib/mathlib.h"
|
|
#include "studiorender.h"
|
|
#include "optimize.h"
|
|
#include "tier1/convar.h"
|
|
#include "tier1/keyvalues.h"
|
|
#include "tier0/vprof.h"
|
|
|
|
// memdbgon must be the last include file in a .cpp file!!!
|
|
#include "tier0/memdbgon.h"
|
|
|
|
#define R_STUDIOSUBD
|
|
#include "r_studiosubd_patches.h"
|
|
|
|
#ifdef _DEBUG
|
|
// Temporary debug arrays
|
|
extern CUtlVector<Vector4D> g_DebugCornerPositions;
|
|
extern CUtlVector<Vector4D> g_DebugEdgePositions;
|
|
extern CUtlVector<Vector4D> g_DebugInteriorPositions;
|
|
#endif
|
|
|
|
//
|
|
// Check out CL# 584588 for an SSE-ized version of the older versions of these
|
|
// routines, which came from an older MS doc, by way of the DX10 SDK
|
|
//
|
|
|
|
static void R_TransformVert( const Vector *pSrcPos, matrix3x4_t *pSkinMat, Vector4DAligned &pos )
|
|
{
|
|
VPROF_BUDGET( "R_TransformVert", _T("SubD Rendering") );
|
|
|
|
// NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster
|
|
pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3];
|
|
pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3];
|
|
pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3];
|
|
pos.w = 1.0f;
|
|
}
|
|
|
|
|
|
// This function is duplicate code ****
//
// Builds the skinning matrix for one vertex from its bone weights.
//
//   boneweights   - per-vertex bone indices (bone[]), weights (weight[]) and count (numbones)
//   pPoseToWorld  - array of bone matrices, indexed by boneweights.bone[i]
//   scratchMatrix - destination for the blended matrix when more than one bone is used
//
// Returns a pointer to the matrix to skin with:
//   - numbones == 1 (and any value not handled explicitly, via 'default'):
//     a pointer straight into pPoseToWorld for bone[0]; scratchMatrix is not written.
//   - numbones == 2, 3 or 4: &scratchMatrix, filled with sum( weight[i] * boneMatrix[i] ).
//
// The 32-bit Windows path uses MSVC inline SSE assembly. It loads and stores
// matrix rows with movaps, which requires every bone matrix and scratchMatrix
// to be 16-byte aligned. All other platforms forward to ComputeSkinMatrix().
|
|
static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix )
|
|
{
|
|
VPROF_BUDGET( "ComputeSkinMatrixSSE", _T("SubD Rendering") );
|
|
|
|
// NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
|
|
#if defined( _WIN32 ) && !defined( WIN64 ) && !defined( _X360 )
|
|
switch( boneweights.numbones )
|
|
{
|
|
// Unexpected bone counts are treated like the single-bone case
default:
|
|
case 1:
|
|
return &pPoseToWorld[boneweights.bone[0]];
|
|
|
|
case 2:
|
|
{
|
|
matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
|
|
matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
|
|
float *pWeights = boneweights.weight;
|
|
|
|
_asm
|
|
{
|
|
mov eax, DWORD PTR [pWeights]
|
|
movss xmm6, dword ptr[eax] ; boneweights.weight[0]
|
|
movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1]
|
|
|
|
mov eax, DWORD PTR [boneMat0]
|
|
mov ecx, DWORD PTR [boneMat1]
|
|
mov edi, DWORD PTR [scratchMatrix]
|
|
|
|
// Broadcast each bone weight across all four lanes of xmm6 and xmm7
|
|
shufps xmm6, xmm6, 0
|
|
shufps xmm7, xmm7, 0
|
|
|
|
// Load up all three rows of the two matrices
|
|
movaps xmm0, XMMWORD PTR [eax]
|
|
movaps xmm1, XMMWORD PTR [ecx]
|
|
movaps xmm2, XMMWORD PTR [eax + 16]
|
|
movaps xmm3, XMMWORD PTR [ecx + 16]
|
|
movaps xmm4, XMMWORD PTR [eax + 32]
|
|
movaps xmm5, XMMWORD PTR [ecx + 32]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm6
|
|
mulps xmm1, xmm7
|
|
mulps xmm2, xmm6
|
|
mulps xmm3, xmm7
|
|
mulps xmm4, xmm6
|
|
mulps xmm5, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm2, xmm3
|
|
addps xmm4, xmm5
|
|
|
|
movaps XMMWORD PTR [edi], xmm0
|
|
movaps XMMWORD PTR [edi + 16], xmm2
|
|
movaps XMMWORD PTR [edi + 32], xmm4
|
|
}
|
|
}
|
|
return &scratchMatrix;
|
|
|
|
case 3:
|
|
{
|
|
matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
|
|
matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
|
|
matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
|
|
float *pWeights = boneweights.weight;
|
|
|
|
_asm
|
|
{
|
|
mov eax, DWORD PTR [pWeights]
|
|
movss xmm5, dword ptr[eax] ; boneweights.weight[0]
|
|
movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1]
|
|
movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2]
|
|
|
|
mov eax, DWORD PTR [boneMat0]
|
|
mov ecx, DWORD PTR [boneMat1]
|
|
mov edx, DWORD PTR [boneMat2]
|
|
mov edi, DWORD PTR [scratchMatrix]
|
|
|
|
// Broadcast each bone weight across all four lanes of xmm5, 6, and 7
|
|
shufps xmm5, xmm5, 0
|
|
shufps xmm6, xmm6, 0
|
|
shufps xmm7, xmm7, 0
|
|
|
|
// Load up the first row of the three matrices
|
|
movaps xmm0, XMMWORD PTR [eax]
|
|
movaps xmm1, XMMWORD PTR [ecx]
|
|
movaps xmm2, XMMWORD PTR [edx]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm5
|
|
mulps xmm1, xmm6
|
|
mulps xmm2, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi], xmm0
|
|
|
|
// Load up the second row of the three matrices
|
|
movaps xmm0, XMMWORD PTR [eax + 16]
|
|
movaps xmm1, XMMWORD PTR [ecx + 16]
|
|
movaps xmm2, XMMWORD PTR [edx + 16]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm5
|
|
mulps xmm1, xmm6
|
|
mulps xmm2, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi + 16], xmm0
|
|
|
|
// Load up the third row of the three matrices
|
|
movaps xmm0, XMMWORD PTR [eax + 32]
|
|
movaps xmm1, XMMWORD PTR [ecx + 32]
|
|
movaps xmm2, XMMWORD PTR [edx + 32]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm5
|
|
mulps xmm1, xmm6
|
|
mulps xmm2, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi + 32], xmm0
|
|
}
|
|
}
|
|
return &scratchMatrix;
|
|
|
|
case 4:
|
|
{
|
|
matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
|
|
matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
|
|
matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
|
|
matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]];
|
|
float *pWeights = boneweights.weight;
|
|
|
|
_asm
|
|
{
|
|
mov eax, DWORD PTR [pWeights]
|
|
movss xmm4, dword ptr[eax] ; boneweights.weight[0]
|
|
movss xmm5, dword ptr[eax + 4] ; boneweights.weight[1]
|
|
movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2]
|
|
movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3]
|
|
|
|
mov eax, DWORD PTR [boneMat0]
|
|
mov ecx, DWORD PTR [boneMat1]
|
|
mov edx, DWORD PTR [boneMat2]
|
|
mov esi, DWORD PTR [boneMat3]
|
|
mov edi, DWORD PTR [scratchMatrix]
|
|
|
|
// Broadcast each bone weight across all four lanes of xmm4, 5, 6, and 7
|
|
shufps xmm4, xmm4, 0
|
|
shufps xmm5, xmm5, 0
|
|
shufps xmm6, xmm6, 0
|
|
shufps xmm7, xmm7, 0
|
|
|
|
// Load up the first row of the four matrices
|
|
movaps xmm0, XMMWORD PTR [eax]
|
|
movaps xmm1, XMMWORD PTR [ecx]
|
|
movaps xmm2, XMMWORD PTR [edx]
|
|
movaps xmm3, XMMWORD PTR [esi]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm4
|
|
mulps xmm1, xmm5
|
|
mulps xmm2, xmm6
|
|
mulps xmm3, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm2, xmm3
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi], xmm0
|
|
|
|
// Load up the second row of the four matrices
|
|
movaps xmm0, XMMWORD PTR [eax + 16]
|
|
movaps xmm1, XMMWORD PTR [ecx + 16]
|
|
movaps xmm2, XMMWORD PTR [edx + 16]
|
|
movaps xmm3, XMMWORD PTR [esi + 16]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm4
|
|
mulps xmm1, xmm5
|
|
mulps xmm2, xmm6
|
|
mulps xmm3, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm2, xmm3
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi + 16], xmm0
|
|
|
|
// Load up the third row of the four matrices
|
|
movaps xmm0, XMMWORD PTR [eax + 32]
|
|
movaps xmm1, XMMWORD PTR [ecx + 32]
|
|
movaps xmm2, XMMWORD PTR [edx + 32]
|
|
movaps xmm3, XMMWORD PTR [esi + 32]
|
|
|
|
// Multiply the rows by the weights
|
|
mulps xmm0, xmm4
|
|
mulps xmm1, xmm5
|
|
mulps xmm2, xmm6
|
|
mulps xmm3, xmm7
|
|
|
|
addps xmm0, xmm1
|
|
addps xmm2, xmm3
|
|
addps xmm0, xmm2
|
|
movaps XMMWORD PTR [edi + 32], xmm0
|
|
}
|
|
}
|
|
return &scratchMatrix;
|
|
}
|
|
#else
|
|
#ifndef LINUX
|
|
#pragma message( "ComputeSkinMatrixSSE C implementation only" )
|
|
#endif
|
|
// No inline-asm path on this platform: defer to the C implementation defined elsewhere
extern matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix );
|
|
return ComputeSkinMatrix( boneweights, pPoseToWorld, scratchMatrix );
|
|
#endif
|
|
|
|
// Every path above returns, so this is only a safety net
Assert( 0 );
|
|
return NULL;
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
// Debug-only one-shot switch: while non-zero, GenerateWorldSpacePatches() empties
// the debug position arrays before generating patches, then checks their counts,
// calls DumpDebugPositions() and sets this convar back to 0.
static ConVar mat_tess_dump( "mat_tess_dump", "0", FCVAR_CHEAT );
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------
// Skins the SubD control cage.
//
// For each of the nNumVertices group vertices, looks up the mesh vertex via
// pGroupToMesh, builds its skinning matrix from pPoseToWorld, picks the flexed
// position from vertexCache when bDoFlex is set and the vertex is flexed
// (otherwise the static mesh position), transforms it and stores the result
// in vOutput[group vertex index].
//
// Side effect: for flexed vertices, the cached vertex's m_TangentS.w is
// overwritten with its m_Position.w.
//-----------------------------------------------------------------------------
void CStudioRender::SkinSubDCage( mstudiovertex_t *pVertices, int nNumVertices,
								  matrix3x4_t *pPoseToWorld, CCachedRenderData &vertexCache,
								  unsigned short* pGroupToMesh, fltx4 *vOutput, bool bDoFlex )
{
	VPROF_BUDGET( "CStudioRender::SkinSubDCage", _T("SubD Rendering") );

	// Scratch space for blended skin matrices; kept 16-byte aligned for the SSE path
	ALIGN16 matrix3x4_t blendedMat ALIGN16_POST;

	Assert( nNumVertices > 0 );

	for ( int nGroupVert = 0; nGroupVert < nNumVertices; ++nGroupVert )
	{
		const unsigned short nMeshVert = pGroupToMesh[nGroupVert];
		mstudiovertex_t &vert = pVertices[nMeshVert];

		matrix3x4_t *pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, blendedMat );

		// Default to the static mesh position; override below if the vertex is flexed
		Vector *pSrcPos = &vert.m_vecPosition;
		if ( bDoFlex && vertexCache.IsVertexFlexed( nMeshVert ) )
		{
			CachedPosNormTan_t *pFlexedVertex = vertexCache.GetFlexVertex( nMeshVert );
			pSrcPos = &pFlexedVertex->m_Position.AsVector3D();

			// Copy strange signed, 0..3 wrinkle tangent-flip encoding over to tangent.w
			pFlexedVertex->m_TangentS.w = pFlexedVertex->m_Position.w;
		}

		// Transform into world space
		Vector4DAligned vWorldPos;
		R_TransformVert( pSrcPos, pSkinMat, vWorldPos );
		vOutput[nGroupVert] = LoadAlignedSIMD( (float *) &vWorldPos );
	}
}
|
|
|
|
// Points the fields of 'quad' at consecutive sub-ranges of the packed topology
// stream starting at topologyIndex, and returns the address just past this
// patch's data (i.e. where the next patch's data begins).
//
// Layout, in unsigned shorts: a fixed 60-entry header (every field 4 entries
// wide except edgeBias, which is 8) followed by the one-ring index list, whose
// length is the sum of the four vtx1RingSize entries.
inline unsigned short *InitializeTopologyIndexStruct( TopologyIndexStruct &quad, unsigned short *topologyIndex )
{
	quad.vtx1RingSize             = topologyIndex +  0;
	quad.vtx1RingCenterQuadOffset = topologyIndex +  4;
	quad.valences                 = topologyIndex +  8;
	quad.minOneRingOffset         = topologyIndex + 12;
	quad.bndVtx                   = topologyIndex + 16;
	quad.bndEdge                  = topologyIndex + 20;
	quad.cornerVtx                = topologyIndex + 24;
	quad.loopGapAngle             = topologyIndex + 28;
	quad.nbCornerVtx              = topologyIndex + 32;
	quad.edgeBias                 = topologyIndex + 36;	// 8 entries wide
	quad.vUV0                     = topologyIndex + 44;
	quad.vUV1                     = topologyIndex + 48;
	quad.vUV2                     = topologyIndex + 52;
	quad.vUV3                     = topologyIndex + 56;
	quad.oneRing                  = topologyIndex + 60;

	// The variable-length one-ring list closes out the patch record
	const int nOneRingEntries = quad.vtx1RingSize[0] + quad.vtx1RingSize[1] + quad.vtx1RingSize[2] + quad.vtx1RingSize[3];

	return topologyIndex + 60 + nOneRingEntries;
}
|
|
|
|
// When 0, GenerateBicubicPatches() still skins the control cage but returns
// before locking and refilling the subd buffer.
static ConVar mat_tessellation_update_buffers( "mat_tessellation_update_buffers", "1", FCVAR_CHEAT );
|
|
// Forwarded to set_UseCornerTangents() each time GenerateWorldSpacePatches() runs.
static ConVar mat_tessellation_cornertangents( "mat_tessellation_cornertangents", "1", FCVAR_CHEAT );
|
|
// Forwarded to set_ShowACCGeometryTangents() each time GenerateWorldSpacePatches() runs.
static ConVar mat_tessellation_accgeometrytangents( "mat_tessellation_accgeometrytangents", "0", FCVAR_CHEAT );
|
|
|
|
#ifdef _DEBUG
|
|
|
|
// Returns true when the xyz parts of vA and vB are closer than 0.05 units
// apart but are not exactly identical, i.e. two points that should have
// coincided but did not. The w components are ignored.
bool NotQuiteEqual( Vector4D &vA, Vector4D &vB )
{
	// Bit-for-bit matching positions are fine, not "almost" equal
	if ( ( vA.x == vB.x ) && ( vA.y == vB.y ) && ( vA.z == vB.z ) )
	{
		return false;
	}

	const float flEpsilon = 0.05f;

	Vector4D vDelta = vA - vB;
	float flDist = sqrt( vDelta.x * vDelta.x + vDelta.y * vDelta.y + vDelta.z * vDelta.z );

	return flDist < flEpsilon;
}
|
|
|
|
void DumpDebugPositions()
|
|
{
|
|
|
|
for ( int i=0; i< g_DebugCornerPositions.Count(); i++ )
|
|
{
|
|
bool bCrack = false;
|
|
for ( int j=0; j< g_DebugCornerPositions.Count(); j++ )
|
|
{
|
|
if ( NotQuiteEqual( g_DebugCornerPositions[i], g_DebugCornerPositions[j] ) )
|
|
{
|
|
bCrack = true;
|
|
Assert(0);
|
|
}
|
|
}
|
|
|
|
DevMsg( "%s C - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugCornerPositions[i].x, g_DebugCornerPositions[i].y, g_DebugCornerPositions[i].z );
|
|
}
|
|
|
|
for ( int i=0; i< g_DebugEdgePositions.Count(); i++ )
|
|
{
|
|
bool bCrack = false;
|
|
for ( int j=0; j< g_DebugEdgePositions.Count(); j++ )
|
|
{
|
|
if ( NotQuiteEqual( g_DebugEdgePositions[i], g_DebugEdgePositions[j] ) )
|
|
{
|
|
bCrack = true;
|
|
}
|
|
}
|
|
|
|
DevMsg( "%s E - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugEdgePositions[i].x, g_DebugEdgePositions[i].y, g_DebugEdgePositions[i].z );
|
|
}
|
|
|
|
for ( int i=0; i< g_DebugInteriorPositions.Count(); i++ )
|
|
{
|
|
bool bCrack = false;
|
|
for ( int j=0; j< g_DebugInteriorPositions.Count(); j++ )
|
|
{
|
|
if ( NotQuiteEqual( g_DebugInteriorPositions[i], g_DebugInteriorPositions[j] ) )
|
|
{
|
|
bCrack = true;
|
|
}
|
|
}
|
|
|
|
DevMsg( "%s I - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugInteriorPositions[i].x, g_DebugInteriorPositions[i].y, g_DebugInteriorPositions[i].z );
|
|
}
|
|
}
|
|
|
|
#endif // _DEBUG
|
|
|
|
void GenerateWorldSpacePatches( float *pSubDBuff, int nNumPatches, unsigned short *pTopologyIndices, fltx4 *pWSVertices, bool bRegularPatch )
|
|
{
|
|
VPROF_BUDGET( "CStudioRender::GenerateWorldSpacePatches", _T("SubD Rendering") );
|
|
|
|
TopologyIndexStruct quad;
|
|
unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices );
|
|
|
|
set_ShowACCGeometryTangents(mat_tessellation_accgeometrytangents.GetBool());
|
|
set_UseCornerTangents(mat_tessellation_cornertangents.GetBool());
|
|
|
|
ALIGN16 Vector4D Geo[16] ALIGN16_POST;
|
|
ALIGN16 Vector4D TanU[12] ALIGN16_POST;
|
|
ALIGN16 Vector4D TanV[12] ALIGN16_POST;
|
|
|
|
#ifdef _DEBUG
|
|
if ( mat_tess_dump.GetBool() )
|
|
{
|
|
// Debug Arrays
|
|
g_DebugCornerPositions.EnsureCapacity( nNumPatches * 4 );
|
|
g_DebugEdgePositions.EnsureCapacity( nNumPatches * 8 );
|
|
g_DebugInteriorPositions.EnsureCapacity( nNumPatches * 4 );
|
|
|
|
// Empty the arrays this time around
|
|
g_DebugCornerPositions.RemoveAll();
|
|
g_DebugEdgePositions.RemoveAll();
|
|
g_DebugInteriorPositions.RemoveAll();
|
|
}
|
|
#endif
|
|
|
|
for( int p = 0; p < nNumPatches; p++ )
|
|
{
|
|
#if defined( USE_OPT )
|
|
ComputeACCAllPatches( pWSVertices, &quad, Geo, TanU, TanV, bRegularPatch );
|
|
#else
|
|
ComputeACCGeometryPatch( pWSVertices, &quad, Geo );
|
|
ComputeACCTangentPatches( pWSVertices, &quad, Geo, TanU, TanV );
|
|
#endif
|
|
|
|
for ( int i=0; i < 16; i++ )
|
|
{
|
|
pSubDBuff[ i * 3 + 0 ] = Geo[i].x;
|
|
pSubDBuff[ i * 3 + 1 ] = Geo[i].y;
|
|
pSubDBuff[ i * 3 + 2 ] = Geo[i].z;
|
|
|
|
}
|
|
|
|
for ( int i=0; i<12; i++ )
|
|
{
|
|
pSubDBuff[ i * 3 + 0 + 48 ] = TanU[ i ].x;
|
|
pSubDBuff[ i * 3 + 1 + 48 ] = TanU[ i ].y;
|
|
pSubDBuff[ i * 3 + 2 + 48 ] = TanU[ i ].z;
|
|
}
|
|
|
|
for ( int i=0; i<12; i++ )
|
|
{
|
|
pSubDBuff[ i * 3 + 0 + 84 ] = TanV[ i ].x;
|
|
pSubDBuff[ i * 3 + 1 + 84 ] = TanV[ i ].y;
|
|
pSubDBuff[ i * 3 + 2 + 84 ] = TanV[ i ].z;
|
|
}
|
|
|
|
pSubDBuff += 120; // 30 * sizeof( float )
|
|
|
|
nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices );
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
if ( mat_tess_dump.GetBool() )
|
|
{
|
|
// These should be a particular size
|
|
Assert( g_DebugCornerPositions.Count() == ( nNumPatches * 4 ) );
|
|
Assert( g_DebugEdgePositions.Count() == ( nNumPatches * 8 ) );
|
|
Assert( g_DebugInteriorPositions.Count() == ( nNumPatches * 4 ) );
|
|
|
|
DumpDebugPositions();
|
|
mat_tess_dump.SetValue( 0 ); // Turn back off
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------------
|
|
// Top level function for mapping a quad mesh to an array of Bicubic Bezier patches
|
|
//-----------------------------------------------------------------------------------
|
|
void CStudioRender::GenerateBicubicPatches( mstudiomesh_t* pmesh, studiomeshgroup_t* pGroup, bool bDoFlex )
|
|
{
|
|
#if defined( LINUX )
|
|
Assert(0);
|
|
#else
|
|
VPROF_BUDGET( "CStudioRender::GenerateBicubicPatches", _T("SubD Rendering") );
|
|
|
|
FillTables(); // This only does work the first time through
|
|
|
|
Assert( pmesh );
|
|
Assert( pGroup );
|
|
|
|
const mstudio_meshvertexdata_t *vertData = pmesh->GetVertexData( m_pStudioHdr );
|
|
Assert( vertData );
|
|
|
|
mstudiovertex_t *pVertices = vertData->Vertex( 0 );
|
|
|
|
m_vSkinnedSubDVertices.SetCount( pGroup->m_NumVertices );
|
|
|
|
// First, apply software flexing and skinning to the vertices
|
|
SkinSubDCage( pVertices, pGroup->m_NumVertices, m_PoseToWorld,
|
|
m_VertexCache, pGroup->m_pGroupIndexToMeshIndex, m_vSkinnedSubDVertices.Base(), bDoFlex );
|
|
|
|
// Early out
|
|
if ( mat_tessellation_update_buffers.GetBool() == false )
|
|
return;
|
|
|
|
// Lock the subd buffers
|
|
int nNumPatches = 0;
|
|
for ( int s=0; s<pGroup->m_NumStrips; ++s )
|
|
{
|
|
nNumPatches += pGroup->m_pUniqueFaces[s];
|
|
}
|
|
|
|
CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
|
|
float *pSubDBuff = pRenderContext->LockSubDBuffer( nNumPatches );
|
|
|
|
// Now we are in world space, we can map to array of Bicubic patches
|
|
int totalIndices = 0;
|
|
float *pCurrentPtr = pSubDBuff;
|
|
for ( int s=0; s<pGroup->m_NumStrips; ++s )
|
|
{
|
|
OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[s];
|
|
int StripFaces = pGroup->m_pUniqueFaces[s];
|
|
|
|
GenerateWorldSpacePatches( pCurrentPtr, StripFaces, &pGroup->m_pTopologyIndices[totalIndices], m_vSkinnedSubDVertices.Base(), ( pStrip->flags & OptimizedModel::STRIP_IS_QUADLIST_REG ) != 0 );
|
|
|
|
totalIndices += pStrip->numTopologyIndices;
|
|
pCurrentPtr += StripFaces * 120;
|
|
}
|
|
|
|
// Unlock subd buffers
|
|
pRenderContext->UnlockSubDBuffer( );
|
|
|
|
#endif // !LINUX
|
|
}
|
|
|
|
|
|
// Transform Tangent vector
|
|
static void R_TransformTangent( const Vector4D *pSrcTangentS, matrix3x4_t *pSkinMat, Vector4DAligned &tangentS )
|
|
{
|
|
VPROF_BUDGET( "R_TransformTangent", _T("SubD Rendering") );
|
|
|
|
tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2];
|
|
tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2];
|
|
tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2];
|
|
tangentS.w = pSrcTangentS->w;
|
|
}
|
|
|
|
// Transforms per-vertex tangent vector, copies texture coordinates etc into dynamic VB
|
|
void CStudioRender::SoftwareProcessQuadMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder,
|
|
int numFaces, unsigned short* pGroupToMesh,
|
|
unsigned short *pTopologyIndices, bool bTangentSpace, bool bDoFlex )
|
|
{
|
|
VPROF_BUDGET( "CStudioRender::SoftwareProcessQuadMesh", _T("SubD Rendering") );
|
|
|
|
Vector4D *pStudioTangentS = NULL;
|
|
|
|
ALIGN16 QuadTessVertex_t quadVertex ALIGN16_POST;
|
|
|
|
// QuadTessVertex_t currently has the following map:
|
|
// +-----------------------------------+
|
|
// | tanX | tanY | tanZ | sBWrnk | <- Tangent in .xyz, Binormal sign flip bit plus wrinkle in .w
|
|
// +-----------------------------------+
|
|
// | tcU0 | tcV0 | tcU1 | tcV1 | <- Interior TC, Parametric V Edge TC
|
|
// +-----------------------------------+
|
|
// | tcU2 | tcV2 | tcU3 | tcV3 | <- Parametric U Edge TC, Corner TC
|
|
// +-----------------------------------+
|
|
|
|
quadVertex.m_vTangent.Init( 1.0f, 0.0f, 0.0f, 1.0f );
|
|
|
|
ALIGN16 matrix3x4_t *pSkinMat, matTemp ALIGN16_POST;
|
|
|
|
Assert( numFaces > 0 );
|
|
|
|
const mstudio_meshvertexdata_t *pVertData = pmesh->GetVertexData( m_pStudioHdr );
|
|
Assert( pVertData );
|
|
if ( !pVertData )
|
|
return;
|
|
|
|
mstudiovertex_t *pVertices = pVertData->Vertex( 0 );
|
|
|
|
|
|
if ( bTangentSpace )
|
|
{
|
|
pStudioTangentS = pVertData->TangentS( 0 );
|
|
}
|
|
|
|
TopologyIndexStruct quad;
|
|
unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices );
|
|
|
|
for ( int i=0; i < numFaces; ++i ) // Run over faces
|
|
{
|
|
int patchCorner = 0;
|
|
|
|
#if 0
|
|
Vector4D debugTangent[4];
|
|
for ( int j=0; j < 4; ++j )
|
|
{
|
|
int idx = quad.oneRing[patchCorner];
|
|
memcpy( &debugTangent[j], &pStudioTangentS[idx], sizeof( Vector4D ) );
|
|
patchCorner += quad.vtx1RingSize[j];
|
|
}
|
|
|
|
// These should be the same sign for a given patch.
|
|
// If they're not, that's bad
|
|
Assert( ( debugTangent[0].w == debugTangent[1].w ) &&
|
|
( debugTangent[1].w == debugTangent[2].w ) &&
|
|
( debugTangent[2].w == debugTangent[3].w ) );
|
|
|
|
patchCorner = 0;
|
|
#endif
|
|
|
|
for ( int j=0; j < 4; ++j ) // Four verts per face
|
|
{
|
|
int idx = quad.oneRing[patchCorner];
|
|
mstudiovertex_t &vert = pVertices[idx];
|
|
|
|
if ( bTangentSpace )
|
|
{
|
|
pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, m_PoseToWorld, matTemp );
|
|
|
|
if ( bDoFlex && m_VertexCache.IsVertexFlexed( idx ) )
|
|
{
|
|
CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex( idx );
|
|
R_TransformTangent( &(pFlexedVertex->m_TangentS), pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent );
|
|
}
|
|
else // non-flexed case
|
|
{
|
|
R_TransformTangent( &pStudioTangentS[idx], pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent );
|
|
quadVertex.m_vTangent.w *= 2; // non-flexed vertex should have wrinkle of -2 or +2
|
|
}
|
|
}
|
|
|
|
// Store 4 texcoords per quad corner
|
|
quadVertex.m_vUV01.x = pVertices[ quad.vUV0[j] ].m_vecTexCoord.x;
|
|
quadVertex.m_vUV01.y = pVertices[ quad.vUV0[j] ].m_vecTexCoord.y;
|
|
quadVertex.m_vUV01.z = pVertices[ quad.vUV1[j] ].m_vecTexCoord.x;
|
|
quadVertex.m_vUV01.w = pVertices[ quad.vUV1[j] ].m_vecTexCoord.y;
|
|
quadVertex.m_vUV23.x = pVertices[ quad.vUV2[j] ].m_vecTexCoord.x;
|
|
quadVertex.m_vUV23.y = pVertices[ quad.vUV2[j] ].m_vecTexCoord.y;
|
|
quadVertex.m_vUV23.z = pVertices[ quad.vUV3[j] ].m_vecTexCoord.x;
|
|
quadVertex.m_vUV23.w = pVertices[ quad.vUV3[j] ].m_vecTexCoord.y;
|
|
|
|
meshBuilder.FastQuadVertexSSE( quadVertex );
|
|
|
|
patchCorner += quad.vtx1RingSize[j];
|
|
}
|
|
|
|
nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices );
|
|
}
|
|
|
|
meshBuilder.FastAdvanceNVertices( numFaces * 4 );
|
|
}
|