2021-07-24 21:11:47 -07:00

3365 lines
104 KiB
C++

//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Per draw call Gcm state
// Render states, vp/fp consts
//
//==================================================================================================
#ifndef INCLUDED_GCMDRAWSTATE_H
#define INCLUDED_GCMDRAWSTATE_H
#ifndef SPU
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "cell\gcm.h"
#include "gcmconfig.h"
#include "ps3gcmmemory.h"
#include "dxabstract_def.h"
#include "dxabstract.h"
#include "shaderapi/commandbuffer.h"
#include "shaderapi/shareddefs.h"
#include "mathlib/vector4d.h"
#include "mathlib/vmatrix.h"
#include <shaderapi/ishaderdynamic.h>
#include <vprof.h>
#include "SpuMgr_ppu.h"
#else
#include "spumgr_spu.h"
#include "cell/gcm_spu.h"
#include "cell/gcm/gcm_method_data.h"
#include "dxabstract_def.h"
#include "gcmtexture.h"
#include "gcmlabels.h"
#include "shaderapi/shareddefs.h"
#include "shaderapi/commandbuffer.h"
#include <shaderapi/ishaderdynamic.h>
#include "gcmdraw_spu.h"
#endif
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
// Define GCM_DS_SAFE to make the PackData() overloads check the remaining
// per-draw-call space and call Error() when a packet does not fit.
//#define GCM_DS_SAFE
#define GCM_DRAWSTATE_MAX 9 // We have this many drawstate structures
// we fill half of them and send to SPU
// then we fill the other half
// Need an odd number since one extra is required in SendToSPU
#define GCM_NUMDRAWCALLS_SPU ((GCM_DRAWSTATE_MAX-1)/2)
#define GCM_DS_FIFOPERDRAW 0x4000 // 16K is our max per draw call FIFO.
// in practice we see a highwater of 0x2800
// which combines a full RSX reset with a drawcall
#define GCM_DS_MAXDATAPERDRAWCALL 0x2000 // 8K of packed data per draw call. Highwater mark is about 3K
#define GCM_DS_MAXFPCONST 96
#define GCM_DS_MAXVPCONST 256
#define GCM_DS_MAXDYNECB 0x40000 // 256K ring buffer (0x40000 bytes; an older comment said 64K). if <8K left wraps
// NOTE(review): the three GCM_DS_INST_* values are distinct bits; presumably they
// are OR'ed into FixedData::m_nInstanced - confirm against the users.
#define GCM_DS_INST_ENVMAP 1
#define GCM_DS_INST_LIGHTMAP 2
#define GCM_DS_INST_PAINTMAP 4
#define MAX_SAMPLERS 16
//--------------------------------------------------------------------------------------------------
// Global externs
//--------------------------------------------------------------------------------------------------
extern uint8 g_d3drs_defvalue_indices[D3DRS_VALUE_LIMIT];
extern uint32 g_d3drs_defvalues[11];
extern uint32 dxtogl_stencilmode[10];
extern uint16 dxtogl_blendop[7];
extern uint8 dxtogl_addressMode[6];
extern uint8 dxtogl_anisoIndexHalf[32]; // indexed by [ dxsamp->maxAniso / 2 ]
extern uint8 dxtogl_minFilter[4][4]; // indexed by _D3DTEXTUREFILTERTYPE on both axes: [row is min filter][col is mip filter].
extern uint8 dxtogl_magFilter[4]; // indexed by _D3DTEXTUREFILTERTYPE
extern int g_bZcullAuto;
extern int g_nZcullDefault;
extern int g_nZcullMoveForward;
extern int g_nZcullPushBack;
extern vec_float4 g_aFPConst[GCM_DS_MAXFPCONST];
extern vec_float4 g_aVPConst[GCM_DS_MAXVPCONST];
extern D3DStreamDesc g_dxGcmVertexStreamSources[D3D_MAX_STREAMS];
extern uint32 g_UPHigh;
extern uint32 g_UPFrame;
extern volatile uint32_t * g_label_fppatch_ring_seg;
extern uint8 g_aDynECB[GCM_DS_MAXDYNECB];
extern uint32 g_nDynECBIdx;
extern uint8 gPackData[][GCM_DS_MAXDATAPERDRAWCALL];
//--------------------------------------------------------------------------------------------------
// Structs used as params
//--------------------------------------------------------------------------------------------------
// Scissor rectangle passed to SetScissorRect / UnpackSetScissorRect:
// origin (x, y) followed by extent (w, h), all 16-bit.
struct DrawScissor_t
{
	uint16 x;
	uint16 y;
	uint16 w;
	uint16 h;
};
// Parameter block for Ps3Helper_UpdateSurface: the two texture descriptors
// that make up the surface.
struct UpdateSurface_t
{
	// if the scissor is logically disabled, set scissor to this size
	//uint16 m_nRenderTargetWidth, m_nRenderTargetHeight;

	CPs3gcmTextureData_t m_texC; // presumably the colour target - TODO confirm
	CPs3gcmTextureData_t m_texZ; // presumably the depth/stencil target - TODO confirm
};
// Header that precedes a fragment program blob. The variable sized payload
// (ucode, patch table, texture control table) follows the header in memory;
// see the layout description below.
struct FpHeader_t
{
uint32 m_nUcodeSize; // size of the ucode section (used as a byte count in the size formula below)
uint32 m_nPatchCount; // number of uint32 entries in the patch table
uint32 m_nShaderControl0;
uint32 m_nTexControls; // Always <= 16; 1 tex control corresponds to 2 words in the tex control table
// data[]
// Allocate memory layout as :
// FpHeader_t
// uCode
// Patches
// Texcontrols
// total size = AlignValue( sizeof( FpHeader_t ) + m_nUcodeSize + (sizeof( uint32 ) * m_nPatchCount)
// + (2 * sizeof( uint32 ) * m_nTexControls) , 16);
};
//--------------------------------------------------------------------------------------------------
// Vertex streams
//--------------------------------------------------------------------------------------------------
// Cached description of one vertex data array binding, packed into a single
// 128-bit vector so that compare / copy / clear are single vector operations.
struct SetVertexDataArrayCache_t
{
// The same 16 bytes viewed either as one vector or as four named 32-bit words.
union Data_t
{
vector signed int m_vi;
struct Unpacked_t
{
uint32 m_uiLocalMemoryBuffer; // after adding the offset
uint32 m_nSize;
uint32 m_nStride;
uint32 m_nType;
//IDirect3DVertexBuffer9 *m_vtxBuffer; // for debug only
//uint32 m_nBaseVertexOffset; // debug only
} m_unpacked;
} m_data;
// Default constructor deliberately leaves m_data uninitialized.
SetVertexDataArrayCache_t(){}
// Builds the cache entry for one vertex element of a stream.
//   dsd              - stream description (offset, stride, local buffer offset)
//   gcmvad           - GCM declaration of the element (offset, data size, data type)
//   nBaseVertexIndex - first vertex; scaled by the stream stride
SetVertexDataArrayCache_t( D3DStreamDesc &dsd, D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmvad, uint nBaseVertexIndex )
{
//m_vtxBuffer = dsd.m_vtxBuffer;
uint nBaseVertexOffset = dsd.m_offset + ( nBaseVertexIndex * dsd.m_stride ) + gcmvad.m_offset;
uint uiLocalMemoryBuffer = dsd.m_nLocalBufferOffset + nBaseVertexOffset;
// NOTE(review): the lanes are written as { buffer, datasize, datatype, stride },
// but Unpacked_t names them { buffer, size, stride, type }. Anything reading
// m_unpacked.m_nStride / m_unpacked.m_nType would see them swapped - confirm
// against the code that consumes this cache.
m_data.m_vi = ( vector signed int ) { uiLocalMemoryBuffer, gcmvad.m_datasize, gcmvad.m_datatype, dsd.m_stride };
// m_stride = dsd.m_stride;
// m_size = gcmvad.m_datasize;
// m_type = gcmvad.m_datatype;
}
// Returns the first word (local memory buffer offset).
uint GetLocalOffset()const { return m_data.m_unpacked.m_uiLocalMemoryBuffer; }
// True when all four words are zero.
bool IsNull()const { return vec_all_eq( m_data.m_vi, (vector signed int){0,0,0,0} ); }
// Sets all four words to zero.
void SetNull(){ m_data.m_vi = ( vector signed int ){0,0,0,0}; }
// Sets all four words to -1.
void Invalidate(){ m_data.m_vi = (vector signed int){-1,-1,-1,-1};}
// True when at least one of the four words differs.
bool operator != ( const SetVertexDataArrayCache_t& that ) const { return !vec_all_eq( m_data.m_vi, that.m_data.m_vi ); }
// Copies all four words; returns void, so assignments cannot be chained.
void operator = ( const SetVertexDataArrayCache_t& that ) { m_data.m_vi = that.m_data.m_vi ; }
};
// This is global, since it is only written by the flush code
extern SetVertexDataArrayCache_t g_cacheSetVertexDataArray[ D3D_MAX_STREAMS ]; // Vertex stream setup
//--------------------------------------------------------------------------------------------------
// SPU draw commands
//--------------------------------------------------------------------------------------------------
// Command identifiers for SPU draw commands.
// Values start at 1, so 0 is not a valid command.
enum DrawCmd
{
	CmdCommitStates = 1,
	CmdDrawPrim     = 2,
	CmdDrawPrimUP   = 3,
	CmdEndFrame     = 4
};
//--------------------------------------------------------------------------------------------------
// GcmDrawState: holds the data that is committed once a draw, clear, etc. is made.
//--------------------------------------------------------------------------------------------------
// Number of bytes spanned by the CGcmDrawState members from m_cmd up to and
// including m_pData (address just past m_pData minus address of m_cmd),
// computed offsetof-style from a null object pointer.
#define DRAWSTATE_SIZEOFDMA (uintp(&(((CGcmDrawState*)(0))->m_pData)+1)-uintp(&(((CGcmDrawState*)(0))->m_cmd)))
// Per-draw-call GCM state.
// The leading members (m_cmd .. m_pData) are the part covered by
// DRAWSTATE_SIZEOFDMA; the remaining members hold unpacked / derived state.
// Variable sized data is appended to the buffer at m_pData by the PackData()
// overloads, each packet being a DrawData header followed by its payload.
struct CGcmDrawState
{
// Packet header written by PackData() in front of every payload:
// m_type is a GcmDataType, m_idx an optional index, m_size the payload size in bytes.
// (Original note: DrawData used by DrawPrimUP)
struct DrawData { uint8 m_type; uint8 m_idx; uint16 m_size; /*uint8 m_data[m_count];*/ };
//--------------------------------------------------------------------------------------------------
// Enums
//--------------------------------------------------------------------------------------------------
// Data that gets packed and then unpacked as a cmd stream
// NOTE(review): kDataViewport and kDataViewPort are two distinct enumerators that
// differ only in case; SetViewport() packs kDataViewport.
enum GcmDataType
{
kDataFpuConsts = 1,
kDataVpuConsts,
kDataStreamDesc,
kDataZcullStats,
kDataZcullLimit,
kDataViewport,
kDataSetRenderState,
kDataSetZpassPixelCountEnable,
kDataSetClearReport,
kDataSetReport,
kDataUpdateSurface,
kDataClearSurface,
kDataResetSurface,
kDataTransferImage,
kDataViewPort,
kDataScissor,
kDataTexture,
kDataEcbTexture,
kDataResetTexture,
kDataUpdateVtxBufferOffset,
kDataECB,
kDataBeginScene,
kDataSetWorldSpaceCameraPosition,
kDataSetWriteBackEndLabel
};
// RenderStates: bits of m_dirtyStatesMask
enum GcmDirtyStateFlags_t
{
kDirtyBlendFactor = ( 1 << 0 ),
kDirtyAlphaFunc = ( 1 << 1 ),
kDirtyStencilOp = ( 1 << 2 ),
kDirtyStencilFunc = ( 1 << 3 ),
kDirtyDepthBias = ( 1 << 4 ),
kDirtyScissor = ( 1 << 5 ),
kDirtyDepthMask = ( 1 << 6 ),
kDirtyZEnable = ( 1 << 7 ),
kDirtyZFunc = ( 1 << 8 ),
kDirtyColorWriteEnable = ( 1 << 9 ),
kDirtyCullMode = ( 1 << 10 ),
kDirtyAlphablendEnable = ( 1 << 11 ),
kDirtySrgbWriteEnable = ( 1 << 12 ),
kDirtyAlphaTestEnable = ( 1 << 13 ),
kDirtyStencilEnable = ( 1 << 14 ),
kDirtyStencilWriteMask = ( 1 << 15 ),
kDirtyFillMode = ( 1 << 16 ),
kDirtyBlendOp = ( 1 << 17 ),
kDirtyResetRsx = ( 1 << 18 ),
kDirtyZeroAllPSConsts = ( 1 << 19 ),
kDirtyZeroAllVSConsts = ( 1 << 20)
};
// Dirty flags for caches and other misc settings
// (presumably the bits of m_dirtyCachesMask - TODO confirm)
enum GcmDirtyCacheFlags_t
{
kDirtyVxConstants = ( 1 << 0 ),
kDirtyClipPlanes = ( 1 << 1 ),
kDirtyVxShader = ( 1 << 2 ),
kDirtyPxShader = ( 1 << 3 ),
kDirtyPxConstants = ( 1 << 4 ),
kDirtyVxCache = ( 1 << 5 ),
kDirtyTxCache = ( 1 << 6 )
};
//--------------------------------------------------------------------------------------------------
// Data we are interested in per draw call
//--------------------------------------------------------------------------------------------------
// Data that is DMA'd to the SPU directly and not packed
// (do not reorder: DRAWSTATE_SIZEOFDMA measures from m_cmd to m_pData)
uint32 m_cmd;
uint32 m_param[8];
uint32 m_eaOutputFIFO;
uint32 m_eaOutputUCode;
uint32 m_nFreeLabel; // Nonzero values are set
uint16 m_nBackBufferSize[2]; // [0] = width, [1] = height (filled in Init(params))
uint16 m_dirtySamplersMask; // Sampler dirty flags
uint16 m_dirtyCachesMask; // Caches reset for Shaders flush
uint32 m_dirtyStatesMask; // Render state dirty flags
uint32 m_shaderVxConstants; // Booleans, go into a SetTransformbranchbits call
PixelShader9Data_t* m_pPixelShaderData;
VertexShader9Data_t* m_pVertexShaderData;
uint32 m_nNumECB;
uint8* m_aECB[3]; // No More than three per draw call (static, semi-static & dynamic)
uint32 m_aSizeECB[3];
// Fixed sized block kept outside this struct and referenced through m_pFixed.
struct FixedData
{
uint32 m_nSampler;
uint8 m_aSamplerIdx[D3D_MAX_SAMPLERS]; // Init() fills this with 0xff
D3DSamplerDesc m_aSamplers[D3D_MAX_SAMPLERS];
uint32 m_nInstanced;
CPs3BindTexture_t m_instanceEnvCubemap;
CPs3BindTexture_t m_instanceLightmap;
CPs3BindTexture_t m_instancePaintmap;
};
// Unpack pointer and cursors
FixedData* m_pFixed; // Fixed sized data uploaded per call
uint8* m_pDataCursor; // next free byte in the pack buffer; advanced by PackData()
uint8* m_pData; // start of the pack buffer
// Fixed Data that is unpacked
D3DSamplerDesc m_aSamplers[D3D_MAX_SAMPLERS];
// Data that is unpacked, or derived, or code generated somewhere (Init etc...)
CPs3BindTexture_t m_aBindTexture[CBCMD_MAX_PS3TEX]; // Textures that are set from ECBs
float m_vecWorldSpaceCameraPosition[4];
uint32 m_nSetTransformBranchBits; // here for now because they init in begin scene
uint32 m_nDisabledSamplers;
uint16 m_blends[2];
struct { uint32 func, ref; } m_alphaFunc;
struct { uint32 fail, dfail, dpass; } m_stencilOp;
struct { uint32 func, ref, mask; } m_stencilFunc;
struct { uint32 factor, units; } m_depthBias;
struct { uint16 x, y, w, h, enabled; } m_scissor; // kDirtyScissor
uint16 m_nSetDepthMask;
uint32 m_ZEnable;
uint32 m_ZFunc;
uint32 m_ColorWriteEnable;
uint32 m_CullMode;
uint32 m_AlphablendEnable;
uint32 m_SrgbWriteEnable;
uint32 m_AlphaTestEnable;
uint32 m_StencilEnable;
uint32 m_StencilWriteMask;
uint32 m_FillMode;
uint32 m_BlendOp;
uint32 m_userClipPlanesState;
CPs3gcmTextureData_t m_textures[D3D_MAX_TEXTURES];
float m_viewZ[2]; // [0] = MinZ, [1] = MaxZ of the current viewport
uint16 m_viewportSize[4]; // X, Y, Width, Height of the current viewport
//--------------------------------------------------------------------------------------------------
// Methods
//--------------------------------------------------------------------------------------------------
public:
// Init etc.. (ppu functions...)
#ifndef SPU
inline void Init(IDirect3DDevice9Params *params);
#endif
inline void Init();
void SendToSpu();
inline void Reset(); // Reset for re-use
#ifndef SPU
inline void BeginScene(); // Sets report for Zcull
inline void EndScene(); // Gets report for Zcull
inline void CmdBufferFlush(); // Flush RSX via SPU
inline void CmdBufferFinish(); // Flush RSX and wait for it
#endif
inline void ResetRsxState(); // Lots of GCM_FUNC to default vals
// Dynamic ECB mgmt
inline uint8* OpenDynECB();
inline void CloseDynECB(uint32 size);
// Viewport and scissor
inline void UnpackSetViewport(CONST D3DVIEWPORT9* pViewport);
inline HRESULT SetViewport(CONST D3DVIEWPORT9* pViewport);
inline void UnpackSetScissorRect(DrawScissor_t * pScissor);
inline void SetScissorRect( DrawScissor_t * pScissor );
// Reports, Zpass and labels (all packed)
inline void SetZpassPixelCountEnable(uint32 enable);
inline void SetClearReport(uint32 type);
inline void SetReport(uint32 type, uint32 index);
inline void SetWriteBackEndLabel(uint8 index, uint32 value);
// RenderStates
inline void UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value );
inline void SetRenderState( D3DRENDERSTATETYPE State, uint Value );
// Texture samplers, textures, texture cache
inline void SetInvalidateTextureCache();
inline void SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value );
inline void UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout );
inline void UnpackResetTexture( DWORD Stage );
inline void SetTexture( DWORD Stage, CPs3gcmTexture *tex );
inline void ResetTexture( DWORD Stage );
// Vertex buffers, vertex cache, vertex constants
inline void SetInvalidateVertexCache();
inline void UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset );
inline void UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset );
inline void SetVertexStreamSource(uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride );
inline void _SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants );
inline void SetVertexShaderConstantB( UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount) ;
inline void SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount );
// inline void VertexConstantExtractor( float *pDestStorage, int kRegisterFirst, int kRegisterLength,
// int StartRegister, const float *pConstantData, int Vector4fCount );
// Pixel shader consts
inline void SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount);
inline void UnpackSetWorldSpaceCameraPosition(float* pWCP);
inline void SetWorldSpaceCameraPosition(float* pWCP);
// Surfaces and render targets
inline void Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface );
inline void UnpackUpdateSurface(CellGcmSurface* pSf);
inline void ResetSurfaceToKnownDefaultState();
inline void UnpackResetSurfaceToKnownDefaultState();
inline void Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result );
inline void ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
uint32 nDepthStencilBitDepth );
inline void UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
uint32 nDepthStencilBitDepth );
// Blit (packed)
inline void SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel );
// DrawPrim
inline void DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride );
inline void DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex,
UINT NumVertices,UINT startIndex,UINT nDrawPrimCount );
inline void ExecuteCommandBuffer( uint8 *pCmdBuf );
inline void UnpackExecuteCommandBuffer( uint8 *pCmdBuf );
void TestCommandBuffer( uint8 *pCmdBuf );
inline void TextureReplace(uint32 id, CPs3BindTexture_t tex);
// Commit, pack etc..
// PackData overloads: each appends a DrawData header plus payload at m_pDataCursor.
inline void PackData(uint8 type, uint8 idx, uint16 size, void* pSrc);
inline void PackData(uint8 type, uint16 size, void* pSrc);
inline void PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3);
inline void PackData(uint8 type, uint32 val1, uint32 val2);
inline void PackData(uint8 type, uint32 val1);
inline void PackData(uint8 type);
inline void PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
uint32 nDepthStencilBitDepth ); // used to pack clear surface
inline void UnpackData(); // Unpacks variable sized data and sets vertex consts
inline void CommitStates(); // Currently unused on PPU
inline void EndFrame(); // called by Flip()
inline void CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex);
inline void CommitRenderStates();
inline void CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex);
inline void CommitSampler(uint32 nSampler);
inline void CommitSamplers();
inline void CommitShaders();
inline void BindFragmentProgram(uint32 nVertexToFragmentProgramAttributeMask);
inline void PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount);
inline fltx4* CopyUcode(FpHeader_t* pFp);
#ifndef SPU
inline void AllocateUcode(FpHeader_t* pFp); // Reserves space in the patchbuffer for this
#endif
// ExecuteCommandBuffer Subs
inline void SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs = 1, bool bForce = false );
inline void SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts = 1, bool bForce = false );
inline void BindTexture2( CPs3BindTexture_t bindTex );
// Misc
inline int IsLayerRender() { return 1;} // 7LTODO : zprepass ! (currently always returns 1)
};
//--------------------------------------------------------------------------------------------------
// Externs
//--------------------------------------------------------------------------------------------------
extern CGcmDrawState* gpGcmDrawState;
extern CGcmDrawState gGcmDrawState[];
extern CGcmDrawState::FixedData gFixedData[];
//--------------------------------------------------------------------------------------------------
// inlines
//--------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------
// Generic pack data
//--------------------------------------------------------------------------------------------------
// Appends one packet to the per-draw-call pack buffer: a DrawData header
// (type, idx, size) immediately followed by `size` bytes copied from pSrc.
// With GCM_DS_SAFE defined, Error() is called when the packet does not fit.
inline void CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc)");

	// Header plus payload
	const uint32 nPacketBytes = sizeof(DrawData) + size;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = idx;
	pHeader->m_size = size;

	// Payload lives directly behind the header
	V_memcpy(pHeader + 1, pSrc, size);

	m_pDataCursor += nPacketBytes;
}
// Appends a packet with index 0: DrawData header followed by `size` bytes
// copied from pSrc. Same as the (type, idx, size, pSrc) overload with idx == 0.
inline void CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc)");

	// The indexed overload does exactly the same work; just supply idx = 0.
	const uint8 nNoIndex = 0;
	PackData(type, nNoIndex, size, pSrc);
}
// Appends a packet whose payload is three 32-bit words (val1, val2, val3).
// Header index is 0, payload size is 12 bytes.
inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3)");
	const uint32 nPayloadBytes = 12;
	const uint32 nPacketBytes = sizeof(DrawData) + nPayloadBytes;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = 0;
	pHeader->m_size = nPayloadBytes;

	// Words are written straight behind the header
	uint32* pWord = (uint32*)(pHeader + 1);
	*pWord++ = val1;
	*pWord++ = val2;
	*pWord   = val3;

	m_pDataCursor += nPacketBytes;
}
// Appends a packet whose payload is two 32-bit words (val1, val2).
// Header index is 0, payload size is 8 bytes.
inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2)");
	const uint32 nPayloadBytes = 8;
	const uint32 nPacketBytes = sizeof(DrawData) + nPayloadBytes;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = 0;
	pHeader->m_size = nPayloadBytes;

	uint32* pWord = (uint32*)(pHeader + 1);
	*pWord++ = val1;
	*pWord   = val2;

	m_pDataCursor += nPacketBytes;
}
// Appends a packet whose payload is a single 32-bit word (val1).
// Header index is 0, payload size is 4 bytes.
inline void CGcmDrawState::PackData(uint8 type, uint32 val1)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1)");
	const uint32 nPayloadBytes = 4;
	const uint32 nPacketBytes = sizeof(DrawData) + nPayloadBytes;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = 0;
	pHeader->m_size = nPayloadBytes;

	*(uint32*)(pHeader + 1) = val1;

	m_pDataCursor += nPacketBytes;
}
// Appends a header-only packet (no payload): index 0, size 0.
inline void CGcmDrawState::PackData(uint8 type)
{
	// SNPROF("CGcmDrawState::PackData(uint8 type)");
	const uint32 nPayloadBytes = 0;
	const uint32 nPacketBytes = sizeof(DrawData) + nPayloadBytes;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = 0;
	pHeader->m_size = nPayloadBytes;

	m_pDataCursor += nPacketBytes;
}
// Appends a clear-surface packet: five 32-bit slots holding, in order,
// nFlags, nColor, flZ (stored as a float), nStencil and nDepthStencilBitDepth.
// Header index is 0, payload size is 20 bytes.
inline void CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ) // used to pack clear surface
{
	// SNPROF("CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth )");
	const uint32 nPayloadBytes = 20;
	const uint32 nPacketBytes = sizeof(DrawData) + nPayloadBytes;

#ifdef GCM_DS_SAFE
	const uint32 nBytesFree = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if (nPacketBytes > nBytesFree) Error("Out of per draw call data\n");
#endif

	DrawData* pHeader = (DrawData*) m_pDataCursor;
	pHeader->m_type = type;
	pHeader->m_idx  = 0;
	pHeader->m_size = nPayloadBytes;

	uint32* pWords = (uint32*)(pHeader + 1);
	pWords[0] = nFlags;
	pWords[1] = nColor;
	// Slot 2 keeps the float representation of Z, not a converted integer
	*(float*)(pWords + 2) = flZ;
	pWords[3] = nStencil;
	pWords[4] = nDepthStencilBitDepth;

	m_pDataCursor += nPacketBytes;
}
//--------------------------------------------------------------------------------------------------
// Init, Begin/EndScene. Flush and Finish, ResetRsxState
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::Init()
{
// Initialize GCM state to defaults
memset(this, 0, sizeof(CGcmDrawState));
m_scissor.enabled = 1;
m_viewZ[0] = 0.1;
m_viewZ[1] = 1000.0f;
m_blends[0] = CELL_GCM_ONE;
m_blends[1] = CELL_GCM_ZERO;
m_alphaFunc.func = CELL_GCM_ALWAYS;
m_alphaFunc.ref = 0;
m_stencilOp.fail = CELL_GCM_KEEP;
m_stencilOp.dfail = CELL_GCM_KEEP;
m_stencilOp.dpass = CELL_GCM_KEEP;
m_stencilFunc.func = CELL_GCM_ALWAYS;
m_stencilFunc.ref = 0;
m_stencilFunc.mask = 0xFF;
m_depthBias.factor = 0;
m_depthBias.units = 0;
m_userClipPlanesState = 0;
m_shaderVxConstants = 0;
// Init fixed sized data
m_pFixed = &gFixedData[0];
memset(m_pFixed->m_aSamplerIdx, 0xff, sizeof(m_pFixed->m_aSamplerIdx));
m_pFixed->m_nSampler = 0;
m_pFixed->m_nInstanced = 0;
// Init variable sized data....
m_pData = gPackData[0];
m_pDataCursor = m_pData;
}
#ifndef SPU
// PPU-side initialisation of every entry of gGcmDrawState.
//   params - device parameters; only the back buffer width/height are read.
// Each entry gets the Init() defaults and is then bound to its own pack buffer
// (gPackData[lp]) and its own fixed data block (gFixedData[lp]).
inline void CGcmDrawState::Init(IDirect3DDevice9Params *params)
{
	for (int lp = 0; lp < GCM_DRAWSTATE_MAX; lp++)
	{
		CGcmDrawState *pGcmDrawState = &gGcmDrawState[lp];
		pGcmDrawState->Init();

		// NOTE(review): the back buffer size and the scissor below are applied to
		// `this`, not to pGcmDrawState, on every iteration. Kept as-is because
		// callers may depend on it - confirm whether the per-entry state was meant.
		m_nBackBufferSize[0] = params->m_presentationParameters.BackBufferWidth;
		m_nBackBufferSize[1] = params->m_presentationParameters.BackBufferHeight;

		// Init() binds every entry to slot 0; rebind this entry to its own slot.
		pGcmDrawState->m_pData = gPackData[lp];
		// Keep the write cursor inside the entry's own buffer. Without this it
		// would still point into gPackData[0] for every lp > 0.
		pGcmDrawState->m_pDataCursor = pGcmDrawState->m_pData;

		pGcmDrawState->m_pFixed = &gFixedData[lp];
		// Init() only applied the fixed-data defaults to gFixedData[0]; apply the
		// same defaults to the block this entry actually uses.
		memset(pGcmDrawState->m_pFixed->m_aSamplerIdx, 0xff, sizeof(pGcmDrawState->m_pFixed->m_aSamplerIdx));
		pGcmDrawState->m_pFixed->m_nSampler = 0;
		pGcmDrawState->m_pFixed->m_nInstanced = 0;

		// Full back buffer scissor
		DrawScissor_t temp;
		temp.x = 0;
		temp.y = 0;
		temp.w = m_nBackBufferSize[0];
		temp.h = m_nBackBufferSize[1];
		SetScissorRect(&temp);
	}
}
#endif
#ifndef SPU
// Start-of-scene setup. Packs, in this order: a begin-scene marker, the
// depth-write-enable render state, an optional zcull stats request (only when
// g_bZcullAuto is set) and the current zcull limits; then resets g_UPFrame.
// The packets are order-sensitive, so do not reorder the calls.
inline void CGcmDrawState::BeginScene()
{
// redundant: will lead to redundant disabling of all samplers at the beginning of the frame, even though they're disabled anyway after flip
PackData(kDataBeginScene);
SetRenderState(D3DRS_ZWRITEENABLE, 1); // CELL_GCM_TRUE
if ( g_bZcullAuto )
{
PackData(kDataZcullStats);
}
// Limits are the values EndScene() computed (or defaulted) last time it ran
PackData(kDataZcullLimit, g_nZcullMoveForward, g_nZcullPushBack);
g_UPFrame = 0;
}
inline void CGcmDrawState::EndScene()
{
int nZcullDefault = g_nZcullDefault;
// Update zcull settings based on metrics
if ( g_bZcullAuto )
{
int nMaxSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 );
int nSumSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 );
int nNumTiles, nAvgSlope;
nNumTiles = nMaxSlope & 0xffff;
nMaxSlope = ( nMaxSlope & 0xFFFF0000 ) >> 16;
nAvgSlope = nNumTiles ? nSumSlope / nNumTiles : 0;
g_nZcullMoveForward = ( nAvgSlope + nMaxSlope ) / 2;
g_nZcullPushBack = g_nZcullMoveForward / 2;
if ( g_nZcullMoveForward < 1 || g_nZcullPushBack < 1 )
{
// pick reasonable defaults in the failure case
g_nZcullMoveForward = nZcullDefault;
g_nZcullPushBack = nZcullDefault;
}
}
else
{
g_nZcullMoveForward = nZcullDefault;
g_nZcullPushBack = nZcullDefault;
}
// Msg("DrawPrimUP Frame %d\n", g_UPFrame);
}
// Publishes the command buffer contents written so far: issues a full `sync`
// barrier, then writes the current write position (as an offset from
// g_ps3gcmGlobalState.m_pIoAddress) to the control register's `put`.
// The barrier must stay in front of the `put` write.
inline void CGcmDrawState::CmdBufferFlush()
{
CellGcmControl volatile *control = cellGcmGetControlRegister();
// Out-of-order write protection.
// this needs to be sync, not eieio as command buffer is on main memory(which is cached)
// but control registers are mapped as cache inhibited, eieio doesn't guarantee order
// between cached and cache inhibited region
#ifdef __SNC__
__builtin_sync();
#else
__asm__ volatile("sync");
#endif // __SNC__
// Offset of the context's current write pointer relative to the IO base address
uint32_t offsetInBytes = (uint32)gpGcmContext->current - (uint32)g_ps3gcmGlobalState.m_pIoAddress;
control->put = offsetInBytes;
}
// Flushes the command buffer and blocks until the control register's `ref`
// equals the reference value queued here. The reference value is
// g_ps3gcmGlobalState.m_finishIdx, which is toggled (XOR 1) on every call.
inline void CGcmDrawState::CmdBufferFinish()
{
uint32 ref = g_ps3gcmGlobalState.m_finishIdx;
GCM_FUNC(cellGcmSetReferenceCommand, ref);
// Toggle so the next call waits on a different value
g_ps3gcmGlobalState.m_finishIdx ^=1;
CmdBufferFlush();
CellGcmControl volatile *control = cellGcmGetControlRegister();
// Poll until the reference value shows up in the control register
while( control->ref != ref )
{
// Don't be a ppu hog ;)
sys_timer_usleep(30);
}
}
#endif
//--------------------------------------------------------------------------------------------------
// Dynamic ECB management
//--------------------------------------------------------------------------------------------------
// Returns the current write position inside the dynamic ECB ring buffer
// (g_aDynECB at index g_nDynECBIdx). Does not advance the index; that is done
// by CloseDynECB().
inline uint8* CGcmDrawState::OpenDynECB()
{
	uint8* pWritePos = g_aDynECB + g_nDynECBIdx;
	return pWritePos;
}
// Advances the dynamic ECB ring buffer index by `size` rounded up to a
// multiple of 16, and wraps the index to 0 once fewer than 8K (0x2000) bytes
// remain before the end of the buffer.
inline void CGcmDrawState::CloseDynECB(uint32 size)
{
	g_nDynECBIdx = g_nDynECBIdx + AlignValue(size,16);

	// If we don't have 8K left then wrap
	const uint32 nWrapThreshold = GCM_DS_MAXDYNECB - 0x2000;
	if (nWrapThreshold < g_nDynECBIdx)
	{
		g_nDynECBIdx = 0;
	}
}
//--------------------------------------------------------------------------------------------------
// Resets RSX to default state
//--------------------------------------------------------------------------------------------------
// Issues a long sequence of GCM_FUNC state-setting calls that put the render
// state, all 16 texture samplers, all 16 vertex attribute arrays and the 4
// vertex texture units to fixed default values, then nulls every entry of
// g_cacheSetVertexDataArray because the cached bindings no longer match.
// Free function (not a CGcmDrawState member). The surface itself is
// deliberately not reset (see the commented-out block near the end).
inline void UnpackResetRsxState()
{
GCM_FUNC( cellGcmSetAlphaFunc, CELL_GCM_ALWAYS, 0);
GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetBackStencilFunc, CELL_GCM_ALWAYS, 0, 0xff);
GCM_FUNC( cellGcmSetBackStencilMask, 0xff);
GCM_FUNC( cellGcmSetBackStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP);
GCM_FUNC( cellGcmSetBlendColor, 0, 0);
GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetBlendEnableMrt, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetBlendEquation, CELL_GCM_FUNC_ADD, CELL_GCM_FUNC_ADD);
GCM_FUNC( cellGcmSetBlendFunc, CELL_GCM_ONE, CELL_GCM_ZERO, CELL_GCM_ONE, CELL_GCM_ZERO);
// GCM_FUNC( cellGcmSetClearDepthStencil, 0xffffff00);
// GCM_FUNC( cellGcmSetClearSurface, 0);
GCM_FUNC( cellGcmSetColorMask, CELL_GCM_COLOR_MASK_A|CELL_GCM_COLOR_MASK_R|CELL_GCM_COLOR_MASK_G|CELL_GCM_COLOR_MASK_B);
GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK);
GCM_FUNC( cellGcmSetDepthBounds, 0.0f, 1.0f);
GCM_FUNC( cellGcmSetDepthBoundsTestEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetDepthFunc, CELL_GCM_LESS);
GCM_FUNC( cellGcmSetDepthMask, CELL_GCM_TRUE);
GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetDitherEnable, CELL_GCM_TRUE);
GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetFrequencyDividerOperation, 0);
GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW);
GCM_FUNC( cellGcmSetLineWidth, 8); // fixed point [0:6:3]
GCM_FUNC( cellGcmSetLogicOpEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetLogicOp, CELL_GCM_COPY);
// GCM_FUNC( cellGcmSetNotifyIndex, -=something invalid=- ); // initial value is an invalid system reserved area
GCM_FUNC( cellGcmSetPointSize, 1.0f);
GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetPolygonOffset, 0.0f, 0.0f);
GCM_FUNC( cellGcmSetRestartIndexEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetRestartIndex, 0xffffffff);
GCM_FUNC( cellGcmSetScissor, 0,0,4096,4096);
GCM_FUNC( cellGcmSetShadeMode, CELL_GCM_SMOOTH);
GCM_FUNC( cellGcmSetStencilFunc, CELL_GCM_ALWAYS, 0, 0xff);
GCM_FUNC( cellGcmSetStencilMask, 0xff);
GCM_FUNC( cellGcmSetStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP);
GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE);
// Per-sampler defaults for all 16 fragment texture samplers
for( uint nTextureSampler = 0; nTextureSampler < 16; ++nTextureSampler )
{
GCM_FUNC( cellGcmSetTextureAddress, nTextureSampler, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP,
CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL,
CELL_GCM_TEXTURE_ZFUNC_NEVER, 0);
GCM_FUNC( cellGcmSetTextureBorderColor, nTextureSampler, 0);
GCM_FUNC( cellGcmSetTextureControl, nTextureSampler, CELL_GCM_FALSE, 0, 12<<8, CELL_GCM_TEXTURE_MAX_ANISO_1);
GCM_FUNC( cellGcmSetTextureFilter, nTextureSampler, 0, CELL_GCM_TEXTURE_NEAREST_LINEAR,
CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX);
}
// Clear all 16 vertex attribute array bindings
for( uint nVertexAttribute = 0; nVertexAttribute < 16; ++nVertexAttribute )
{
GCM_FUNC( cellGcmSetVertexDataArray, nVertexAttribute, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0);
}
GCM_FUNC( cellGcmSetTwoSidedStencilTestEnable, CELL_GCM_FALSE);
// 4096x4096 viewport with depth range [0, 1]
float scale[4] = {2048.0f, 2048.0f, 0.5f, 0.0f};
float offset[4] = {2048.0f, 2048.0f, 0.5f, 0.0f};
GCM_FUNC( cellGcmSetViewport, 0, 0, 4096, 4096, 0.0f, 1.0f, scale, offset);
GCM_FUNC( cellGcmSetZcullStatsEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetAntiAliasingControl, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE, 0xffff);
GCM_FUNC( cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL);
GCM_FUNC( cellGcmSetClearColor, 0);
GCM_FUNC( cellGcmSetColorMaskMrt, 0);
GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL);
GCM_FUNC( cellGcmSetLineSmoothEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetLineStippleEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetPointSpriteControl, CELL_GCM_FALSE, 0, 0);
GCM_FUNC( cellGcmSetPolySmoothEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetPolygonStippleEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetRenderEnable, CELL_GCM_TRUE, 0);
GCM_FUNC( cellGcmSetUserClipPlaneControl, CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetVertexAttribInputMask, 0xffff);
GCM_FUNC( cellGcmSetZpassPixelCountEnable, CELL_GCM_FALSE);
// Per-unit defaults for the 4 vertex texture units
for( uint i = 0; i < 4 ; ++i )
{
GCM_FUNC( cellGcmSetVertexTextureAddress, i, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP);
GCM_FUNC( cellGcmSetVertexTextureBorderColor, i, 0);
GCM_FUNC( cellGcmSetVertexTextureControl, i, CELL_GCM_FALSE, 0, 12<<8);
GCM_FUNC( cellGcmSetVertexTextureFilter, i, 0);
}
GCM_FUNC( cellGcmSetTransformBranchBits, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetZMinMaxControl, CELL_GCM_TRUE, CELL_GCM_FALSE, CELL_GCM_FALSE);
// GCM_FUNC( cellGcmSetTextureOptimization, 1<<3); --<sergiy>-- who cares? this won't compile the way it's described in documentation.
// GCM_FUNC( cellGcmSetCylindricalWrap, CELL_GCM_FALSE); --<sergiy>-- who cares? this won't compile the way it's described in documentation.
// NOTE(review): the next two calls repeat cellGcmSetTwoSideLightEnable and
// cellGcmSetTransformBranchBits, which were already issued a few lines above;
// they look redundant.
GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE);
GCM_FUNC( cellGcmSetTransformBranchBits, 0);
GCM_FUNC( cellGcmSetVertexDataBase, 0,0);
// --<sergiy>-- I don't wanna set the surface to the default surface that we never use, as it generates unneeded stall in RSX
/*
CellGcmSurface surface = {
CELL_GCM_SURFACE_PITCH, // type
CELL_GCM_SURFACE_CENTER_1, // antialias
CELL_GCM_SURFACE_X1R5G5B5_Z1R5G5B5,// colorFormat
CELL_GCM_SURFACE_TARGET_0, // colorTarget
{0, 0, 0, 0}, // colorLocation
{0, 0, 0, 0}, // colorOffset
{64, 64, 64, 64}, // colorPitch
CELL_GCM_SURFACE_Z16, // depthFormat
CELL_GCM_LOCATION_LOCAL, // depthLocation
{0,0}, // __padding
0, // depthOffset
64, // depthPitch
1,1, // width,height
0,0 // x,y
};
GCM_FUNC( cellGcmSetSurface, &surface);
*/
// After ^this, the cached vertex array data is worthless....
for( uint i = 0; i < D3D_MAX_STREAMS; ++i )
g_cacheSetVertexDataArray[i].SetNull();
}
// Flags kDirtyResetRsx in m_dirtyStatesMask. No GCM commands are emitted here; the flag is
// presumably consumed when the dirty render states are committed.
inline void CGcmDrawState::ResetRsxState()
{
m_dirtyStatesMask |= kDirtyResetRsx;
}
//--------------------------------------------------------------------------------------------------
// Viewport and scissor
//--------------------------------------------------------------------------------------------------
// Applies a viewport: caches the depth range in m_viewZ and the rectangle in m_viewportSize
// (x, y, width, height), then emits cellGcmSetViewport with the matching scale/offset vectors.
//   pViewport - viewport to apply; X, Y, Width, Height, MinZ and MaxZ are read.
inline void CGcmDrawState::UnpackSetViewport(CONST D3DVIEWPORT9* pViewport)
{
    m_viewZ[0] = pViewport->MinZ;
    m_viewZ[1] = pViewport->MaxZ;
    m_viewportSize[0] = pViewport->X;
    m_viewportSize[1] = pViewport->Y;
    m_viewportSize[2] = pViewport->Width;
    m_viewportSize[3] = pViewport->Height;

    // Half extents are computed in floating point. m_viewportSize is handed to
    // Helper_IntersectRectsXYWH as a uint16 rect, so "size/2" would be an integer division
    // that drops the half pixel for odd sizes and collapses to a zero scale for a
    // 1-pixel-wide (or 1-pixel-high) viewport.
    const float flHalfWidth = m_viewportSize[2] * 0.5f;
    const float flHalfHeight = m_viewportSize[3] * 0.5f;

    float viewScale[4] = { flHalfWidth, flHalfHeight,
        ( m_viewZ[1] - m_viewZ[0] ) / 2.0f, 0.0f };
    float viewOffset[4] = { m_viewportSize[0] + flHalfWidth, m_viewportSize[1] + flHalfHeight,
        ( m_viewZ[1] + m_viewZ[0] ) / 2.0f, 0.0f };

    GCM_FUNC ( cellGcmSetViewport, m_viewportSize[0], m_viewportSize[1],
        m_viewportSize[2], m_viewportSize[3],
        m_viewZ[0], m_viewZ[1],
        viewScale, viewOffset );
}
// Queues a viewport change: packs sizeof(D3DVIEWPORT9) bytes from pViewport as a kDataViewport
// entry. UnpackData hands kDataViewport entries to UnpackSetViewport. Always returns S_OK.
inline HRESULT CGcmDrawState::SetViewport(CONST D3DVIEWPORT9* pViewport)
{
PackData(kDataViewport, sizeof(D3DVIEWPORT9), (void*)pViewport);
return S_OK;
}
// Copies the scissor rectangle (x, y, w, h) from pScissor into m_scissor and flags
// kDirtyScissor. No GCM command is emitted here.
inline void CGcmDrawState::UnpackSetScissorRect( DrawScissor_t * pScissor )
{
    m_dirtyStatesMask |= kDirtyScissor;

    // Origin
    m_scissor.x = pScissor->x;
    m_scissor.y = pScissor->y;

    // Extent
    m_scissor.w = pScissor->w;
    m_scissor.h = pScissor->h;
}
// Queues a scissor rectangle change: packs sizeof(DrawScissor_t) bytes from pScissor as a
// kDataScissor entry. UnpackData hands kDataScissor entries to UnpackSetScissorRect.
inline void CGcmDrawState::SetScissorRect( DrawScissor_t * pScissor )
{
PackData(kDataScissor, sizeof(DrawScissor_t), pScissor);
}
//--------------------------------------------------------------------------------------------------
// Reports, Zpass and labels
//--------------------------------------------------------------------------------------------------
// Emits cellGcmSetZpassPixelCountEnable with the given enable value through GCM_FUNC.
inline void UnpackSetZpassPixelCountEnable(uint32 enable)
{
GCM_FUNC(cellGcmSetZpassPixelCountEnable, enable);
}
// Emits cellGcmSetClearReport for the given report type through GCM_FUNC.
inline void UnpackSetClearReport(uint32 type)
{
GCM_FUNC(cellGcmSetClearReport, type);
}
// Emits cellGcmSetReport for the given report type and report index through GCM_FUNC.
inline void UnpackSetReport(uint32 type, uint32 index)
{
GCM_FUNC(cellGcmSetReport, type, index);
}
// Emits cellGcmSetWriteBackEndLabel( index, value ) through GCM_FUNC.
// UnpackData calls this with 32-bit packed values, so the label index is narrowed to uint8 here.
inline void UnpackSetWriteBackEndLabel(uint8 index, uint32 value)
{
GCM_FUNC(cellGcmSetWriteBackEndLabel, index, value);
}
// Queues the enable value as a kDataSetZpassPixelCountEnable entry; UnpackData dispatches such
// entries to UnpackSetZpassPixelCountEnable.
inline void CGcmDrawState::SetZpassPixelCountEnable(uint32 enable)
{
PackData(kDataSetZpassPixelCountEnable, enable);
}
// Queues the report type as a kDataSetClearReport entry; UnpackData dispatches such entries to
// UnpackSetClearReport.
inline void CGcmDrawState::SetClearReport(uint32 type)
{
PackData(kDataSetClearReport, type);
}
// Queues ( type, index ) as a kDataSetReport entry; UnpackData dispatches such entries to
// UnpackSetReport.
inline void CGcmDrawState::SetReport(uint32 type, uint32 index)
{
PackData(kDataSetReport, type, index);
}
// Records a back-end label write.
// Every label except GCM_LABEL_MEMORY_FREE is queued as a kDataSetWriteBackEndLabel entry.
// The memory-free label is not queued: its value is only stored in m_nFreeLabel.
inline void CGcmDrawState::SetWriteBackEndLabel(uint8 index, uint32 value)
{
    if (index != GCM_LABEL_MEMORY_FREE)
    {
        PackData(kDataSetWriteBackEndLabel, index, value);
        return;
    }

    m_nFreeLabel = value; // 0 is not valid...
}
//--------------------------------------------------------------------------------------------------
// Renderstates
//--------------------------------------------------------------------------------------------------
// Applies one D3D render state to the cached draw state.
//   State - D3D render state id; asserted to be below D3DRS_VALUE_LIMIT.
//   Value - raw 32-bit value for that state.
// g_d3drs_defvalue_indices[State] encodes a handling class in the bits above bit 5 and, in the
// low six bits, an index into g_d3drs_defvalues (the index is only decoded in assert builds).
// Classes 0 and 1 change nothing; classes 2 and 3 update the matching member and set the
// corresponding dirty bit. No GCM commands are emitted here.
inline void CGcmDrawState::UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value )
{
// NOTE(review): 'ignored' is only ever written in this function, never read.
char ignored = 0;
Assert( State < D3DRS_VALUE_LIMIT );
uint nDefvalueIndex = g_d3drs_defvalue_indices[State];
uint8 nClass = nDefvalueIndex >> 6;
#ifdef DBGFLAG_ASSERT
// 0077 is octal (63): keep the low six bits, i.e. the default-value index
nDefvalueIndex &= 0077;
Assert( nDefvalueIndex < ARRAYSIZE( g_d3drs_defvalues ) );
uint32 nDefValue = g_d3drs_defvalues[nDefvalueIndex];
#endif
switch( nClass )
{
case 0: // just ignore quietly. example: D3DRS_LIGHTING
ignored = 1;
break;
case 1:
{
// no GL response - and no error as long as the write value matches the default
Assert( Value == nDefValue );
}
break;
case 2:
// provide GL response, but only support known default value
Assert( Value == nDefValue );
// fall through to mode 3
case 3:
// full GL response, support any legal value
// note we're handling the class-2's as well.
switch( State )
{
default:
Msg( "Cannot interpret State %d", (int)State );
break;
case D3DRS_ZENABLE: // kGLDepthTestEnable
m_ZEnable = !!Value;
m_dirtyStatesMask |= kDirtyZEnable;
break;
case D3DRS_ZWRITEENABLE: // kGLDepthMask
{
// Only this state filters redundant writes before flagging itself dirty
uint32 newMask = Value ? 1 : 0;
if(m_nSetDepthMask != newMask)
{
m_nSetDepthMask = newMask;
m_dirtyStatesMask |= kDirtyDepthMask;
}
}
break;
case D3DRS_ZFUNC:
{
// kGLDepthFunc
m_ZFunc = D3DCompareFuncToGL( Value );
m_dirtyStatesMask |= kDirtyZFunc;
}
break;
case D3DRS_COLORWRITEENABLE: // kGLColorMaskSingle
if( IsLayerRender() )
{
m_ColorWriteEnable = ( ((Value & D3DCOLORWRITEENABLE_RED) != 0) ? CELL_GCM_COLOR_MASK_R : 0x00 )
| ( ((Value & D3DCOLORWRITEENABLE_GREEN) != 0) ? CELL_GCM_COLOR_MASK_G : 0x00 )
| ( ((Value & D3DCOLORWRITEENABLE_BLUE) != 0) ? CELL_GCM_COLOR_MASK_B : 0x00 )
| ( ((Value & D3DCOLORWRITEENABLE_ALPHA) != 0) ? CELL_GCM_COLOR_MASK_A : 0x00 );
m_dirtyStatesMask |= kDirtyColorWriteEnable;
}
break;
case D3DRS_COLORWRITEENABLE1: // kGLColorMaskMultiple
case D3DRS_COLORWRITEENABLE2: // kGLColorMaskMultiple
case D3DRS_COLORWRITEENABLE3: // kGLColorMaskMultiple
ignored = 1;
break;
case D3DRS_CULLMODE: // kGLCullFaceEnable / kGLCullFrontFace
{
m_CullMode = Value;
m_dirtyStatesMask |= kDirtyCullMode;
}
break;
//-------------------------------------------------------------------------------------------- alphablend stuff
case D3DRS_ALPHABLENDENABLE: // kGLBlendEnable
// NOTE(review): the 'if' has no braces, so only the assignment is guarded by IsLayerRender();
// the dirty bit is set unconditionally, unlike D3DRS_BLENDOP and D3DRS_SRGBWRITEENABLE below.
// Confirm whether that is intended.
if( IsLayerRender() )
m_AlphablendEnable = !!Value;
m_dirtyStatesMask |= kDirtyAlphablendEnable;
break;
case D3DRS_BLENDOP: // kGLBlendEquation // D3D blend-op ==> GL blend equation
if( IsLayerRender() )
{
m_BlendOp = Value;
m_dirtyStatesMask |= kDirtyBlendOp;
}
break;
case D3DRS_SRCBLEND: // kGLBlendFactor // D3D blend-factor ==> GL blend factor
case D3DRS_DESTBLEND: // kGLBlendFactor
{
uint16 factor = D3DBlendFactorToGL( Value );
// m_blends[0] receives the source factor, m_blends[1] the destination factor
m_blends[!( State == D3DRS_SRCBLEND )] = factor;
m_dirtyStatesMask |= kDirtyBlendFactor;
}
break;
case D3DRS_SEPARATEALPHABLENDENABLE:
case D3DRS_BLENDOPALPHA:
case D3DRS_SRCBLENDALPHA:
case D3DRS_DESTBLENDALPHA:
ignored = 1;
break;
case D3DRS_SRGBWRITEENABLE: // kGLBlendEnableSRGB
if( IsLayerRender() )
{
m_SrgbWriteEnable = Value;
m_dirtyStatesMask |= kDirtySrgbWriteEnable;
}
break;
//-------------------------------------------------------------------------------------------- alphatest stuff
case D3DRS_ALPHATESTENABLE:
m_AlphaTestEnable = Value;
m_dirtyStatesMask |= kDirtyAlphaTestEnable;
break;
case D3DRS_ALPHAREF:
m_alphaFunc.ref = Value;
m_dirtyStatesMask |= kDirtyAlphaFunc;
break;
case D3DRS_ALPHAFUNC:
{
uint32 func = D3DCompareFuncToGL( Value );
m_alphaFunc.func = func;
m_dirtyStatesMask |= kDirtyAlphaFunc;
}
break;
//-------------------------------------------------------------------------------------------- stencil stuff
case D3DRS_STENCILENABLE: // GLStencilTestEnable_t
m_StencilEnable = Value;
m_dirtyStatesMask |= kDirtyStencilEnable;
break;
case D3DRS_STENCILFAIL: // GLStencilOp_t "what do you do if stencil test fails"
{
// Value indexes dxtogl_stencilmode directly; no range check is done here
m_stencilOp.fail = dxtogl_stencilmode[Value];
m_dirtyStatesMask |= kDirtyStencilOp;
}
break;
case D3DRS_STENCILZFAIL: // GLStencilOp_t "what do you do if stencil test passes *but* depth test fails, if depth test happened"
{
m_stencilOp.dfail = dxtogl_stencilmode[Value];
m_dirtyStatesMask |= kDirtyStencilOp;
}
break;
case D3DRS_STENCILPASS: // GLStencilOp_t "what do you do if stencil test and depth test both pass"
{
m_stencilOp.dpass = dxtogl_stencilmode[Value];
m_dirtyStatesMask |= kDirtyStencilOp;
}
break;
case D3DRS_STENCILFUNC: // GLStencilFunc_t
{
uint32 stencilfunc = D3DCompareFuncToGL( Value );
m_stencilFunc.func = stencilfunc;
m_dirtyStatesMask |= kDirtyStencilFunc;
}
break;
case D3DRS_STENCILREF: // GLStencilFunc_t
m_stencilFunc.ref = (Value & 0xFF);
m_dirtyStatesMask |= kDirtyStencilFunc;
break;
case D3DRS_STENCILMASK: // GLStencilFunc_t
{
m_stencilFunc.mask = (Value & 0xFF);
m_dirtyStatesMask |= kDirtyStencilFunc;
}
break;
case D3DRS_STENCILWRITEMASK: // GLStencilWriteMask_t
{
//if (Value==255)
//{
// Value = 0xFFFFFFFF; // mask blast
//}
m_StencilWriteMask = Value;
m_dirtyStatesMask |= kDirtyStencilWriteMask;
}
break;
//-------------------------------------------------------------------------------------------- two-sided stencil stuff
case D3DRS_TWOSIDEDSTENCILMODE: // -> GL_STENCIL_TEST_TWO_SIDE_EXT... not yet implemented ?
case D3DRS_CCW_STENCILFAIL: // GLStencilOp_t
case D3DRS_CCW_STENCILZFAIL: // GLStencilOp_t
case D3DRS_CCW_STENCILPASS: // GLStencilOp_t
case D3DRS_CCW_STENCILFUNC: // GLStencilFunc_t
ignored = 1;
break;
case D3DRS_FOGENABLE: // none of these are implemented yet... erk
case D3DRS_FOGCOLOR:
case D3DRS_FOGTABLEMODE:
case D3DRS_FOGSTART:
case D3DRS_FOGEND:
case D3DRS_FOGDENSITY:
case D3DRS_RANGEFOGENABLE:
case D3DRS_FOGVERTEXMODE:
ignored = 1;
break;
case D3DRS_MULTISAMPLEANTIALIAS:
case D3DRS_MULTISAMPLEMASK:
ignored = 1;
break;
case D3DRS_SCISSORTESTENABLE: // kGLScissorEnable
{
m_scissor.enabled = !!Value;
m_dirtyStatesMask |= kDirtyScissor;
}
break;
case D3DRS_DEPTHBIAS: // kGLDepthBias
{
// the value in the dword is actually a float
// NOTE(review): Value is assigned as-is; whether the float bit pattern survives depends on the
// declared type of m_depthBias.units, which is not visible here.
m_depthBias.units = Value;
m_dirtyStatesMask |= kDirtyDepthBias;
}
break;
// good ref on these: http://aras-p.info/blog/2008/06/12/depth-bias-and-the-power-of-deceiving-yourself/
case D3DRS_SLOPESCALEDEPTHBIAS:
{
// the value in the dword is actually a float
m_depthBias.factor = Value;
m_dirtyStatesMask |= kDirtyDepthBias;
}
break;
case D3DRS_CLIPPING: // ???? is clipping ever turned off ??
ignored = 1;
break;
case D3DRS_CLIPPLANEENABLE: // kGLClipPlaneEnable
{
// Expand the six per-plane enable bits of Value into six 2-bit fields, each set to
// CELL_GCM_USER_CLIP_PLANE_ENABLE_GE shifted to that plane's position.
m_userClipPlanesState = 0;
for ( uint32 j = 0, uiValueMask = 1, uiClipSetMask = CELL_GCM_USER_CLIP_PLANE_ENABLE_GE;
j < 6; ++ j, uiValueMask <<= 1, uiClipSetMask <<= 2 )
{
m_userClipPlanesState |= ( ( Value & uiValueMask ) != 0 ) ? uiClipSetMask : 0;
}
// Clip planes are tracked in the caches mask, not the states mask
m_dirtyCachesMask |= kDirtyClipPlanes;
}
break;
//-------------------------------------------------------------------------------------------- polygon/fill mode
case D3DRS_FILLMODE:
m_FillMode = Value;
m_dirtyStatesMask |= kDirtyFillMode;
break;
}
break;
}
}
// Queues ( State, Value ) as a kDataSetRenderState entry; UnpackData dispatches such entries to
// UnpackSetRenderState.
inline void CGcmDrawState::SetRenderState( D3DRENDERSTATETYPE State, uint Value )
{
PackData(kDataSetRenderState, State, Value);
}
//--------------------------------------------------------------------------------------------------
// Texture samplers, textures, texture cache
//--------------------------------------------------------------------------------------------------
// Stores one D3D sampler state value in the sampler descriptor used by 'Sampler'.
//   Sampler - D3D sampler index (range-checked only when CERT is not defined).
//   Type    - which sampler parameter to set.
//   Value   - raw value for that parameter.
// Descriptors are reached through m_pFixed->m_aSamplerIdx: an entry of 0xFF means no descriptor
// has been assigned yet, in which case the next free one (m_pFixed->m_nSampler) is claimed.
// Values are only stored here; the D3D-to-GL translation happens in CommitSamplers at draw time.
inline void CGcmDrawState::SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value )
{
#ifndef CERT
    if ( Sampler >= D3D_MAX_SAMPLERS )
    {
        Error("Invalid sampler %d, PS3 suppoerts %d\n", Sampler, D3D_MAX_SAMPLERS );
    }
#endif

    // Resolve (or lazily allocate) the descriptor slot behind this sampler
    uint32 nSlot = m_pFixed->m_aSamplerIdx[Sampler];
    if ( nSlot == 0xFF )
    {
        nSlot = m_pFixed->m_nSampler;
        m_pFixed->m_nSampler++;
        m_pFixed->m_aSamplerIdx[Sampler] = nSlot;
    }

    D3DSamplerDesc *pDesc = m_pFixed->m_aSamplers + nSlot;

    switch( Type )
    {
    // Address modes: D3DTADDRESS_WRAP, _MIRROR, _CLAMP, _BORDER or _MIRRORONCE
    case D3DSAMP_ADDRESSU:
        pDesc->m_addressModeU = Value;
        break;

    case D3DSAMP_ADDRESSV:
        pDesc->m_addressModeV = Value;
        break;

    case D3DSAMP_ADDRESSW:
        pDesc->m_addressModeW = Value;
        break;

    case D3DSAMP_BORDERCOLOR:
        // Border color always 0, so the value is not stored
        break;

    case D3DSAMP_MAGFILTER:
        pDesc->m_magFilter = (D3DTEXTUREFILTERTYPE)Value;
        break;

    case D3DSAMP_MINFILTER:
        pDesc->m_minFilter = (D3DTEXTUREFILTERTYPE)Value;
        break;

    case D3DSAMP_MIPFILTER:
        pDesc->m_mipFilter = (D3DTEXTUREFILTERTYPE)Value;
        break;

    case D3DSAMP_MIPMAPLODBIAS:
        // float in sheep's clothing - check this one out
        pDesc->m_mipmapBias = Value;
        break;

    case D3DSAMP_MAXMIPLEVEL:
        //FIXME (unsure here)
        pDesc->m_maxMipLevel = Value;
        break;

    case D3DSAMP_MAXANISOTROPY:
        pDesc->m_maxAniso = Value;
        break;

    case D3DSAMP_SRGBTEXTURE:
        pDesc->m_srgb = Value;
        break;

    case D3DSAMP_SHADOWFILTER:
        pDesc->m_shadowFilter = Value;
        break;

    default:
        Msg( "Unknown sampler parameter" );
        DebuggerBreak();
        break;
    }

    // Mark this sampler dirty
    m_dirtySamplersMask |= ( 1 << Sampler );
}
// Records the texture bound to 'Stage' (local offset and layout address) and marks that
// sampler dirty. The texture set itself is finalized in CommitSamplers.
inline void CGcmDrawState::UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout )
{
    m_dirtySamplersMask |= ( 1 << Stage );

    m_textures[Stage].m_eaLayout = eaLayout;
    m_textures[Stage].m_nLocalOffset = offset;
}
// Resets the texture slot for 'Stage' and marks that sampler dirty. The texture set itself is
// finalized in CommitSamplers.
inline void CGcmDrawState::UnpackResetTexture( DWORD Stage )
{
    m_dirtySamplersMask |= ( 1 << Stage );

    m_textures[Stage].Reset();
}
// Binds 'tex' to 'Stage': assigns it into m_textures[Stage], sets bit 0 of the stored offset
// when the texture's memory block reports local memory, then queues ( Stage, offset, layout )
// as a kDataTexture entry. 'tex' is dereferenced unconditionally and must not be NULL.
inline void CGcmDrawState::SetTexture( DWORD Stage, CPs3gcmTexture *tex )
{
    m_textures[Stage].Assign(tex);

    // Bit 0 of the offset flags a local-memory texture
    if ( tex->m_lmBlock.IsLocalMemory() )
    {
        m_textures[Stage].m_nLocalOffset |= 1;
    }

    PackData(kDataTexture,
        Stage,
        m_textures[Stage].m_nLocalOffset,
        m_textures[Stage].m_eaLayout );
}
// Queues a kDataResetTexture entry for 'Stage'; UnpackData dispatches such entries to
// UnpackResetTexture.
inline void CGcmDrawState::ResetTexture( DWORD Stage )
{
PackData(kDataResetTexture, Stage);
}
// Emits cellGcmSetInvalidateTextureCache with CELL_GCM_INVALIDATE_TEXTURE through GCM_FUNC.
inline void UnpackSetInvalidateTextureCache()
{
GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
}
// Flags kDirtyTxCache in m_dirtyCachesMask. The invalidate command itself is not emitted here.
inline void CGcmDrawState::SetInvalidateTextureCache()
{
m_dirtyCachesMask |= kDirtyTxCache;
}
//--------------------------------------------------------------------------------------------------
// Vertex buffers, vertex cache, vertex constants
//--------------------------------------------------------------------------------------------------
#ifndef SPU
// Appends a vertex stream descriptor for stream 'nStreamIndex' to the per-draw data buffer.
// The entry is a DrawData header (kDataStreamDesc) followed by a D3DStreamDesc.
//   pStreamData   - vertex buffer to bind; when it is NULL or has no m_pBuffer the descriptor
//                   is zero-filled.
//   OffsetInBytes - start offset stored in the descriptor.
//   Stride        - vertex stride stored in the descriptor.
// Remaining space is only checked when GCM_DS_SAFE is defined.
inline void CGcmDrawState::SetVertexStreamSource( uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride )
{
#ifdef GCM_DS_SAFE
    uint32 spacereqd = sizeof(D3DStreamDesc) + sizeof(DrawData);
    uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
    if(spacereqd > spaceleft) Error("Out of per draw call data\n");
#endif

    // Header at the cursor, descriptor payload immediately after it
    DrawData* pHeader = (DrawData*) m_pDataCursor;
    D3DStreamDesc* pDesc = (D3DStreamDesc*)(pHeader+1);

    pHeader->m_type = kDataStreamDesc;
    pHeader->m_size = sizeof(D3DStreamDesc);
    pHeader->m_idx = nStreamIndex;

    if ( !( pStreamData && pStreamData->m_pBuffer ) )
    {
        // Nothing to bind: store an all-zero descriptor
        V_memset(pDesc, 0, sizeof( *pDesc ) );
    }
    else
    {
        // we pass this pointer as a BufferBase later to compare, so we need to make sure they're binarily the same
        Assert( uintp( pStreamData ) == uintp( static_cast<IDirect3DGcmBufferBase*>( pStreamData ) ) );

        pDesc->m_vtxBuffer = pStreamData;
        pDesc->m_nLocalBufferOffset = pStreamData->m_pBuffer->Offset();
        pDesc->m_offset = OffsetInBytes;
        pDesc->m_stride = Stride;
    }

    // Advance the cursor past the payload
    m_pDataCursor = (uint8*)pDesc + sizeof(D3DStreamDesc);
}
#endif
// Replaces the boolean vertex shader constants in registers
// [StartRegister, StartRegister + BoolCount) with the bits supplied in shaderVxConstants and
// flags kDirtyVxConstants.
//   StartRegister     - first boolean register to replace.
//   BoolCount         - number of consecutive registers to replace.
//   shaderVxConstants - new bits, already positioned at their register indices.
// Only 32 registers fit in the mask; both ends of the range are clamped to 32.
inline void CGcmDrawState::_SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants )
{
    // Clamp the range. The unclamped form "( 1 << ( Start + Count ) ) - ( 1 << Start )" shifts a
    // 32-bit value by 32 or more as soon as the range reaches register 32, which is undefined.
    uint nFirst = StartRegister;
    if ( nFirst > 32 )
    {
        nFirst = 32;
    }
    uint nEnd = StartRegister + BoolCount;
    if ( ( nEnd < StartRegister ) || ( nEnd > 32 ) ) // also catches wrap-around of the sum
    {
        nEnd = 32;
    }

    // Bits below each bound; a bound of 32 selects every bit without shifting by 32
    const uint nBelowFirst = ( nFirst >= 32 ) ? 0xFFFFFFFFu : ( ( 1u << nFirst ) - 1u );
    const uint nBelowEnd = ( nEnd >= 32 ) ? 0xFFFFFFFFu : ( ( 1u << nEnd ) - 1u );
    const uint nMask = nBelowEnd & ~nBelowFirst;

    m_shaderVxConstants &= ~nMask;
    m_shaderVxConstants |= shaderVxConstants;
    m_dirtyCachesMask |= kDirtyVxConstants;
}
// Sets boolean vertex shader constants from an array of BOOLs.
//   StartRegister - first boolean register to set.
//   pConstantData - BoolCount values; any non-zero value sets the register's bit.
//   BoolCount     - number of consecutive registers.
// Registers at index 32 and above are skipped. The collected bits are handed to
// _SetVertexShaderConstantB together with the original range.
inline void CGcmDrawState::SetVertexShaderConstantB(UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount)
{
    uint shaderVxConstants = 0;

    const uint32 kFirst = MIN( StartRegister, 32 );
    const uint32 kEnd = MIN( StartRegister + BoolCount, 32 );

    // The bit is derived from the clamped register index k (always < 32 inside the loop) using an
    // unsigned shift, so no shift by StartRegister >= 32 is ever evaluated and bit 31 is well defined.
    for ( uint32 k = kFirst; k < kEnd; ++ k )
    {
        if( pConstantData[ k - kFirst ] )
        {
            shaderVxConstants |= ( 1u << k );
        }
    }

    _SetVertexShaderConstantB( StartRegister, BoolCount, shaderVxConstants );
}
// inline void CGcmDrawState::VertexConstantExtractor(
// float *pDestStorage, int kRegisterFirst, int kRegisterLength,
// int StartRegister, const float *pConstantData, int Vector4fCount )
// {
// int iMatrixRegister = Max<int>( 0, StartRegister - kRegisterFirst ); // which part of matrix is updated
// int iConstantDataMatrixStart = Max<int>( StartRegister, kRegisterFirst ); // where in constant data the new values start
// int numMatrixRegisters = StartRegister + Vector4fCount - iConstantDataMatrixStart; // how many new values can be used
// numMatrixRegisters = Min<int>( numMatrixRegisters, kRegisterLength - iMatrixRegister ); // we shouldn't use more values than there's room in the matrix
// if ( numMatrixRegisters > 0 )
// {
// iConstantDataMatrixStart -= StartRegister; // constant data values are relative to StartRegister
// V_memcpy( &pDestStorage[ iMatrixRegister * 4 ], &pConstantData[ iConstantDataMatrixStart * 4 ], numMatrixRegisters * 4 * sizeof( float ) );
// }
// }
// Appends float vertex shader constants to the per-draw data buffer as a kDataVpuConsts entry:
// a DrawData header followed by Vector4fCount vec_float4 values copied from the caller.
//   StartRegister          - first constant register; stored in the header's m_idx.
//   pUnalignedConstantData - source values (Vector4fCount * sizeof(vec_float4) bytes are read).
//   Vector4fCount          - number of 4-float registers.
// Remaining space is only checked when GCM_DS_SAFE is defined.
// (Interception of the view-projection / model matrix registers via VertexConstantExtractor is
// currently disabled.)
inline void CGcmDrawState::SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount )
{
    const uint32 nPayloadBytes = Vector4fCount * sizeof(vec_float4);
    uint32 spacereqd = nPayloadBytes + sizeof(DrawData);

#ifdef GCM_DS_SAFE
    uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
    if(spacereqd > spaceleft) Error("Out of per draw call data\n");
#endif

    // Header at the cursor
    DrawData* pHeader = (DrawData*)m_pDataCursor;
    pHeader->m_type = kDataVpuConsts;
    pHeader->m_size = nPayloadBytes;
    pHeader->m_idx = StartRegister;

    // Payload directly behind the header
    uint8* pPayload = (uint8*)(pHeader+1);
    V_memcpy(pPayload, pUnalignedConstantData, nPayloadBytes);

    m_pDataCursor += spacereqd;
}
// Emits cellGcmSetInvalidateVertexCache through GCM_FUNC.
inline void UnpackSetInvalidateVertexCache()
{
GCM_FUNC( cellGcmSetInvalidateVertexCache );
}
// Flags kDirtyVxCache in m_dirtyCachesMask. The invalidate command itself is not emitted here.
inline void CGcmDrawState::SetInvalidateVertexCache()
{
m_dirtyCachesMask |= kDirtyVxCache;
}
// Stores a new local buffer offset in every entry of g_dxGcmVertexStreamSources that currently
// references 'vtxBuffer'. Entries bound to other buffers are left untouched.
inline void CGcmDrawState::UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
{
    for( uint nStream = 0; nStream < D3D_MAX_STREAMS; ++nStream )
    {
        if( !( g_dxGcmVertexStreamSources[nStream].m_vtxBuffer == vtxBuffer ) )
        {
            continue; // stream is bound to a different buffer
        }

        g_dxGcmVertexStreamSources[nStream].m_nLocalBufferOffset = nLocalBufferOffset; // new local buffer offset
    }
}
// Queues ( vtxBuffer, nLocalBufferOffset ) as a kDataUpdateVtxBufferOffset entry; UnpackData
// dispatches such entries to UnpackUpdateVtxBufferOffset.
// The buffer pointer is packed as a uint32, which assumes pointers fit in 32 bits.
inline void CGcmDrawState::UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
{
PackData(kDataUpdateVtxBufferOffset, (uint32)vtxBuffer, nLocalBufferOffset);
}
//--------------------------------------------------------------------------------------------------
// Pixel Shader Consts
//--------------------------------------------------------------------------------------------------
// Appends float pixel shader constants to the per-draw data buffer as a kDataFpuConsts entry
// (a DrawData header followed by Vector4fCount vec_float4 values) and flags kDirtyPxConstants.
//   StartRegister - first constant register; stored in the header's m_idx.
//   pConstantData - source values (Vector4fCount * sizeof(vec_float4) bytes are read).
//   Vector4fCount - number of 4-float registers.
// Remaining space is only checked when GCM_DS_SAFE is defined.
inline void CGcmDrawState::SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount)
{
    m_dirtyCachesMask |= CGcmDrawState::kDirtyPxConstants;

    const uint32 nPayloadBytes = Vector4fCount * sizeof(vec_float4);
    uint32 spacereqd = nPayloadBytes + sizeof(DrawData);

#ifdef GCM_DS_SAFE
    uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
    if(spacereqd > spaceleft) Error("Out of per draw call data\n");
#endif

    // Header at the cursor
    DrawData* pHeader = (DrawData*)m_pDataCursor;
    pHeader->m_type = kDataFpuConsts;
    pHeader->m_size = nPayloadBytes;
    pHeader->m_idx = StartRegister;

    // Payload directly behind the header
    uint8* pPayload = (uint8*)(pHeader+1);
    V_memcpy(pPayload, pConstantData, nPayloadBytes);

    m_pDataCursor += spacereqd;
}
// Copies sizeof(m_vecWorldSpaceCameraPosition) bytes from pWCP into
// m_vecWorldSpaceCameraPosition.
inline void CGcmDrawState::UnpackSetWorldSpaceCameraPosition(float* pWCP)
{
memcpy(m_vecWorldSpaceCameraPosition, pWCP, sizeof(m_vecWorldSpaceCameraPosition));
}
// Queues the camera position as a kDataSetWorldSpaceCameraPosition entry of
// sizeof(m_vecWorldSpaceCameraPosition) bytes; UnpackData dispatches such entries to
// UnpackSetWorldSpaceCameraPosition.
inline void CGcmDrawState::SetWorldSpaceCameraPosition(float* pWCP)
{
PackData(kDataSetWorldSpaceCameraPosition, (uint16)sizeof(m_vecWorldSpaceCameraPosition), (void*)pWCP);
}
//--------------------------------------------------------------------------------------------------
// Surfaces and render targets
//--------------------------------------------------------------------------------------------------
// Builds a CellGcmSurface from the colour (m_texC) and depth (m_texZ) textures in pSurface and
// queues it as a kDataUpdateSurface entry.
// Only colour target 0 is ever bound; targets 1-3 get placeholder values (offset 0, pitch 64).
// Surface dimensions come from the colour texture when one is set, otherwise from the depth
// texture, otherwise from g_ps3gcmGlobalState.m_nRenderSize.
inline void CGcmDrawState::Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface )
{
    const CPs3gcmTextureData_t &texC = pSurface->m_texC;
    const CPs3gcmTextureData_t &texZ = pSurface->m_texZ;

    // Local copies of the layouts, plus the texture/layout pair that supplies the dimensions
    CPs3gcmTextureLayout texC_layout, texZ_layout;
    const CPs3gcmTextureData_t *pSizeTex = &texC;
    CPs3gcmTextureLayout *pSizeLayout = &texC_layout;

    if( texZ )
    {
        memcpy( &texZ_layout, (void*)texZ.m_eaLayout, sizeof( texZ_layout ) );
        pSizeTex = &texZ;
        pSizeLayout = &texZ_layout;
    }
    if( texC )
    {
        // Colour takes precedence over depth when both are present
        memcpy( &texC_layout, (void*)texC.m_eaLayout, sizeof( texC_layout ) );
        pSizeTex = &texC;
        pSizeLayout = &texC_layout;
    }

    CellGcmSurface surface;
    V_memset( &surface, 0, sizeof( surface ) );

    // Colour target 0
    surface.colorFormat = CELL_GCM_SURFACE_A8R8G8B8;
    surface.colorTarget = texC.NotNull() ? CELL_GCM_SURFACE_TARGET_0 : CELL_GCM_SURFACE_TARGET_NONE;
    surface.colorLocation[0] = CELL_GCM_LOCATION_LOCAL;
    surface.colorOffset[0] = texC ? texC.Offset() : 0;
    surface.colorPitch[0] = texC ? texC_layout.DefaultPitch2( g_ps3texFormats ) : 64;

    // Unused colour targets 1..3
    for ( int iTarget = 1; iTarget < 4; ++iTarget )
    {
        surface.colorLocation[iTarget] = CELL_GCM_LOCATION_LOCAL;
        surface.colorOffset[iTarget] = 0;
        surface.colorPitch[iTarget] = 64;
    }

    // Depth: Z24S8 unless the depth texture uses one of the 16-bit depth formats
    surface.depthFormat = CELL_GCM_SURFACE_Z24S8;
    if ( texZ )
    {
        CPs3gcmTextureLayout::Format_t &zFmt = g_ps3texFormats[texZ_layout.m_nFormat];
        const bool bDepth16 = ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16 ) ||
                              ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16_FLOAT );
        if ( bDepth16 )
        {
            surface.depthFormat = CELL_GCM_SURFACE_Z16;
        }
    }
    surface.depthLocation = CELL_GCM_LOCATION_LOCAL;
    surface.depthOffset = texZ ? texZ.Offset() : 0;
    surface.depthPitch = texZ ? texZ_layout.DefaultPitch2( g_ps3texFormats ) : 64;

    // Layout type and dimensions
    surface.type = ( texC && texC_layout.IsSwizzled() ) ? CELL_GCM_SURFACE_SWIZZLE : CELL_GCM_SURFACE_PITCH;
    surface.antialias = CELL_GCM_SURFACE_CENTER_1;
    surface.width = *pSizeTex ? pSizeLayout->m_key.m_size[0] : g_ps3gcmGlobalState.m_nRenderSize[0];
    surface.height = *pSizeTex ? pSizeLayout->m_key.m_size[1] : g_ps3gcmGlobalState.m_nRenderSize[1];
    surface.x = 0;
    surface.y = 0;

    PackData(kDataUpdateSurface, 0, (uint16)sizeof(surface), (void*)&surface);
}
// Makes *pSf the current render surface: emits cellGcmSetSurface, enables Zcull and flags
// kDirtyScissor so the scissor is re-sent for the new surface.
inline void CGcmDrawState::UnpackUpdateSurface(CellGcmSurface* pSf)
{
GCM_FUNC( cellGcmSetSurface, pSf );
// cellGcmSetZcullControl invalidates Zcull, and these are the default settings anyways (LESS / LONES)
// so don't bother doing anything here.
// If other settings are needed, set them once at the beginning of time for each zcull region
//GCM_FUNC( cellGcmSetZcullControl, CELL_GCM_ZCULL_LESS, CELL_GCM_ZCULL_LONES );
// These calls do NOT invalidate Zcull
GCM_FUNC( cellGcmSetZcullEnable, CELL_GCM_TRUE, CELL_GCM_TRUE );
// when render target changes, and scissor is not enabled, and the target dimensions change,
// we need to flush the scissor dimensions because we always maintain scissor ON state, and
// the scissor size must conform to surface size (which just changed)
m_dirtyStatesMask |= kDirtyScissor;
}
// Intersects two rectangles.
//   a, b   - input rects as 4 x uint16: [0]=x, [1]=y, [2]=width, [3]=height
//   result - receives the intersection in the same layout
// When the rects do not overlap on an axis, the size on that axis is clamped to zero.
// All arithmetic is 16-bit; the overlap test compares the edges as int16.
inline void CGcmDrawState::Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result )
{
    // Origin of the intersection: the larger of the two origins on each axis
    result[0] = ( b[0] < a[0] ) ? a[0] : b[0];
    result[1] = ( b[1] < a[1] ) ? a[1] : b[1];

    // X axis: nearer of the two far edges, never before the origin
    uint16 nFarA = a[0]+a[2];
    uint16 nFarB = b[0]+b[2];
    uint16 nFar = ( nFarA < nFarB ) ? nFarA : nFarB;
    if ( int16(nFar) < int16(result[0]) )
    {
        nFar = result[0];
    }
    result[2] = nFar - result[0];

    // Y axis: same again
    nFarA = a[1]+a[3];
    nFarB = b[1]+b[3];
    nFar = ( nFarA < nFarB ) ? nFarA : nFarB;
    if ( int16(nFar) < int16(result[1]) )
    {
        nFar = result[1];
    }
    result[3] = nFar - result[1];
}
// Clears the current surface.
//   nFlags                - D3DCLEAR_TARGET / D3DCLEAR_ZBUFFER / D3DCLEAR_STENCIL combination
//   nColor                - clear colour (used when D3DCLEAR_TARGET is set)
//   flZ                   - clear depth, scaled to the integer depth range of the surface
//   nStencil              - clear stencil (low 8 bits; not used for 16-bit depth)
//   nDepthStencilBitDepth - 16 selects the Z16 encoding, anything else the 24+8 encoding
// The scissor is set to the viewport (or to viewport-intersect-scissor when scissoring is
// enabled) for the clear, so kDirtyScissor is flagged afterwards.
inline void CGcmDrawState::UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
                                               uint32 nDepthStencilBitDepth )
{
    // Translate the D3D clear flags into a GCM clear mask
    uint32 uiGcmClearMask = 0;
    if ( nFlags & D3DCLEAR_STENCIL )
    {
        uiGcmClearMask |= CELL_GCM_CLEAR_S;
    }
    if ( nFlags & D3DCLEAR_ZBUFFER )
    {
        uiGcmClearMask |= CELL_GCM_CLEAR_Z;
    }
    if ( nFlags & D3DCLEAR_TARGET )
    {
        uiGcmClearMask |= (CELL_GCM_CLEAR_R|CELL_GCM_CLEAR_G|CELL_GCM_CLEAR_B|CELL_GCM_CLEAR_A);
        GCM_FUNC( cellGcmSetClearColor, nColor );
    }

    if ( nFlags & (D3DCLEAR_STENCIL|D3DCLEAR_ZBUFFER) )
    {
        // NOTE: for SURFACE_Z16 depth is in lower 16 bits; otherwise depth sits above 8 stencil bits
        uint32 nClearValue = ( nDepthStencilBitDepth == 16 )
            ? ( uint32 )( flZ * 0xFFFF )
            : ( ( ( ( uint32 )( flZ * 0xFFFFFF ) ) << 8 ) | ( nStencil & 0xFF ) );
        GCM_FUNC( cellGcmSetClearDepthStencil, nClearValue );
    }

    // Set scissor box to cover the intersection of viewport and scissor
    if ( m_scissor.enabled )
    {
        uint16 uiScissorCoords[4] = {0};
        Helper_IntersectRectsXYWH( m_viewportSize, &m_scissor.x, uiScissorCoords );
        GCM_FUNC( cellGcmSetScissor, uiScissorCoords[0], uiScissorCoords[1], uiScissorCoords[2], uiScissorCoords[3] );
    }
    else
    {
        GCM_FUNC( cellGcmSetScissor, m_viewportSize[0], m_viewportSize[1], m_viewportSize[2], m_viewportSize[3] );
    }

    GCM_FUNC( cellGcmSetClearSurface, uiGcmClearMask );

    // The scissor was changed behind the cached state's back: force it to be re-sent
    m_dirtyStatesMask |= kDirtyScissor;
}
// Queues a surface clear as a kDataClearSurface entry holding the five arguments in order;
// UnpackData dispatches such entries to UnpackClearSurface.
inline void CGcmDrawState::ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
uint32 nDepthStencilBitDepth )
{
PackData(kDataClearSurface, nFlags, nColor, flZ, nStencil, nDepthStencilBitDepth );
}
// Emits a fixed set of default raster states (back-face culling with CW front faces; blend,
// alpha test, stencil test and depth test off; fill polygon mode; no polygon offset), forces
// the viewport and scissor to the back buffer size, clears the cached clip-plane and boolean
// vertex constant state, and flags the shader / constant caches dirty.
inline void CGcmDrawState::UnpackResetSurfaceToKnownDefaultState()
{
// Reset to default state:
GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK );
GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW );
GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE );
GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE );
GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE );
GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE );
GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL );
GCM_FUNC( cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL );
GCM_FUNC( cellGcmSetPolygonOffset, 0, 0 );
GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE );
// Force the viewport to match the current back buffer
// The depth range is carried over from the cached m_viewZ values.
D3DVIEWPORT9 dForcedView =
{
0, 0,
m_nBackBufferSize[0],
m_nBackBufferSize[1],
m_viewZ[0],
m_viewZ[1]
};
// NOTE(review): this calls the packing SetViewport (which queues a kDataViewport entry), not
// UnpackSetViewport, so the forced viewport is presumably applied only when that queued entry
// is unpacked - i.e. after any entries already queued behind the current one. Confirm that a
// later explicit SetViewport cannot be overridden by this.
SetViewport( &dForcedView );
GCM_FUNC( cellGcmSetScissor, 0, 0, m_nBackBufferSize[0], m_nBackBufferSize[1] );
// Reset some cached gcm state
m_userClipPlanesState = 0;
m_shaderVxConstants = 0;
m_dirtyCachesMask |= ( kDirtyVxConstants | kDirtyVxShader |
kDirtyClipPlanes | kDirtyPxShader |
kDirtyPxConstants );
}
// Queues a kDataResetSurface entry (no payload); UnpackData dispatches such entries to
// UnpackResetSurfaceToKnownDefaultState.
inline void CGcmDrawState::ResetSurfaceToKnownDefaultState()
{
PackData(kDataResetSurface);
}
//--------------------------------------------------------------------------------------------------
// Blit
//--------------------------------------------------------------------------------------------------
// Emits cellGcmSetTransferImage through GCM_FUNC, forwarding all twelve arguments unchanged
// and in the same order.
inline void UnpackTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel )
{
GCM_FUNC(cellGcmSetTransferImage, mode, dstOffset, dstPitch, dstX, dstY, srcOffset,
srcPitch, srcX, srcY, width, height, bytesPerPixel );
}
// Queues an image transfer (blit) as a kDataTransferImage entry.
// The twelve arguments are widened to uint32 and stored in declaration order, which is the
// order UnpackData reads them back in before calling UnpackTransferImage.
inline void CGcmDrawState::SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
                                            uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel )
{
    uint32 aValues[12] =
    {
        mode,
        dstOffset, dstPitch, dstX, dstY,
        srcOffset, srcPitch, srcX, srcY,
        width, height,
        bytesPerPixel
    };

    PackData(kDataTransferImage, 0, sizeof(aValues), (void*)aValues);
}
//--------------------------------------------------------------------------------------------------
// State Flushing and Pixel Shader Patching
//--------------------------------------------------------------------------------------------------
// Replays the packed per-draw data stream.
// The stream between m_pData and m_pDataCursor is a sequence of DrawData headers, each followed
// by m_size bytes of payload. Every entry is dispatched on its m_type to the matching Unpack*
// handler (or handled inline), then the cursor is rewound to m_pData so the buffer can be
// refilled. The function also keeps running statistics (high-water mark and average size of the
// stream) in function-local statics and reports a new high-water mark through Msg.
inline void CGcmDrawState::UnpackData()
{
static uint32 highWater = 0;
static float average = 0.0f;
static uint32 count = 0;
#ifndef SPU
// Only referenced by the commented-out periodic report near the end of this function
static int display = 4000;
#endif
m_nNumECB = 0;
// Bytes of payload seen per entry type, for the high-water report.
// NOTE(review): indexed by m_type without a range check - assumes every type id is below 64.
int aSizes[64];
memset(aSizes, 0, sizeof(aSizes));
DrawData* pSrc = (DrawData*)m_pData;
// m_pDataCursor is re-read on every iteration, so entries appended while unpacking
// (presumably the SetViewport call in UnpackResetSurfaceToKnownDefaultState) are processed too.
while ((uint8*)pSrc < m_pDataCursor)
{
// Payload starts right after the header; viewed both as uint32 and as float
uint32* pVals = (uint32*)(pSrc+1);
float* pfVals = (float*)pVals;
aSizes[pSrc->m_type] += pSrc->m_size;
switch (pSrc->m_type)
{
case kDataEcbTexture:
V_memcpy(&m_aBindTexture[pSrc->m_idx], pVals, pSrc->m_size);
break;
case kDataSetRenderState:
UnpackSetRenderState((D3DRENDERSTATETYPE)pVals[0], pVals[1]);
break;
case kDataFpuConsts:
V_memcpy(&g_aFPConst[pSrc->m_idx], pVals, pSrc->m_size);
break;
case kDataSetWorldSpaceCameraPosition:
UnpackSetWorldSpaceCameraPosition(pfVals);
break;
case kDataStreamDesc:
V_memcpy(&g_dxGcmVertexStreamSources[pSrc->m_idx], pVals, pSrc->m_size);
break;
case kDataVpuConsts:
// m_size is in bytes; 16 bytes per 4-float register
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, pSrc->m_idx, pSrc->m_size/16, (float*)pVals );
break;
case kDataZcullStats:
GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 );
GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 );
break;
case kDataZcullLimit:
// NOTE(review): reads pVals[0] and pVals[2], skipping pVals[1] - confirm against the code
// that packs kDataZcullLimit.
GCM_FUNC(cellGcmSetZcullLimit, pVals[0], pVals[2] );
break;
case kDataViewport:
UnpackSetViewport((D3DVIEWPORT9*) pVals);
break;
case kDataScissor:
UnpackSetScissorRect((DrawScissor_t*) pVals);
break;
case kDataSetZpassPixelCountEnable:
UnpackSetZpassPixelCountEnable(pVals[0]);
break;
case kDataSetClearReport:
UnpackSetClearReport(pVals[0]);
break;
case kDataSetReport:
UnpackSetReport(pVals[0], pVals[1]);
break;
case kDataSetWriteBackEndLabel:
UnpackSetWriteBackEndLabel(pVals[0], pVals[1]);
break;
case kDataUpdateSurface:
UnpackUpdateSurface((CellGcmSurface*)pVals);
break;
case kDataResetSurface:
UnpackResetSurfaceToKnownDefaultState();
break;
case kDataClearSurface:
// Third value is the clear depth and is read as a float
UnpackClearSurface(pVals[0], pVals[1], pfVals[2], pVals[3], pVals[4] );
break;
case kDataTransferImage:
UnpackTransferImage(pVals[0], pVals[1], pVals[2], pVals[3],
pVals[4], pVals[5], pVals[6], pVals[7],
pVals[8], pVals[9], pVals[10], pVals[11] );
break;
case kDataTexture:
UnpackSetTexture(pVals[0], pVals[1], pVals[2]);
break;
case kDataResetTexture:
UnpackResetTexture(pVals[0]);
break;
case kDataUpdateVtxBufferOffset:
UnpackUpdateVtxBufferOffset((IDirect3DVertexBuffer9*)pVals[0], pVals[1]);
break;
case kDataECB:
// Command buffers are taken from m_aECB in the order their kDataECB entries appear;
// each slot is cleared once executed.
UnpackExecuteCommandBuffer(m_aECB[m_nNumECB]);
m_aECB[m_nNumECB] = 0;
m_nNumECB++;
break;
case kDataBeginScene:
m_nDisabledSamplers = 0;
m_nSetTransformBranchBits = 0;
break;
}
// Step over this header and its payload to the next entry
pSrc = (DrawData*)((uint8*)(pSrc+1)+pSrc->m_size);
}
m_nNumECB = 0;
// Record High Water
uint32 size = m_pDataCursor - m_pData;
// Update the running mean of the stream size
average *= count;
count++;
average += size;
average /= count;
#ifndef SPU
uint32 avgInt = uint32(average + 0.5f);
#endif
if (size > highWater)
{
highWater = size;
// NOTE(review): avgInt only exists when SPU is not defined, yet it is referenced here
// unconditionally; presumably Msg discards its arguments in SPU builds - confirm.
Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This plus drawstate (%d (0x%x)) \n", highWater, highWater,
avgInt, avgInt, avgInt + DRAWSTATE_SIZEOFDMA, avgInt + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA );
for (int i = 1; i <= kDataTransferImage; i++ )
{
Msg( ">>>%d : %d\n", i, aSizes[i]);
}
}
// display--;
// if ( (display < 1) || ((size+sizeof(CGcmDrawState)) > 0x1800))
// {
// Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This (%d (0x%x)) \n", highWater, highWater,
// avgInt, avgInt, avgInt + sizeof(CGcmDrawState), avgInt + sizeof(CGcmDrawState), size, size );
//
// display = 10000;
// }
// Reset cursor
m_pDataCursor = m_pData;
}
inline void CGcmDrawState::CommitRenderStates()
{
uint nMask = m_dirtyStatesMask;
m_dirtyStatesMask = 0;
if ( nMask & kDirtyDepthMask)
{
GCM_FUNC(cellGcmSetDepthMask, m_nSetDepthMask);
}
if ( nMask & kDirtyZEnable )
{
GCM_FUNC( cellGcmSetDepthTestEnable, m_ZEnable );
}
if ( nMask & kDirtyZFunc )
{
GCM_FUNC( cellGcmSetDepthFunc, m_ZFunc );
}
if ( nMask & kDirtyColorWriteEnable )
{
GCM_FUNC( cellGcmSetColorMask, m_ColorWriteEnable);
}
if ( nMask & kDirtyCullMode )
{
switch(m_CullMode)
{
case D3DCULL_NONE:
GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE );
break;
case D3DCULL_CW:
GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW ); // opposite from D3D
break;
case D3DCULL_CCW:
GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW ); // opposite from D3D
break;
}
}
if ( nMask & kDirtyAlphablendEnable )
{
GCM_FUNC( cellGcmSetBlendEnable, m_AlphablendEnable );
}
if (nMask & kDirtyBlendOp)
{
uint32 Value = m_BlendOp;
uint16 equation = dxtogl_blendop[ Value ];
GCM_FUNC( cellGcmSetBlendEquation, equation, equation );
}
if ( nMask & kDirtySrgbWriteEnable )
{
uint32 Value = m_SrgbWriteEnable;
GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, !!Value );
}
if ( nMask & kDirtyAlphaTestEnable )
{
uint32 Value = m_AlphaTestEnable;
GCM_FUNC( cellGcmSetAlphaTestEnable, !!Value );
}
if ( nMask & kDirtyStencilEnable )
{
uint32 Value = m_StencilEnable;
GCM_FUNC( cellGcmSetStencilTestEnable, !!Value );
}
if ( nMask & kDirtyStencilWriteMask )
{
uint32 Value = m_StencilWriteMask;
GCM_FUNC( cellGcmSetStencilMask, Value );
}
if ( nMask & kDirtyFillMode )
{
uint32 Value = m_FillMode;
uint32 mode = CELL_GCM_POLYGON_MODE_POINT + ( Value - D3DFILL_POINT );
GCM_FUNC( cellGcmSetFrontPolygonMode, mode );
GCM_FUNC( cellGcmSetBackPolygonMode, mode );
}
if ( nMask & CGcmDrawState::kDirtyBlendFactor )
{
GCM_FUNC( cellGcmSetBlendFunc,
m_blends[0], m_blends[1],
m_blends[0], m_blends[1] );
}
if ( nMask & CGcmDrawState::kDirtyAlphaFunc )
{
GCM_FUNC( cellGcmSetAlphaFunc, m_alphaFunc.func, m_alphaFunc.ref );
}
if ( nMask & CGcmDrawState::kDirtyStencilOp )
{
GCM_FUNC( cellGcmSetStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
GCM_FUNC( cellGcmSetBackStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
}
if ( nMask & CGcmDrawState::kDirtyStencilFunc )
{
GCM_FUNC( cellGcmSetStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
GCM_FUNC( cellGcmSetBackStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
}
if ( nMask & CGcmDrawState::kDirtyScissor )
{
if( m_scissor.enabled )
{
GCM_FUNC( cellGcmSetScissor, m_scissor.x, m_scissor.y, m_scissor.w, m_scissor.h );
}
else
{
GCM_FUNC( cellGcmSetScissor, 0, 0, 4095, 4095 ); // disable scissor
}
}
if ( nMask & CGcmDrawState::kDirtyDepthBias )
{
float units = *((float*)&m_depthBias.units);
GCM_FUNC( cellGcmSetPolygonOffset, *((float*)&m_depthBias.factor), /* NEED 2x here:see PSGL! */ 2.0f * units );
if ( ( m_depthBias.factor != 0.0f ) || ( m_depthBias.units != 0.0f ) )
{
GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_TRUE );
}
else
{
GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE );
}
}
}
//--------------------------------------------------------------------------------------------------
// Pushes vertex data array state for the given vertex declaration.
//   pDecl           - declaration supplying the attribute-slot -> element mapping
//   baseVertexIndex - forwarded to the per-slot cache entry that computes the array offset
// Each slot is compared against g_cacheSetVertexDataArray so that an unchanged binding
// emits no command. Slots the vertex shader does not read (or that have no matching decl
// element) are disabled once and remembered as null in the cache.
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
{
	uint nShaderAttrMask = m_pVertexShaderData->m_attributeInputMask;
	if ( !nShaderAttrMask ) Error(">>>>Blank vertex shader attr\n");

	// Bit 0 of nShaderAttrMask always corresponds to the slot being visited
	for ( int nSlot = 0; nSlot < D3D_MAX_STREAMS; ++nSlot, nShaderAttrMask >>= 1 )
	{
		SetVertexDataArrayCache_t *pCached = &g_cacheSetVertexDataArray[nSlot];

		// m_cgAttrSlots holds (element index + 1); 0 means "no element for this slot"
		const int nElemPlusOne = ( nShaderAttrMask & 1 ) ? pDecl->m_cgAttrSlots[ nSlot ] : 0;
		if ( !nElemPlusOne )
		{
			// Nothing to bind: switch the slot off unless it is already off
			if ( !pCached->IsNull() )
			{
				GCM_FUNC( cellGcmSetVertexDataArray, nSlot, 1, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0 );
				pCached->SetNull();
			}
			continue;
		}

		D3DVERTEXELEMENT9_GCM *pElem = &pDecl->m_elements[ nElemPlusOne - 1 ];
		int nSourceStream = pElem->m_dxdecl.Stream;
		Assert( nSourceStream >= 0 && nSourceStream < D3D_MAX_STREAMS );

		D3DStreamDesc &streamDesc = g_dxGcmVertexStreamSources[ nSourceStream ];
		D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmDecl = pElem->m_gcmdecl;

		SetVertexDataArrayCache_t wanted( streamDesc, gcmDecl, baseVertexIndex );
		if ( *pCached != wanted )
		{
			const uint8_t nStride = streamDesc.m_stride;
			const uint8_t nCompCount = gcmDecl.m_datasize;
			const uint8_t nCompType = gcmDecl.m_datatype;
			GCM_FUNC( cellGcmSetVertexDataArray, nSlot, 1, nStride, nCompCount, nCompType,
				CELL_GCM_LOCATION_LOCAL, wanted.GetLocalOffset() );
			*pCached = wanted;
		}
	}
}
//--------------------------------------------------------------------------------------------------
// Writes the full texture/sampler setup for one sampler straight into the command buffer.
//   nSampler - sampler index; selects the entry in m_aSamplers / m_textures (and gaLayout on SPU)
// Eleven 32-bit words are stored at gpGcmContext->current and the cursor is advanced by 11:
//   [0]      TEXTURE_OFFSET method header (8 data words follow)
//   [1]-[8]  offset, format, address, control0, remap, filter, image rect, border color
//   [9]      TEXTURE_CONTROL3 method header (1 data word follows)
//   [10]     control3
// No space is reserved here (the cellGcmReserveMethodSize call is commented out), so the
// caller is presumably expected to guarantee room in the buffer - TODO confirm.
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::CommitSampler(uint32 nSampler)
{
D3DSamplerDesc const & dxsamp = m_aSamplers[ nSampler ];
// The texture layout comes from a local array on SPU and from the address stored in m_textures on PPU
#ifdef SPU
extern CPs3gcmTextureLayout gaLayout[D3D_MAX_TEXTURES];
CPs3gcmTextureLayout const & texlayout = gaLayout[nSampler];
#else
CPs3gcmTextureLayout const & texlayout = *((CPs3gcmTextureLayout const *)m_textures[ nSampler ].m_eaLayout);
#endif
uint nMips = texlayout.m_mipCount;
Assert( nMips > 0 );
CPs3gcmTextureLayout::Format_t & texlayoutFormat = g_ps3texFormats[texlayout.m_nFormat];
// If bReadsRawDepth is true, a depth texture has been set but shadow filtering has NOT been enabled. In this case, the shader is expecting to read
// the texture as A8R8G8B8 and manually recover depth (used for depth feathering).
bool bReadsRawDepth = ( texlayoutFormat.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH24_D8 ) && !dxsamp.m_shadowFilter;
// GCM_FUNC( cellGcmReserveMethodSize, 11 );
uint32_t *current = gpGcmContext->current;
current[0] = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET( nSampler, 8 );
current[1] = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET( m_textures[ nSampler ].Offset() );
// Bit 0 of the offset word is used as a flag: set means local memory, clear means main memory.
// The flag is stripped from the word before it is left in the command buffer.
uint locn;
if (current[1] & 1)
{
locn = CELL_GCM_LOCATION_LOCAL;
current[1] &= 0xFFFFFFFE;
}
else
{
locn = CELL_GCM_LOCATION_MAIN;
}
// Format word: location, cubemap flag, 2D/3D dimension, texel format (with swizzled/linear bit), mip count
current[2] = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(
locn,
texlayout.IsCubeMap() ? CELL_GCM_TRUE : CELL_GCM_FALSE,
texlayout.IsVolumeTex() ? CELL_GCM_TEXTURE_DIMENSION_3 : CELL_GCM_TEXTURE_DIMENSION_2,
( bReadsRawDepth
? CELL_GCM_TEXTURE_A8R8G8B8 // bind depth textures as ARGB and reassemble depth in shader
: texlayoutFormat.m_gcmFormat
) |
( texlayout.IsSwizzled() ? CELL_GCM_TEXTURE_SZ : CELL_GCM_TEXTURE_LN ),
nMips
);
// Address word: U/V/W wrap modes translated from D3D, depth compare function (GEQUAL only when
// shadow filtering is on), and sRGB gamma on R/G/B only if both the format and the sampler ask for it
current[3] = CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(
dxtogl_addressMode[ dxsamp.m_addressModeU ],
dxtogl_addressMode[ dxsamp.m_addressModeV ],
dxtogl_addressMode[ dxsamp.m_addressModeW ],
CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL,
dxsamp.m_shadowFilter ? CELL_GCM_TEXTURE_ZFUNC_GEQUAL : CELL_GCM_TEXTURE_ZFUNC_NEVER,
( ( texlayoutFormat.m_gcmCaps & CPs3gcmTextureLayout::Format_t::kCapSRGB ) && dxsamp.m_srgb )
? CELL_GCM_TEXTURE_GAMMA_R | CELL_GCM_TEXTURE_GAMMA_G | CELL_GCM_TEXTURE_GAMMA_B : 0,
0
);
// Control0 word: sampler enabled, LOD range (values scaled by 256; first value is the sampler's max mip
// level clamped to the last mip, second is the last mip), and max anisotropy (fixed at 4 when either
// min or mag filter is anisotropic and the texture is not a volume texture)
current[4] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0( CELL_GCM_TRUE,
(uint16)( Max<uint>( Min<uint>( dxsamp.m_maxMipLevel, nMips - 1 ), 0u ) * 256.0f ),
(uint16)( Max<uint>( nMips - 1, 0u ) * 256.0f ),
texlayout.IsVolumeTex() || ( ( dxsamp.m_minFilter != D3DTEXF_ANISOTROPIC ) && ( dxsamp.m_magFilter != D3DTEXF_ANISOTROPIC ) )
? CELL_GCM_TEXTURE_MAX_ANISO_1 // 3D textures cannot have anisotropic filtering!
: CELL_GCM_TEXTURE_MAX_ANISO_4 // dxtogl_anisoIndexHalf[ ( dxsamp.m_maxAniso / 2 ) & ( ARRAYSIZE( dxtogl_anisoIndexHalf ) - 1 ) ]
);
// Remap word: raw-depth reads use an explicit B,A,R,G channel remap; otherwise the format's own remap is used
current[5] = bReadsRawDepth ?
CELL_GCM_REMAP_MODE( CELL_GCM_TEXTURE_REMAP_ORDER_XYXY, CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G,
CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP )
: texlayoutFormat.m_gcmRemap;
// Filter word: raw-depth reads force nearest min/mag filtering; otherwise the D3D filters are translated,
// with the mip filter clamped to at most D3DTEXF_LINEAR before indexing the table
if( bReadsRawDepth )
current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER( 0, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX );
else
current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER(
0, // 0x1FBE, // 0x1FC0, // corresponding to PSGL 0 mip bias, formula: [( bias - .26 )*256] & 0x1FFF
dxtogl_minFilter[ dxsamp.m_minFilter ][ Min( (D3DTEXTUREFILTERTYPE)dxsamp.m_mipFilter, D3DTEXF_LINEAR ) ],
dxtogl_magFilter[ dxsamp.m_magFilter ],
CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX
);
// Image rect word: note the layout key's size[1] is passed before size[0]
current[7] = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(
texlayout.m_key.m_size[1],
texlayout.m_key.m_size[0]
);
current[8] = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(
0 // Border color always 0 ... dxsamp.m_borderColor // R=>>16; G=>>8; B=>>0; A=>>24 (same thing as GCM, see JSGCM_CALC_COLOR_LE_ARGB8)
);
// Second method: control3 carries the default pitch and the layout key's size[2]
current[9] = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3( nSampler, 1 );
current[10] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(
texlayout.DefaultPitch2( g_ps3texFormats ),
texlayout.m_key.m_size[2]
);
// Publish the 11 words by moving the command buffer cursor past them
gpGcmContext->current = &current[11];
}
inline void CGcmDrawState::CommitSamplers()
{
// Unpack from Fixed data into m_aSamplers
for (uint32 lp = 0; lp < D3D_MAX_SAMPLERS; lp++)
{
uint32 SamplerIdx = m_pFixed->m_aSamplerIdx[lp];
if (SamplerIdx != 0xFF)
m_aSamplers[lp] = m_pFixed->m_aSamplers[SamplerIdx];
}
// PS3 is binding textures here
uint mask = m_dirtySamplersMask;
m_dirtySamplersMask = 0;
uint16 uiPixelShaderInputMask = m_pPixelShaderData ? m_pPixelShaderData->m_samplerInputMask : 0;
uint16 uiRunningUpBitMask = 1;
uint nDisabledSamplers = m_nDisabledSamplers;
m_nDisabledSamplers = 0;
for ( int nSampler = 0; nSampler < 16; ++ nSampler, mask >>= 1, uiPixelShaderInputMask >>= 1, uiRunningUpBitMask <<= 1 )
{
if ( ( uiPixelShaderInputMask & 1 ) == 0 ) // The texture will not be sampled by pixel shader, unset it
{
// optimization
if( !( nDisabledSamplers & uiRunningUpBitMask ) )
{
GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
}
m_dirtySamplersMask |= uiRunningUpBitMask; // Keep the sampler dirty because we might have textures previously set on it
m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
continue;
}
if ( ( mask & 1 ) == 0 ) // If the sampler is not dirty then don't do anything
continue;
if ( m_textures[nSampler].IsNull() ) // The sampler is dirty, but no texture on it, disable the sampler
{
// optimization
if( !( nDisabledSamplers & uiRunningUpBitMask ) )
{
GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
}
m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
continue;
}
CommitSampler(nSampler);
}
m_pFixed->m_nInstanced = 0;
}
// Byte-permute control vectors for vec_perm( source, destination, mask ), indexed by (length - 1).
// Byte indices 0x00-0x0F pick from the source constant and 0x10-0x1F pick from the destination.
// Entry N takes the first N+1 32-bit words from the source with the two 16-bit halves of each
// word exchanged (e.g. 0x02030001), and leaves the remaining words of the destination as they were.
// Only four entries are initialised; the array is declared with five elements, so the fifth is zero.
// The index used by PatchUcode comes from a 2-bit field, i.e. 0..3.
static vector unsigned int g_swap16x32m1[5] =
{
{0x02030001, 0x14151617, 0x18191A1B, 0x1C1D1E1F},
{0x02030001, 0x06070405, 0x18191A1B, 0x1C1D1E1F},
{0x02030001, 0x06070405, 0x0A0B0809, 0x1C1D1E1F},
{0x02030001, 0x06070405, 0x0A0B0809, 0x0E0F0C0D}
};
static inline void PatchUcodeConstSwap( void * pDestination, const fltx4 f4Source, int nLengthMinus1 )
{
*( fltx4* )pDestination = vec_perm( f4Source, *( fltx4* )pDestination, ( vector unsigned char )g_swap16x32m1[nLengthMinus1] );
}
// Applies a patch table to fragment program ucode, inserting values from g_aFPConst.
//   pUCode16    - ucode, addressed in 16-byte quadwords
//   pPatchTable - nPatchCount packed entries:
//                   bits  0-15  quadword offset into the ucode
//                   bits 16-25  index into g_aFPConst
//                   bits 30-31  length minus one (permute mask selector)
//   nPatchCount - number of entries in pPatchTable
inline void CGcmDrawState::PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount )
{
	for ( uint iPatch = 0; iPatch < nPatchCount; ++iPatch )
	{
		const uint nEntry = pPatchTable[ iPatch ];

		// Unpack the three fields of the entry
		const uint nQwordOffset = nEntry & 0xFFFF;
		const uint nConstIndex = ( nEntry >> 16 ) & 0x3FF;
		const uint nLenMinus1 = nEntry >> 30;

		fltx4 & f4Const = g_aFPConst[nConstIndex];
		PatchUcodeConstSwap( pUCode16 + nQwordOffset, f4Const, nLenMinus1 );
	}
}
#ifndef SPU
// Reserves space in the fragment program patch ring buffer for one patched copy of pFp's ucode
// and stores the resulting address in m_eaOutputUCode.
// The buffer is split into segments of GCM_PATCHSEGSIZE bytes. If the reservation would end in a
// different segment than the current write position, allocation moves to the start of the next
// segment (wrapping around), first waiting until the segment value read through
// g_label_fppatch_ring_seg is no longer that segment.
inline void CGcmDrawState::AllocateUcode(FpHeader_t* pFp)
{
	const uint32 nUcodeBytes = pFp->m_nUcodeSize;
	// Reservation = ucode + 400 extra bytes, rounded up to a multiple of 128
	// NOTE(review): the reason for the 400-byte allowance is not visible here
	const uint32 nReserveBytes = AlignValue(nUcodeBytes + 400, 128);

	uint32 nWritePos = g_ps3gcmGlobalState.m_nPatchIdx;
	const uint32 nCurrentSeg = nWritePos / GCM_PATCHSEGSIZE;
	const uint32 nEndingSeg = ( nWritePos + nReserveBytes ) / GCM_PATCHSEGSIZE;

	if ( nEndingSeg != nCurrentSeg )
	{
		const uint32 nTargetSeg = ( nCurrentSeg + 1 ) % ( GCM_PATCHBUFFSIZE / GCM_PATCHSEGSIZE );

		// Start from the cached read segment; only consult the label if the cache says "busy"
		uint32 nReadSeg = g_ps3gcmGlobalState.m_nPatchReadSeg;
		if ( nReadSeg == nTargetSeg )
		{
			nReadSeg = *g_label_fppatch_ring_seg;
		}

		gpGcmDrawState->CmdBufferFlush();

		// Poll until the label reports a different segment
		while ( nReadSeg == nTargetSeg )
		{
			sys_timer_usleep(60); // Not on SPU..
			nReadSeg = *g_label_fppatch_ring_seg;
		}

		nWritePos = nTargetSeg * GCM_PATCHSEGSIZE;
		g_ps3gcmGlobalState.m_nPatchReadSeg = nReadSeg;
	}

	uint8* pReserved = g_ps3gcmGlobalState.m_pPatchBuff + nWritePos;
	g_ps3gcmGlobalState.m_nPatchIdx = nWritePos + nReserveBytes;
	m_eaOutputUCode = uintp(pReserved);
}
#endif
// Prepares the slot at m_eaOutputUCode for this draw's fragment program.
// On PPU the ucode that follows the header (pFp + 1) is copied into the slot; on SPU no copy
// is made here. In both builds a back-end label write is queued that records which patch
// buffer segment the slot lies in.
// Returns the slot address as a quadword pointer.
inline fltx4* CGcmDrawState::CopyUcode(FpHeader_t* pFp)
{
	uint8* pSlot = (uint8*)m_eaOutputUCode;
	uint32 nUcodeBytes = pFp->m_nUcodeSize;
#ifndef SPU
	V_memcpy(pSlot, (uint8*)(pFp+1), nUcodeBytes);
#endif
	// Segment index of the slot within the patch buffer
	uint32 nSlotOffset = pSlot - g_ps3gcmGlobalState.m_pPatchBuff;
	uint32 nSlotSeg = nSlotOffset/GCM_PATCHSEGSIZE;

	// Label write tells the allocator that shaders in this segment are now in use
	GCM_FUNC(cellGcmSetWriteBackEndLabel, GCM_LABEL_FPPATCH_RING_SEG, nSlotSeg);
	return (fltx4*) pSlot;
}
// Patches the current pixel shader's ucode with the fragment constants and binds it.
//   nVertexToFragmentProgramAttributeMask - value written with the vertex attrib output mask method
// Data following the FpHeader_t is laid out as: ucode (m_nUcodeSize bytes), patch table
// (m_nPatchCount words), then texture control commands (m_nTexControls pairs of words).
// PPU patches the copy in the output slot; SPU patches the ucode behind the header in place
// and DMAs it out to m_eaOutputUCode.
inline void CGcmDrawState::BindFragmentProgram(uint32 nVertexToFragmentProgramAttributeMask)
{
	FpHeader_t * pHeader = m_pPixelShaderData->m_eaFp;
	uint32* pPatchTable = (uint32*)((uint8*)(pHeader + 1) + pHeader->m_nUcodeSize);

	// Copy and patch the ucode
	fltx4* pSlotUcode = CopyUcode(pHeader);
#ifndef SPU
	PatchUcode(pSlotUcode, pPatchTable, pHeader->m_nPatchCount );
#else
	fltx4* pLocalUcode = (fltx4*) (pHeader+1);
	PatchUcode(pLocalUcode, pPatchTable, pHeader->m_nPatchCount );
	gSpuMgr.DmaSync();
	gSpuMgr.DmaPut(m_eaOutputUCode, (void*)pLocalUcode, pHeader->m_nUcodeSize, SPU_DMAPUT_TAG);
#endif

	// Offset of the patched program as passed to the shader program method
	uint32 nProgramOffset = uintp(pSlotUcode);
	nProgramOffset += g_ps3gcmGlobalState.m_nIoOffsetDelta;

	uint32* pTexControlWords = pPatchTable + pHeader->m_nPatchCount;
	uint nTexControlPairs = pHeader->m_nTexControls;

	// The three macros below each append two words at gpGcmContext->current.
	// No space is reserved beforehand (the reserve call was left commented out in the original).
	CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, pHeader->m_nShaderControl0 );
	CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_MAIN + 1, ( nProgramOffset & 0x1fffffff ) );
	// This mask gets overwritten later, so it is useless here, but GPAD reports
	// "unrecognized sequence" if the command is not inserted at this point.
	CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, nVertexToFragmentProgramAttributeMask );

	// Append the pre-built texture control commands (two words each)
	V_memcpy( gpGcmContext->current, pTexControlWords, nTexControlPairs * sizeof( uint32 ) * 2 );
	gpGcmContext->current += 2 * nTexControlPairs;
}
void CGcmDrawState::CommitShaders()
{
uint nMask = m_dirtyCachesMask;
m_dirtyCachesMask = 0;
if( nMask & kDirtyVxCache )
{
GCM_FUNC(cellGcmSetInvalidateVertexCache);
}
if( nMask & kDirtyTxCache )
{
GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
}
if ( nMask & kDirtyVxShader )
{
void* pVertexShaderCmdBuffer = (void*)(m_pVertexShaderData->m_pVertexShaderCmdBuffer );
if( pVertexShaderCmdBuffer )
{
uint32 nVertexShaderCmdBufferWords = m_pVertexShaderData->m_nVertexShaderCmdBufferWords;
// GCM_FUNC( cellGcmReserveMethodSize, nVertexShaderCmdBufferWords );
// uint32_t *current = gpGcmContext->current;
V_memcpy(gpGcmContext->current, pVertexShaderCmdBuffer, nVertexShaderCmdBufferWords * sizeof( uint32 ));
gpGcmContext->current += nVertexShaderCmdBufferWords;
}
}
if ( nMask & kDirtyVxConstants )
{
uint nBits = m_shaderVxConstants;
// Disabling this check because it causes lots of per-vertex dynamic lighting problems in common_vs_fxc.h function DoLighting().
if( m_nSetTransformBranchBits != nBits )
{
GCM_FUNC( cellGcmSetTransformBranchBits, nBits );
m_nSetTransformBranchBits = nBits;
}
}
if ( nMask & ( kDirtyVxShader | kDirtyClipPlanes ) )
{
// GCM_FUNC( cellGcmSetUserClipPlaneControl,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 0 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 1 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 2 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 3 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 4 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 5 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0
// );
}
uint setVertexAttribOutputMask = ( nMask & ( kDirtyVxShader | kDirtyPxShader ) );
uint nVertexToFragmentProgramAttributeMask = m_pVertexShaderData->m_attributeOutputMask;
if ( m_pPixelShaderData )
{
nVertexToFragmentProgramAttributeMask = m_pPixelShaderData->m_attributeInputMask;
nVertexToFragmentProgramAttributeMask |= 0x20;
BindFragmentProgram( nVertexToFragmentProgramAttributeMask );
}
else
{
// we need to set the shader, but no shader specified, so set the default empty shader
if ( nMask & ( kDirtyPxShader | kDirtyPxConstants ) )
{
CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyShaderControl0 ); // +2
CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_LOCAL + 1,
( g_ps3gcmGlobalState.m_pShaderPsEmptyBuffer.Offset() & 0x1fffffff ) ); // +2
CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyAttributeInputMask | 0x20 );
}
}
if ( setVertexAttribOutputMask )
{
GCM_FUNC( cellGcmSetVertexAttribOutputMask, nVertexToFragmentProgramAttributeMask );
}
}
// Clears every byte of the fragment program constant store g_aFPConst.
inline void ZeroFPConsts()
{
	memset( &g_aFPConst[0], 0, sizeof( g_aFPConst ) );
}
// Uploads GCM_DS_MAXVPCONST vertex program constants, starting at register 0, from g_aVPConst.
// NOTE(review): presumably g_aVPConst is an all-zero block (hence the name) - confirm at its definition.
inline void ZeroVPConsts()
{
	float *pConstBlock = (float*)g_aVPConst;
	GCM_FUNC( cellGcmSetVertexProgramParameterBlock, 0, GCM_DS_MAXVPCONST, pConstBlock );
}
#ifndef SPU
// PPU side: tags this draw state as an end-of-frame command and hands it to SendToSpu().
inline void CGcmDrawState::EndFrame()
{
	m_cmd = CmdEndFrame;

	SendToSpu();
}
#endif
#ifndef SPU
// PPU side: tags this draw state as a commit-states command and hands it to SendToSpu().
// The states themselves are applied by the SPU build of this function.
inline void CGcmDrawState::CommitStates()
{
	m_cmd = CmdCommitStates;

	SendToSpu();
}
#else
// SPU side: applies the state carried by this draw state without issuing a draw.
// Order: memory-free label, optional RSX reset, optional constant zeroing, packed data, render states.
inline void CGcmDrawState::CommitStates()
{
	if ( m_nFreeLabel )
	{
		UnpackSetWriteBackEndLabel( GCM_LABEL_MEMORY_FREE, m_nFreeLabel );
	}

	// m_dirtyStatesMask is re-read for each test rather than cached in a local
	if ( m_dirtyStatesMask & kDirtyResetRsx )
	{
		UnpackResetRsxState();
	}
	if ( m_dirtyStatesMask & kDirtyZeroAllPSConsts )
	{
		ZeroFPConsts();
	}
	if ( m_dirtyStatesMask & kDirtyZeroAllVSConsts )
	{
		ZeroVPConsts();
	}

	// Pulls out pixel shader consts and sets vertex shader consts
	UnpackData();
	CommitRenderStates();
}
#endif
// Applies everything needed ahead of a draw call.
//   pDecl           - vertex declaration used for the vertex bindings
//   baseVertexIndex - forwarded to CommitVertexBindings
// Order: memory-free label, optional RSX reset, optional constant zeroing, packed data,
// (SPU only) texture layout fetch, render states, vertex bindings, samplers, shaders.
inline void CGcmDrawState::CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
{
	if ( m_nFreeLabel )
	{
		UnpackSetWriteBackEndLabel( GCM_LABEL_MEMORY_FREE, m_nFreeLabel );
	}

	// m_dirtyStatesMask is re-read for each test rather than cached in a local
	if ( m_dirtyStatesMask & kDirtyResetRsx )
	{
		UnpackResetRsxState();
	}
	if ( m_dirtyStatesMask & kDirtyZeroAllPSConsts )
	{
		ZeroFPConsts();
	}
	if ( m_dirtyStatesMask & kDirtyZeroAllVSConsts )
	{
		ZeroVPConsts();
	}

	// Pulls out pixel shader consts and sets vertex shader consts
	UnpackData();

#ifdef SPU
	extern void GetTextureLayouts();
	GetTextureLayouts();
#endif

	CommitRenderStates();
	CommitVertexBindings( pDecl, baseVertexIndex );
	CommitSamplers();
	CommitShaders();
}
//--------------------------------------------------------------------------------------------------
// Draw Prim
//--------------------------------------------------------------------------------------------------
#ifndef SPU
// PPU side of a user-pointer draw: the draw call itself is built by
// g_ps3gcmGlobalState.DrawPrimitiveUP, then this draw state is sent to the SPU as CmdDrawPrimUP.
// Parameters passed on in m_param:
//   [0] vertex declaration address
//   [1] address returned for the draw call, plus m_nIoOffsetDelta
//   [2] stride of vertex stream zero
//   [4] address of g_ps3texFormats
// m_param[3] is not written by this function.
inline void CGcmDrawState::DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
	CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride )
{
	// Put the draw call into the call buffer
	const uint32 nCallAddr = g_ps3gcmGlobalState.DrawPrimitiveUP( nPrimitiveType, nPrimitiveCount, pVertexStreamZeroData, nVertexStreamZeroStride );

	// Reserve space for the patched fragment program, if a pixel shader is set
	if ( m_pPixelShaderData )
	{
		AllocateUcode( (FpHeader_t*)m_pPixelShaderData->m_eaFp );
	}

	m_cmd = CmdDrawPrimUP;
	m_param[0] = uintp(pDecl);
	m_param[1] = nCallAddr + g_ps3gcmGlobalState.m_nIoOffsetDelta;
	m_param[2] = nVertexStreamZeroStride;
	m_param[4] = (uint32)&g_ps3texFormats;

	SendToSpu();
}
// PPU side of an indexed draw: packages the draw parameters and sends this draw state to the
// SPU as CmdDrawPrim. Nothing is sent when the primitive count converts to a GCM count of zero.
//   offset         - base offset of the index buffer; startIndex (16-bit indices) is added to it
//   MinVertexIndex, NumVertices - not used by this function
// Parameters passed on in m_param:
//   [0] vertex declaration address   [1] BaseVertexIndex   [2] GCM primitive mode
//   [3] index data offset            [4] address of g_ps3texFormats   [5] GCM index count
inline void CGcmDrawState::DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex,
	UINT NumVertices,UINT startIndex,UINT nDrawPrimCount )
{
	const uint8 nGcmMode = GetGcmMode(Type);
	if( !nGcmMode ) Error("PS3 : Unsupported prim type\n");

	const uint32 nFirstIndex = startIndex;
	const uint nPrimCount = nDrawPrimCount;
	const uint32 nGcmCount = GetGcmCount( Type, nPrimCount );
	const uint32 nIndexDataOffset = offset + nFirstIndex * sizeof( uint16 );

	if ( !nGcmCount )
		return;

	// Reserve space for the patched fragment program, if a pixel shader is set
	if ( m_pPixelShaderData )
	{
		AllocateUcode( (FpHeader_t*)m_pPixelShaderData->m_eaFp );
	}

	m_cmd = CmdDrawPrim;
	m_param[0] = uintp(pDecl);
	m_param[1] = BaseVertexIndex;
	m_param[2] = nGcmMode;
	m_param[3] = nIndexDataOffset;
	m_param[4] = (uint32)&g_ps3texFormats;
	m_param[5] = nGcmCount;

	SendToSpu();
}
#endif
//--------------------------------------------------------------------------------------------------
// Execute command shader buffers
//--------------------------------------------------------------------------------------------------
// Reads a value of type T from the bytes at pData and returns it by value.
// The read is a direct typed load, so pData must be suitably aligned for T.
template<class T> FORCEINLINE T GetData( uint8 *pData )
{
	T const *pTyped = reinterpret_cast< T const * >( pData );
	return *pTyped;
}
// Binds (or unbinds) a texture on the sampler named in bindTex.
//   bindTex - passed by value; m_nLayout == 0 means "no texture", which resets the sampler.
// For a real texture, the sampler states (sRGB read, shadow filter, wrap modes, filters) are
// taken from bindTex, the texture's offset and layout address are recorded in m_textures,
// the sampler is marked dirty and UnpackSetTexture is called.
// On SPU the local memory block is fetched by DMA into gLmBlock first; the fetch is started
// up front and waited on just before the block is needed.
inline void CGcmDrawState::BindTexture2( CPs3BindTexture_t bindTex)
{
#ifdef SPU
	// Kick off the fetch of the lmblock so the correct offset can be read later
	extern CPs3gcmLocalMemoryBlock gLmBlock;
	gSpuMgr.DmaGetUNSAFE(&gLmBlock, uintp(bindTex.m_pLmBlock), sizeof(gLmBlock), SPU_DMAGET_TAG );
#endif

	const uint32 nStage = bindTex.m_sampler;

	// No layout: clear the texture from this stage
	if ( !bindTex.m_nLayout )
	{
#ifdef SPU
		gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT);
#endif
		UnpackResetTexture( nStage );
		return;
	}

	// Sampler states derived from the bind flags (tested against the flag constants shifted down by 24)
	SetSamplerState( nStage, D3DSAMP_SRGBTEXTURE, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SRGBREAD>>24) ) != 0 );
	SetSamplerState( nStage, D3DSAMP_SHADOWFILTER, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SHADOWDEPTH>>24) ) ? 1 : 0 );

	// Addressing and filtering
	SetSamplerState( nStage, D3DSAMP_ADDRESSU, bindTex.m_UWrap );
	SetSamplerState( nStage, D3DSAMP_ADDRESSV, bindTex.m_VWrap );
	SetSamplerState( nStage, D3DSAMP_ADDRESSW, bindTex.m_WWrap );
	SetSamplerState( nStage, D3DSAMP_MINFILTER, bindTex.m_minFilter );
	SetSamplerState( nStage, D3DSAMP_MAGFILTER, bindTex.m_magFilter );
	SetSamplerState( nStage, D3DSAMP_MIPFILTER, bindTex.m_mipFilter );

#ifdef SPU
	// The fetched copy replaces the main-memory pointer from here on
	gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT);
	bindTex.m_pLmBlock = &gLmBlock;
#endif

	// Record where the texture lives; bit 0 of the stored offset flags local memory
	m_textures[nStage].m_nLocalOffset = bindTex.m_pLmBlock->Offset();
	m_textures[nStage].m_eaLayout = bindTex.m_nLayout;
	if ( bindTex.m_pLmBlock->IsLocalMemory() )
	{
		m_textures[nStage].m_nLocalOffset |= 1;
	}

	m_dirtySamplersMask |= ( 1 << nStage );
	UnpackSetTexture( nStage, m_textures[nStage].m_nLocalOffset, m_textures[nStage].m_eaLayout );
}
// Sends vertex program constants straight to GCM.
//   var     - first constant register
//   pVec    - source values
//   numVecs - number of constants to set
//   bForce  - not used by this implementation
inline void CGcmDrawState::SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs, bool bForce)
{
	const int nFirstRegister = var;
	const int nRegisterCount = numVecs;
	GCM_FUNC( cellGcmSetVertexProgramParameterBlock, nFirstRegister, nRegisterCount, pVec );
}
// Stores pixel shader constants into g_aFPConst.
//   var        - index of the first constant to overwrite
//   pValues    - source values, 16 bytes per constant
//   nNumConsts - number of constants to copy
//   bForce     - not used by this implementation
inline void CGcmDrawState::SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts, bool bForce)
{
	const int nBytesPerConst = 16;
	V_memcpy( &g_aFPConst[var], pValues, nNumConsts * nBytesPerConst );
}
#ifndef SPU
#include "shaderapifast.h"
#endif
// Queues a shader command buffer for later execution.
// PPU only: first walks a table of up to CBCMD_MAX_PS3TEX offsets located three ints into the
// buffer (a zero offset ends the table). Each offset points at a CPs3BindParams_t inside the
// buffer; the referenced texture is resolved through the shader API and packed as a
// kDataEcbTexture record tagged with its table position.
// Both builds: remembers the buffer pointer and its size (the uint32 at byte offset 4) in the
// next m_aECB / m_aSizeECB slot and packs a kDataECB marker.
// NOTE(review): the table position presumably follows the CBCMD_LENGTH and CBCMD_PS3TEX
// command words at the head of the buffer - confirm against the command buffer writer.
void CGcmDrawState::ExecuteCommandBuffer( uint8 *pCmdBuf )
{
#ifndef SPU
	int* pParamOffsets = (int*) ( pCmdBuf + 3 * sizeof(int) );
	for ( int iTex = 0; iTex < CBCMD_MAX_PS3TEX; iTex++ )
	{
		const uint32 nParamOffset = pParamOffsets[iTex];
		if ( nParamOffset == 0 )
			break;

		CPs3BindParams_t* pParams = (CPs3BindParams_t*)( pCmdBuf + nParamOffset );

		// Seed the bind record from the params stored in the command buffer
		CPs3BindTexture_t bindTex;
		bindTex.m_sampler = pParams->m_sampler;
		bindTex.m_nBindFlags = pParams->m_nBindFlags;
		bindTex.m_boundStd = pParams->m_boundStd;
		bindTex.m_hTexture = pParams->m_hTexture;

		// m_boundStd == -1 selects lookup by texture handle; anything else is a standard texture id
		if ( bindTex.m_boundStd == -1 )
		{
			ShaderApiFast( pShaderAPI )->GetPs3Texture( &bindTex, (ShaderAPITextureHandle_t)bindTex.m_hTexture );
		}
		else
		{
			ShaderApiFast( pShaderAPI )->GetPs3Texture( &bindTex, (StandardTextureId_t)bindTex.m_boundStd );
		}

		PackData( kDataEcbTexture, (uint8) iTex, sizeof(CPs3BindTexture_t), &bindTex );
	}
#endif

	// Record the buffer and its size in the next free slot
	const uint32 nBufferBytes = *( (uint32*)( pCmdBuf + 4 ) );
	m_aECB[m_nNumECB] = pCmdBuf;
	m_aSizeECB[m_nNumECB] = nBufferBytes;
	m_nNumECB++;

	PackData( kDataECB );
}
void CGcmDrawState::UnpackExecuteCommandBuffer( uint8 *pCmdBuf )
{
uint8* pStart = pCmdBuf;
uint8 *pReturnStack[20];
uint8 **pSP = &pReturnStack[ARRAYSIZE(pReturnStack)];
uint8 *pLastCmd;
for(;;)
{
uint8 *pCmd=pCmdBuf;
int nCmd = GetData<int>( pCmdBuf );
if (nCmd > CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE) DebuggerBreak();
switch( nCmd )
{
case CBCMD_END:
{
if ( pSP == &pReturnStack[ARRAYSIZE(pReturnStack)] )
return;
else
{
// pop pc
pCmdBuf = *( pSP ++ );
break;
}
}
case CBCMD_JUMP:
pCmdBuf = GetData<uint8 *>( pCmdBuf + sizeof( int ) );
break;
case CBCMD_JSR:
{
Assert( pSP > &(pReturnStack[0] ) );
// *(--pSP ) = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
// pCmdBuf = GetData<uint8 *>( pCmdBuf + sizeof( int ) );
UnpackExecuteCommandBuffer( GetData<uint8 *>( pCmdBuf + sizeof( int ) ) );
pCmdBuf = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
break;
}
case CBCMD_SET_PIXEL_SHADER_FLOAT_CONST:
{
int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
SetPixelShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
break;
}
case CBCMD_SETPIXELSHADERFOGPARAMS:
{
Error("Pixel Shader Fog params not supported\n");
break;
}
case CBCMD_STORE_EYE_POS_IN_PSCONST:
{
int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
float flWValue = GetData<float>( pCmdBuf + 2 * sizeof( int ) );
pCmdBuf += 2 * sizeof( int ) + sizeof( float );
float vecValue[4];
memcpy(vecValue, m_vecWorldSpaceCameraPosition, sizeof(vecValue));
vecValue[3] = flWValue;
SetPixelShaderConstantInternal( nReg, vecValue, 1, false );
break;
}
case CBCMD_SET_DEPTH_FEATHERING_CONST:
{
// int nConst = GetData<int>( pCmdBuf + sizeof( int ) );
// float fDepthBlendScale = GetData<float>( pCmdBuf + 2 * sizeof( int ) );
pCmdBuf += 2 * sizeof( int ) + sizeof( float );
// SetDepthFeatheringPixelShaderConstant( nConst, fDepthBlendScale );
break;
}
case CBCMD_SET_VERTEX_SHADER_FLOAT_CONST:
{
int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
SetVertexShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
break;
}
case CBCMD_BIND_PS3_TEXTURE:
{
CPs3BindParams_t params = GetData<CPs3BindParams_t> (pCmdBuf + sizeof( int ));
CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex];
gpGcmDrawState->BindTexture2( tex );
pCmdBuf += sizeof(int) + sizeof(params);
break;
}
case CBCMD_BIND_PS3_STANDARD_TEXTURE:
{
CPs3BindParams_t params = GetData<CPs3BindParams_t> (pCmdBuf + sizeof( int ));
CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex];
if (m_pFixed->m_nInstanced)
{
uint32 nBindFlags = tex.m_nBindFlags;
uint32 nSampler = tex.m_sampler;
switch (tex.m_boundStd)
{
case TEXTURE_LOCAL_ENV_CUBEMAP:
if (m_pFixed->m_nInstanced & GCM_DS_INST_ENVMAP) tex = m_pFixed->m_instanceEnvCubemap;
break;
case TEXTURE_LIGHTMAP:
if (m_pFixed->m_nInstanced & GCM_DS_INST_LIGHTMAP) tex = m_pFixed->m_instanceLightmap;
break;
case TEXTURE_PAINT:
if (m_pFixed->m_nInstanced & GCM_DS_INST_PAINTMAP) tex = m_pFixed->m_instancePaintmap;
break;
}
tex.m_nBindFlags = nBindFlags;
tex.m_sampler = nSampler;
}
// Bind texture
gpGcmDrawState->BindTexture2( tex );
// Twice more for bumped...
if ( (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED) || (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED))
{
tex.m_sampler++;
gpGcmDrawState->BindTexture2( tex );
tex.m_sampler++;
gpGcmDrawState->BindTexture2( tex );
}
pCmdBuf += sizeof(int) + sizeof(params);
break;
}
case CBCMD_PS3TEX:
{
pCmdBuf += sizeof(int) + (CBCMD_MAX_PS3TEX*sizeof(int));
break;
}
case CBCMD_LENGTH:
{
pCmdBuf += sizeof(int) *2 ;
break;
}
case CBCMD_SET_PSHINDEX:
{
// int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
// ShaderManager()->SetPixelShaderIndex( nIdx );
// pCmdBuf += 2 * sizeof( int );
Error("PSHINDEX Not Supported\n");
break;
}
case CBCMD_SET_VSHINDEX:
{
// int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
// ShaderManager()->SetVertexShaderIndex( nIdx );
pCmdBuf += 2 * sizeof( int );
Error("VSHINDEX Not Supported\n");
break;
}
case CBCMD_SET_VERTEX_SHADER_FLASHLIGHT_STATE:
{
// int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
// SetVertexShaderConstantInternal( nStartConst, m_FlashlightWorldToTexture.Base(), 4, false );
// pCmdBuf += 2 * sizeof( int );
// Error("Flashlight unsupported\n");
pCmdBuf += 2 * sizeof( int );
break;
}
case CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE:
{
Error("SetVertexShaderNearAndFarZ NOt SUPPORTED\n");
// int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
//
// VMatrix m;
//
// m = m_MaterialProjectionMatrix;
//
// // GetMatrix( MATERIAL_PROJECTION, m.m[0] );
//
// // m[2][2] = F/(N-F) (flip sign if RH)
// // m[3][2] = NF/(N-F)
//
// float vNearFar[4];
//
// float N = m[3][2] / m[2][2];
// float F = (m[3][2]*N) / (N + m[3][2]);
//
// vNearFar[0] = N;
// vNearFar[1] = F;
//
// SetVertexShaderConstantInternal( nStartConst, vNearFar, 1, false );
pCmdBuf += 2 * sizeof( int );
break;
}
case CBCMD_SET_PIXEL_SHADER_FLASHLIGHT_STATE:
{
// int nLightSampler = GetData<int>( pCmdBuf + sizeof( int ) );
// int nDepthSampler = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
// int nShadowNoiseSampler = GetData<int>( pCmdBuf + 3 * sizeof( int ) );
// int nColorConst = GetData<int>( pCmdBuf + 4 * sizeof( int ) );
// int nAttenConst = GetData<int>( pCmdBuf + 5 * sizeof( int ) );
// int nOriginConst = GetData<int>( pCmdBuf + 6 * sizeof( int ) );
// int nDepthTweakConst = GetData<int>( pCmdBuf + 7 * sizeof( int ) );
// int nScreenScaleConst = GetData<int>( pCmdBuf + 8 * sizeof( int ) );
// int nWorldToTextureConstant = GetData<int>( pCmdBuf + 9 * sizeof( int ) );
// bool bFlashlightNoLambert = GetData<int>( pCmdBuf + 10 * sizeof( int ) ) != 0;
// bool bSinglePassFlashlight = GetData<int>( pCmdBuf + 11 * sizeof( int ) ) != 0;
// pCmdBuf += 12 * sizeof( int );
//
// ShaderAPITextureHandle_t hTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_FlashlightState.m_pSpotlightTexture, m_FlashlightState.m_nSpotlightTextureFrame, 0 );
// BindTexture( (Sampler_t)nLightSampler, TEXTURE_BINDFLAGS_SRGBREAD, hTexture ); // !!!BUG!!!srgb or not?
//
// SetPixelShaderConstantInternal( nAttenConst, m_pFlashlightAtten, 1, false );
// SetPixelShaderConstantInternal( nOriginConst, m_pFlashlightPos, 1, false );
//
// m_pFlashlightColor[3] = bFlashlightNoLambert ? 2.0f : 0.0f; // This will be added to N.L before saturate to force a 1.0 N.L term
//
// // DX10 hardware and single pass flashlight require a hack scalar since the flashlight is added in linear space
// float flashlightColor[4] = { m_pFlashlightColor[0], m_pFlashlightColor[1], m_pFlashlightColor[2], m_pFlashlightColor[3] };
// if ( ( g_pHardwareConfig->UsesSRGBCorrectBlending() ) || ( bSinglePassFlashlight ) )
// {
// // Magic number that works well on the 360 and NVIDIA 8800
// flashlightColor[0] *= 2.5f;
// flashlightColor[1] *= 2.5f;
// flashlightColor[2] *= 2.5f;
// }
//
// SetPixelShaderConstantInternal( nColorConst, flashlightColor, 1, false );
//
// if ( nWorldToTextureConstant >= 0 )
// {
// SetPixelShaderConstantInternal( nWorldToTextureConstant, m_FlashlightWorldToTexture.Base(), 4, false );
// }
//
// BindStandardTexture( (Sampler_t)nShadowNoiseSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_SHADOW_NOISE_2D );
// if( m_pFlashlightDepthTexture && m_FlashlightState.m_bEnableShadows && ShaderUtil()->GetConfig().ShadowDepthTexture() )
// {
// ShaderAPITextureHandle_t hDepthTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_pFlashlightDepthTexture, 0, 0 );
// BindTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_SHADOWDEPTH, hDepthTexture );
//
// SetPixelShaderConstantInternal( nDepthTweakConst, m_pFlashlightTweaks, 1, false );
//
// // Dimensions of screen, used for screen-space noise map sampling
// float vScreenScale[4] = {1280.0f / 32.0f, 720.0f / 32.0f, 0, 0};
// int nWidth, nHeight;
// BaseClass::GetBackBufferDimensions( nWidth, nHeight );
//
// int nTexWidth, nTexHeight;
// GetStandardTextureDimensions( &nTexWidth, &nTexHeight, TEXTURE_SHADOW_NOISE_2D );
//
// vScreenScale[0] = (float) nWidth / nTexWidth;
// vScreenScale[1] = (float) nHeight / nTexHeight;
// vScreenScale[2] = 1.0f / m_FlashlightState.m_flShadowMapResolution;
// vScreenScale[3] = 2.0f / m_FlashlightState.m_flShadowMapResolution;
// SetPixelShaderConstantInternal( nScreenScaleConst, vScreenScale, 1, false );
// }
// else
// {
// BindStandardTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_WHITE );
// }
// Error("Flashlight unsupported\n");
pCmdBuf += 12 * sizeof( int );
break;
}
case CBCMD_SET_PIXEL_SHADER_UBERLIGHT_STATE:
{
// int iEdge0Const = GetData<int>( pCmdBuf + sizeof( int ) );
// int iEdge1Const = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
// int iEdgeOOWConst = GetData<int>( pCmdBuf + 3 * sizeof( int ) );
// int iShearRoundConst = GetData<int>( pCmdBuf + 4 * sizeof( int ) );
// int iAABBConst = GetData<int>( pCmdBuf + 5 * sizeof( int ) );
// int iWorldToLightConst = GetData<int>( pCmdBuf + 6 * sizeof( int ) );
pCmdBuf += 7 * sizeof( int );
//
// SetPixelShaderConstantInternal( iEdge0Const, m_UberlightRenderState.m_vSmoothEdge0.Base(), 1, false );
// SetPixelShaderConstantInternal( iEdge1Const, m_UberlightRenderState.m_vSmoothEdge1.Base(), 1, false );
// SetPixelShaderConstantInternal( iEdgeOOWConst, m_UberlightRenderState.m_vSmoothOneOverW.Base(), 1, false );
// SetPixelShaderConstantInternal( iShearRoundConst, m_UberlightRenderState.m_vShearRound.Base(), 1, false );
// SetPixelShaderConstantInternal( iAABBConst, m_UberlightRenderState.m_vaAbB.Base(), 1, false );
// SetPixelShaderConstantInternal( iWorldToLightConst, m_UberlightRenderState.m_WorldToLight.Base(), 4, false );
Error("Uberlight state unsupported\n");
break;
}
#ifndef NDEBUG
default:
Assert(0);
break;
#endif
}
pLastCmd = pCmd;
}
}
//--------------------------------------------------------------------------------------------------
// TextureReplace
//
// Records a replacement texture binding for one of the "instanced" standard textures in the
// fixed draw state (m_pFixed):
//   id  - standard texture id; only TEXTURE_LOCAL_ENV_CUBEMAP, TEXTURE_LIGHTMAP and
//         TEXTURE_PAINT are handled, any other id is ignored
//   tex - the binding to store for that slot
//
// For a handled id, the matching GCM_DS_INST_* bit is OR'd into m_nInstanced and tex is copied
// into the corresponding m_instance* member.
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::TextureReplace(uint32 id, CPs3BindTexture_t tex)
{
	switch (id)
	{
	case TEXTURE_LOCAL_ENV_CUBEMAP:
		m_pFixed->m_nInstanced |= GCM_DS_INST_ENVMAP;
		m_pFixed->m_instanceEnvCubemap = tex;
		break;

	case TEXTURE_LIGHTMAP:
		m_pFixed->m_nInstanced |= GCM_DS_INST_LIGHTMAP;
		m_pFixed->m_instanceLightmap = tex;
		break;

	case TEXTURE_PAINT:
		// The paint map must raise its own bit. This case previously OR'd in GCM_DS_INST_ENVMAP,
		// which flagged the env cubemap slot (not written here) and left the paint map unflagged.
		// NOTE(review): GCM_DS_INST_PAINTMAP is expected to be defined next to
		// GCM_DS_INST_ENVMAP / GCM_DS_INST_LIGHTMAP - confirm.
		m_pFixed->m_nInstanced |= GCM_DS_INST_PAINTMAP;
		m_pFixed->m_instancePaintmap = tex;
		break;
	}
}
#endif // INCLUDED_GCMDRAWSTATE_H