source-engine/materialsystem/stdshaders/common_flashlight_fxc.h

822 lines
36 KiB
C
Raw Normal View History

2020-04-22 12:56:21 -04:00
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Common pixel shader code specific to flashlights
//
// $NoKeywords: $
//
//=============================================================================//
#ifndef COMMON_FLASHLIGHT_FXC_H_
#define COMMON_FLASHLIGHT_FXC_H_
#include "common_ps_fxc.h"
// JasonM - TODO: remove this simpleton version
float DoShadow( sampler DepthSampler, float4 texCoord )
{
const float g_flShadowBias = 0.0005f;
float2 uoffset = float2( 0.5f/512.f, 0.0f );
float2 voffset = float2( 0.0f, 0.5f/512.f );
float3 projTexCoord = texCoord.xyz / texCoord.w;
float4 flashlightDepth = float4( tex2D( DepthSampler, projTexCoord + uoffset + voffset ).x,
tex2D( DepthSampler, projTexCoord + uoffset - voffset ).x,
tex2D( DepthSampler, projTexCoord - uoffset + voffset ).x,
tex2D( DepthSampler, projTexCoord - uoffset - voffset ).x );
# if ( defined( REVERSE_DEPTH_ON_X360 ) )
{
flashlightDepth = 1.0f - flashlightDepth;
}
# endif
float shadowed = 0.0f;
float z = texCoord.z/texCoord.w;
float4 dz = float4(z,z,z,z) - (flashlightDepth + float4( g_flShadowBias, g_flShadowBias, g_flShadowBias, g_flShadowBias));
float4 shadow = float4(0.25f,0.25f,0.25f,0.25f);
if( dz.x <= 0.0f )
shadowed += shadow.x;
if( dz.y <= 0.0f )
shadowed += shadow.y;
if( dz.z <= 0.0f )
shadowed += shadow.z;
if( dz.w <= 0.0f )
shadowed += shadow.w;
return shadowed;
}
float DoShadowNvidiaRAWZOneTap( sampler DepthSampler, const float4 shadowMapPos )
{
float ooW = 1.0f / shadowMapPos.w; // 1 / w
float3 shadowMapCenter_objDepth = shadowMapPos.xyz * ooW; // Do both projections at once
float2 shadowMapCenter = shadowMapCenter_objDepth.xy; // Center of shadow filter
float objDepth = shadowMapCenter_objDepth.z; // Object depth in shadow space
float fDepth = dot(tex2D(DepthSampler, shadowMapCenter).arg, float3(0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5));
return fDepth > objDepth;
}
float DoShadowNvidiaRAWZ( sampler DepthSampler, const float4 shadowMapPos )
{
float fE = 1.0f / 512.0f; // Epsilon
float ooW = 1.0f / shadowMapPos.w; // 1 / w
float3 shadowMapCenter_objDepth = shadowMapPos.xyz * ooW; // Do both projections at once
float2 shadowMapCenter = shadowMapCenter_objDepth.xy; // Center of shadow filter
float objDepth = shadowMapCenter_objDepth.z; // Object depth in shadow space
float4 vDepths;
vDepths.x = dot(tex2D(DepthSampler, shadowMapCenter + float2( fE, fE )).arg, float3(0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5));
vDepths.y = dot(tex2D(DepthSampler, shadowMapCenter + float2( -fE, fE )).arg, float3(0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5));
vDepths.z = dot(tex2D(DepthSampler, shadowMapCenter + float2( fE, -fE )).arg, float3(0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5));
vDepths.w = dot(tex2D(DepthSampler, shadowMapCenter + float2( -fE, -fE )).arg, float3(0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5));
return dot(vDepths > objDepth.xxxx, float4(0.25, 0.25, 0.25, 0.25));
}
float DoShadowNvidiaCheap( sampler DepthSampler, const float4 shadowMapPos )
{
float fTexelEpsilon = 1.0f / 1024.0f;
float ooW = 1.0f / shadowMapPos.w; // 1 / w
float3 shadowMapCenter_objDepth = shadowMapPos.xyz * ooW; // Do both projections at once
float2 shadowMapCenter = shadowMapCenter_objDepth.xy; // Center of shadow filter
float objDepth = shadowMapCenter_objDepth.z; // Object depth in shadow space
float4 vTaps;
vTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTexelEpsilon, fTexelEpsilon), objDepth, 1 ) ).x;
vTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, fTexelEpsilon), objDepth, 1 ) ).x;
vTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTexelEpsilon, -fTexelEpsilon), objDepth, 1 ) ).x;
vTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, -fTexelEpsilon), objDepth, 1 ) ).x;
return dot(vTaps, float4(0.25, 0.25, 0.25, 0.25));
}
float DoShadowNvidiaPCF3x3Box( sampler DepthSampler, const float4 shadowMapPos )
{
float fTexelEpsilon = 1.0f / 1024.0f;
float ooW = 1.0f / shadowMapPos.w; // 1 / w
float3 shadowMapCenter_objDepth = shadowMapPos.xyz * ooW; // Do both projections at once
float2 shadowMapCenter = shadowMapCenter_objDepth.xy; // Center of shadow filter
float objDepth = shadowMapCenter_objDepth.z; // Object depth in shadow space
float4 vOneTaps;
vOneTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTexelEpsilon, fTexelEpsilon ), objDepth, 1 ) ).x;
vOneTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, fTexelEpsilon ), objDepth, 1 ) ).x;
vOneTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTexelEpsilon, -fTexelEpsilon ), objDepth, 1 ) ).x;
vOneTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, -fTexelEpsilon ), objDepth, 1 ) ).x;
float flOneTaps = dot( vOneTaps, float4(1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f));
float4 vTwoTaps;
vTwoTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTexelEpsilon, 0 ), objDepth, 1 ) ).x;
vTwoTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, 0 ), objDepth, 1 ) ).x;
vTwoTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fTexelEpsilon ), objDepth, 1 ) ).x;
vTwoTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fTexelEpsilon ), objDepth, 1 ) ).x;
float flTwoTaps = dot( vTwoTaps, float4(1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f));
float flCenterTap = tex2Dproj( DepthSampler, float4( shadowMapCenter, objDepth, 1 ) ).x * (1.0f / 9.0f);
// Sum all 9 Taps
return flOneTaps + flTwoTaps + flCenterTap;
}
//
// 1 4 7 4 1
// 4 20 33 20 4
// 7 33 55 33 7
// 4 20 33 20 4
// 1 4 7 4 1
//
float DoShadowNvidiaPCF5x5Gaussian( sampler DepthSampler, const float4 shadowMapPos )
{
float fEpsilon = 1.0f / 512.0f;
float fTwoEpsilon = 2.0f * fEpsilon;
float ooW = 1.0f / shadowMapPos.w; // 1 / w
float3 shadowMapCenter_objDepth = shadowMapPos.xyz * ooW; // Do both projections at once
float2 shadowMapCenter = shadowMapCenter_objDepth.xy; // Center of shadow filter
float objDepth = shadowMapCenter_objDepth.z; // Object depth in shadow space
float4 vOneTaps;
vOneTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTwoEpsilon, fTwoEpsilon ), objDepth, 1 ) ).x;
vOneTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTwoEpsilon, fTwoEpsilon ), objDepth, 1 ) ).x;
vOneTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTwoEpsilon, -fTwoEpsilon ), objDepth, 1 ) ).x;
vOneTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTwoEpsilon, -fTwoEpsilon ), objDepth, 1 ) ).x;
float flOneTaps = dot( vOneTaps, float4(1.0f / 331.0f, 1.0f / 331.0f, 1.0f / 331.0f, 1.0f / 331.0f));
float4 vSevenTaps;
vSevenTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTwoEpsilon, 0 ), objDepth, 1 ) ).x;
vSevenTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTwoEpsilon, 0 ), objDepth, 1 ) ).x;
vSevenTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fTwoEpsilon ), objDepth, 1 ) ).x;
vSevenTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fTwoEpsilon ), objDepth, 1 ) ).x;
float flSevenTaps = dot( vSevenTaps, float4( 7.0f / 331.0f, 7.0f / 331.0f, 7.0f / 331.0f, 7.0f / 331.0f ) );
float4 vFourTapsA, vFourTapsB;
vFourTapsA.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTwoEpsilon, fEpsilon ), objDepth, 1 ) ).x;
vFourTapsA.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fEpsilon, fTwoEpsilon ), objDepth, 1 ) ).x;
vFourTapsA.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fEpsilon, fTwoEpsilon ), objDepth, 1 ) ).x;
vFourTapsA.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTwoEpsilon, fEpsilon ), objDepth, 1 ) ).x;
vFourTapsB.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTwoEpsilon, -fEpsilon ), objDepth, 1 ) ).x;
vFourTapsB.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fEpsilon, -fTwoEpsilon ), objDepth, 1 ) ).x;
vFourTapsB.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fEpsilon, -fTwoEpsilon ), objDepth, 1 ) ).x;
vFourTapsB.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fTwoEpsilon, -fEpsilon ), objDepth, 1 ) ).x;
float flFourTapsA = dot( vFourTapsA, float4( 4.0f / 331.0f, 4.0f / 331.0f, 4.0f / 331.0f, 4.0f / 331.0f ) );
float flFourTapsB = dot( vFourTapsB, float4( 4.0f / 331.0f, 4.0f / 331.0f, 4.0f / 331.0f, 4.0f / 331.0f ) );
float4 v20Taps;
v20Taps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fEpsilon, fEpsilon ), objDepth, 1 ) ).x;
v20Taps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fEpsilon, fEpsilon ), objDepth, 1 ) ).x;
v20Taps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fEpsilon, -fEpsilon ), objDepth, 1 ) ).x;
v20Taps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fEpsilon, -fEpsilon ), objDepth, 1 ) ).x;
float fl20Taps = dot( v20Taps, float4(20.0f / 331.0f, 20.0f / 331.0f, 20.0f / 331.0f, 20.0f / 331.0f));
float4 v33Taps;
v33Taps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( fEpsilon, 0 ), objDepth, 1 ) ).x;
v33Taps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fEpsilon, 0 ), objDepth, 1 ) ).x;
v33Taps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fEpsilon ), objDepth, 1 ) ).x;
v33Taps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 0, -fEpsilon ), objDepth, 1 ) ).x;
float fl33Taps = dot( v33Taps, float4(33.0f / 331.0f, 33.0f / 331.0f, 33.0f / 331.0f, 33.0f / 331.0f));
float flCenterTap = tex2Dproj( DepthSampler, float4( shadowMapCenter, objDepth, 1 ) ).x * (55.0f / 331.0f);
// Sum all 25 Taps
return flOneTaps + flSevenTaps + +flFourTapsA + flFourTapsB + fl20Taps + fl33Taps + flCenterTap;
}
float DoShadowATICheap( sampler DepthSampler, const float4 shadowMapPos )
{
float2 shadowMapCenter = shadowMapPos.xy/shadowMapPos.w;
float objDepth = shadowMapPos.z / shadowMapPos.w;
float fSampleDepth = tex2D( DepthSampler, shadowMapCenter ).x;
objDepth = min( objDepth, 0.99999 ); //HACKHACK: On 360, surfaces at or past the far flashlight plane have an abrupt cutoff. This is temp until a smooth falloff is implemented
return fSampleDepth > objDepth;
}
// Poisson disc, randomly rotated at different UVs
float DoShadowPoisson16Sample( sampler DepthSampler, sampler RandomRotationSampler, const float3 vProjCoords, const float2 vScreenPos, const float4 vShadowTweaks, bool bNvidiaHardwarePCF, bool bFetch4 )
{
float2 vPoissonOffset[8] = { float2( 0.3475f, 0.0042f ),
float2( 0.8806f, 0.3430f ),
float2( -0.0041f, -0.6197f ),
float2( 0.0472f, 0.4964f ),
float2( -0.3730f, 0.0874f ),
float2( -0.9217f, -0.3177f ),
float2( -0.6289f, 0.7388f ),
float2( 0.5744f, -0.7741f ) };
float flScaleOverMapSize = vShadowTweaks.x * 2; // Tweak parameters to shader
float2 vNoiseOffset = vShadowTweaks.zw;
float4 vLightDepths = 0, accum = 0.0f;
float2 rotOffset = 0;
float2 shadowMapCenter = vProjCoords.xy; // Center of shadow filter
float objDepth = min( vProjCoords.z, 0.99999 ); // Object depth in shadow space
// 2D Rotation Matrix setup
float3 RMatTop = 0, RMatBottom = 0;
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
RMatTop.xy = tex2D( RandomRotationSampler, cFlashlightScreenScale.xy * (vScreenPos * 0.5 + 0.5) + vNoiseOffset) * 2.0 - 1.0;
RMatBottom.xy = float2(-1.0, 1.0) * RMatTop.yx; // 2x2 rotation matrix in 4-tuple
#endif
RMatTop *= flScaleOverMapSize; // Scale up kernel while accounting for texture resolution
RMatBottom *= flScaleOverMapSize;
RMatTop.z = shadowMapCenter.x; // To be added in d2adds generated below
RMatBottom.z = shadowMapCenter.y;
float fResult = 0.0f;
if ( bNvidiaHardwarePCF )
{
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[0].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[0].xy) + RMatBottom.z;
vLightDepths.x += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[1].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[1].xy) + RMatBottom.z;
vLightDepths.y += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[2].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[2].xy) + RMatBottom.z;
vLightDepths.z += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[3].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[3].xy) + RMatBottom.z;
vLightDepths.w += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4].xy) + RMatBottom.z;
vLightDepths.x += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[5].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[5].xy) + RMatBottom.z;
vLightDepths.y += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[6].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[6].xy) + RMatBottom.z;
vLightDepths.z += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[7].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[7].xy) + RMatBottom.z;
vLightDepths.w += tex2Dproj( DepthSampler, float4(rotOffset, objDepth, 1) ).x;
fResult = dot( vLightDepths, float4( 0.25, 0.25, 0.25, 0.25) );
}
else if ( bFetch4 )
{
/*
TODO: Fix this contact hardening stuff
float flNumCloserSamples = 1;
float flAccumulatedCloserSamples = objDepth;
float4 vBlockerDepths;
// First, search for blockers
for( int j=0; j<8; j++ )
{
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[j].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[j].xy) + RMatBottom.z;
vBlockerDepths = tex2D( DepthSampler, rotOffset.xy );
// Which samples are closer than the pixel we're rendering?
float4 vCloserSamples = (vBlockerDepths < objDepth.xxxx ); // Binary comparison results
flNumCloserSamples += dot( vCloserSamples, float4(1, 1, 1, 1) ); // How many samples are closer than receiver?
flAccumulatedCloserSamples += dot (vCloserSamples, vBlockerDepths ); // Total depths from samples closer than receiver
}
float flBlockerDepth = flAccumulatedCloserSamples / flNumCloserSamples;
float flContactHardeningScale = (objDepth - flBlockerDepth) / flBlockerDepth;
// Scale the kernel
RMatTop.xy *= flContactHardeningScale;
RMatBottom.xy *= flContactHardeningScale;
*/
for( int i=0; i<8; i++ )
{
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[i].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[i].xy) + RMatBottom.z;
vLightDepths = tex2D( DepthSampler, rotOffset.xy );
accum += (vLightDepths > objDepth.xxxx);
}
fResult = dot( accum, float4( 1.0f/32.0f, 1.0f/32.0f, 1.0f/32.0f, 1.0f/32.0f) );
}
else // ATI vanilla hardware shadow mapping
{
for( int i=0; i<2; i++ )
{
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4*i+0].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4*i+0].xy) + RMatBottom.z;
vLightDepths.x = tex2D( DepthSampler, rotOffset.xy ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4*i+1].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4*i+1].xy) + RMatBottom.z;
vLightDepths.y = tex2D( DepthSampler, rotOffset.xy ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4*i+2].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4*i+2].xy) + RMatBottom.z;
vLightDepths.z = tex2D( DepthSampler, rotOffset.xy ).x;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4*i+3].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4*i+3].xy) + RMatBottom.z;
vLightDepths.w = tex2D( DepthSampler, rotOffset.xy ).x;
accum += (vLightDepths > objDepth.xxxx);
}
fResult = dot( accum, float4( 0.125, 0.125, 0.125, 0.125) );
}
return fResult;
}
#if defined( _X360 )
// Poisson disc, randomly rotated at different UVs
float DoShadow360Simple( sampler DepthSampler, const float3 vProjCoords )
{
float fLOD;
float2 shadowMapCenter = vProjCoords.xy; // Center of shadow filter
float objDepth = min( vProjCoords.z, 0.99999 ); // Object depth in shadow space
#if defined( REVERSE_DEPTH_ON_X360 )
objDepth = 1.0f - objDepth;
#endif
float4 vSampledDepths, vWeights;
asm {
getCompTexLOD2D fLOD.x, shadowMapCenter.xy, DepthSampler, AnisoFilter=max16to1
setTexLOD fLOD.x
tfetch2D vSampledDepths.x___, shadowMapCenter, DepthSampler, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths._x__, shadowMapCenter, DepthSampler, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.__x_, shadowMapCenter, DepthSampler, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.___x, shadowMapCenter, DepthSampler, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
getWeights2D vWeights, shadowMapCenter.xy, DepthSampler, MagFilter=linear, MinFilter=linear, UseComputedLOD=false, UseRegisterLOD=true
};
vWeights = float4( (1-vWeights.x)*(1-vWeights.y), vWeights.x*(1-vWeights.y), (1-vWeights.x)*vWeights.y, vWeights.x*vWeights.y );
#if defined( REVERSE_DEPTH_ON_X360 )
float4 vCompare = (vSampledDepths < objDepth.xxxx);
#else
float4 vCompare = (vSampledDepths > objDepth.xxxx);
#endif
return dot( vCompare, vWeights );
}
float Do360PCFFetch( sampler DepthSampler, float2 tc, float objDepth )
{
float fLOD;
float4 vSampledDepths, vWeights;
asm {
getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
setTexLOD fLOD.x
tfetch2D vSampledDepths.x___, tc, DepthSampler, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths._x__, tc, DepthSampler, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.__x_, tc, DepthSampler, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.___x, tc, DepthSampler, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
getWeights2D vWeights, tc.xy, DepthSampler, MagFilter=linear, MinFilter=linear, UseComputedLOD=false, UseRegisterLOD=true
};
vWeights = float4( (1-vWeights.x)*(1-vWeights.y), vWeights.x*(1-vWeights.y), (1-vWeights.x)*vWeights.y, vWeights.x*vWeights.y );
#if defined( REVERSE_DEPTH_ON_X360 )
float4 vCompare = (vSampledDepths < objDepth.xxxx);
#else
float4 vCompare = (vSampledDepths > objDepth.xxxx);
#endif
return dot( vCompare, vWeights );
}
float Do360NearestFetch( sampler DepthSampler, float2 tc, float objDepth )
{
float fLOD;
float4 vSampledDepth;
asm {
getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
setTexLOD fLOD.x
tfetch2D vSampledDepth.x___, tc, DepthSampler, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
};
#if defined( REVERSE_DEPTH_ON_X360 )
return (vSampledDepth.x < objDepth.x);
#else
return (vSampledDepth.x > objDepth.x);
#endif
}
float AmountShadowed_8Tap_360( sampler DepthSampler, float2 tc, float objDepth )
{
float fLOD;
float4 vSampledDepthsA, vSampledDepthsB;
// Optimal 8 rooks pattern to get an idea about whether we're at a penumbra or not
// From [Kallio07] "Scanline Edge-Flag Algorithm for Antialiasing"
//
// +---+---+---+---+---+---+---+---+
// | | | | | | o | | |
// +---+---+---+---+---+---+---+---+
// | o | | | | | | | |
// +---+---+---+---+---+---+---+---+
// | | | | o | | | | |
// +---+---+---+---+---+---+---+---+
// | | | | | | | o | |
// +---+---+---+---+---+---+---+---+
// | | o | | | | | | |
// +---+---+---+---+---+---+---+---+
// | | | | | o | | | |
// +---+---+---+---+---+---+---+---+
// | | | | | | | | o |
// +---+---+---+---+---+---+---+---+
// | | | o | | | | | |
// +---+---+---+---+---+---+---+---+
//
asm {
getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
setTexLOD fLOD.x
tfetch2D vSampledDepthsA.x___, tc, DepthSampler, OffsetX = -2.0, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsA._x__, tc, DepthSampler, OffsetX = -1.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsA.__x_, tc, DepthSampler, OffsetX = -1.0, OffsetY = 2.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsA.___x, tc, DepthSampler, OffsetX = -0.5, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsB.x___, tc, DepthSampler, OffsetX = 0.5, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsB._x__, tc, DepthSampler, OffsetX = 1.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsB.__x_, tc, DepthSampler, OffsetX = 1.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepthsB.___x, tc, DepthSampler, OffsetX = 2.0, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
};
#if defined( REVERSE_DEPTH_ON_X360 )
float4 vCompareA = (vSampledDepthsA < objDepth.xxxx);
float4 vCompareB = (vSampledDepthsB < objDepth.xxxx);
#else
float4 vCompareA = (vSampledDepthsA > objDepth.xxxx);
float4 vCompareB = (vSampledDepthsB > objDepth.xxxx);
#endif
return dot( vCompareA, float4(0.125,0.125,0.125,0.125) ) + dot( vCompareB, float4(0.125,0.125,0.125,0.125) );
}
float AmountShadowed_4Tap_360( sampler DepthSampler, float2 tc, float objDepth )
{
float fLOD;
float4 vSampledDepths;
// Rotated grid pattern to get an idea about whether we're at a penumbra or not
asm {
getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
setTexLOD fLOD.x
tfetch2D vSampledDepths.x___, tc, DepthSampler, OffsetX = -1.0, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths._x__, tc, DepthSampler, OffsetX = -0.5, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.__x_, tc, DepthSampler, OffsetX = 0.5, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
tfetch2D vSampledDepths.___x, tc, DepthSampler, OffsetX = 1.0, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
};
#if defined( REVERSE_DEPTH_ON_X360 )
float4 vCompare = (vSampledDepths < objDepth.xxxx);
#else
float4 vCompare = (vSampledDepths > objDepth.xxxx);
#endif
return dot( vCompare, float4(0.25,0.25,0.25,0.25) );
}
// Poisson disc, randomly rotated at different UVs
float DoShadowPoisson360( sampler DepthSampler, sampler RandomRotationSampler, const float3 vProjCoords, const float2 vScreenPos, const float4 vShadowTweaks )
{
float2 vPoissonOffset[8] = { float2( 0.3475f, 0.0042f ), float2( 0.8806f, 0.3430f ),
float2( -0.0041f, -0.6197f ), float2( 0.0472f, 0.4964f ),
float2( -0.3730f, 0.0874f ), float2( -0.9217f, -0.3177f ),
float2( -0.6289f, 0.7388f ), float2( 0.5744f, -0.7741f ) };
float2 shadowMapCenter = vProjCoords.xy; // Center of shadow filter
float objDepth = min( vProjCoords.z, 0.99999 ); // Object depth in shadow space
#if defined( REVERSE_DEPTH_ON_X360 )
objDepth = 1.0f - objDepth;
#endif
float fAmountShadowed = AmountShadowed_4Tap_360( DepthSampler, shadowMapCenter, objDepth );
if ( fAmountShadowed >= 1.0f ) // Fully in light
{
return 1.0f;
}
else // Do the expensive filtering since we're at least partially shadowed
{
float flScaleOverMapSize = 1.7f / 512.0f; // Tweak parameters to shader
// 2D Rotation Matrix setup
float3 RMatTop = 0, RMatBottom = 0;
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
RMatTop.xy = tex2D( RandomRotationSampler, cFlashlightScreenScale.xy * (vScreenPos * 0.5 + 0.5)) * 2.0 - 1.0;
RMatBottom.xy = float2(-1.0, 1.0) * RMatTop.yx; // 2x2 rotation matrix in 4-tuple
#endif
RMatTop *= flScaleOverMapSize; // Scale up kernel while accounting for texture resolution
RMatBottom *= flScaleOverMapSize;
RMatTop.z = shadowMapCenter.x; // To be added in d2adds generated below
RMatBottom.z = shadowMapCenter.y;
float2 rotOffset = float2(0,0);
float4 vAccum = 0;
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[0].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[0].xy) + RMatBottom.z;
vAccum.x = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[1].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[1].xy) + RMatBottom.z;
vAccum.y = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[2].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[2].xy) + RMatBottom.z;
vAccum.z = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[3].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[3].xy) + RMatBottom.z;
vAccum.w = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[4].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[4].xy) + RMatBottom.z;
vAccum.x += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[5].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[5].xy) + RMatBottom.z;
vAccum.y += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[6].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[6].xy) + RMatBottom.z;
vAccum.z += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
rotOffset.x = dot (RMatTop.xy, vPoissonOffset[7].xy) + RMatTop.z;
rotOffset.y = dot (RMatBottom.xy, vPoissonOffset[7].xy) + RMatBottom.z;
vAccum.w += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
return dot( vAccum, float4( 0.25, 0.25, 0.25, 0.25) );
}
}
#endif // _X360
float DoFlashlightShadow( sampler DepthSampler, sampler RandomRotationSampler, float3 vProjCoords, float2 vScreenPos, int nShadowLevel, float4 vShadowTweaks, bool bAllowHighQuality )
{
float flShadow = 1.0f;
#if !defined( _X360 ) //PC
if( nShadowLevel == NVIDIA_PCF_POISSON )
flShadow = DoShadowPoisson16Sample( DepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, vShadowTweaks, true, false );
else if( nShadowLevel == ATI_NOPCF )
flShadow = DoShadowPoisson16Sample( DepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, vShadowTweaks, false, false );
else if( nShadowLevel == ATI_NO_PCF_FETCH4 )
flShadow = DoShadowPoisson16Sample( DepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, vShadowTweaks, false, true );
return flShadow;
#else
// Compile-time switch for shaders which allow high quality modes on 360
if ( bAllowHighQuality )
{
// Static control flow switch for shadow quality. Some non-interactive sequences use the high quality path
if ( g_bHighQualityShadows )
{
flShadow = DoShadowPoisson360( DepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, vShadowTweaks );
}
else
{
flShadow = DoShadow360Simple( DepthSampler, vProjCoords );
}
}
else
{
flShadow = DoShadow360Simple( DepthSampler, vProjCoords );
}
return flShadow;
#endif
}
float3 SpecularLight( const float3 vWorldNormal, const float3 vLightDir, const float fSpecularExponent,
const float3 vEyeDir, const bool bDoSpecularWarp, in sampler specularWarpSampler, float fFresnel )
{
float3 result = float3(0.0f, 0.0f, 0.0f);
//float3 vReflect = reflect( -vEyeDir, vWorldNormal );
float3 vReflect = 2 * vWorldNormal * dot( vWorldNormal , vEyeDir ) - vEyeDir; // Reflect view through normal
float3 vSpecular = saturate(dot( vReflect, vLightDir )); // L.R (use half-angle instead?)
vSpecular = pow( vSpecular.x, fSpecularExponent ); // Raise to specular power
// Optionally warp as function of scalar specular and fresnel
if ( bDoSpecularWarp )
vSpecular *= tex2D( specularWarpSampler, float2(vSpecular.x, fFresnel) ); // Sample at { (L.R)^k, fresnel }
return vSpecular;
}
void DoSpecularFlashlight( float3 flashlightPos, float3 worldPos, float4 flashlightSpacePosition, float3 worldNormal,
float3 attenuationFactors, float farZ, sampler FlashlightSampler, sampler FlashlightDepthSampler, sampler RandomRotationSampler,
int nShadowLevel, bool bDoShadows, bool bAllowHighQuality, const float2 vScreenPos, const float fSpecularExponent, const float3 vEyeDir,
const bool bDoSpecularWarp, sampler specularWarpSampler, float fFresnel, float4 vShadowTweaks,
// Outputs of this shader...separate shadowed diffuse and specular from the flashlight
out float3 diffuseLighting, out float3 specularLighting )
{
float3 vProjCoords = flashlightSpacePosition.xyz / flashlightSpacePosition.w;
float3 flashlightColor = float3(1,1,1);
#if ( defined( _X360 ) )
float3 ltz = vProjCoords.xyz < float3( 0.0f, 0.0f, 0.0f );
float3 gto = vProjCoords.xyz > float3( 1.0f, 1.0f, 1.0f );
[branch]
if ( dot(ltz + gto, float3(1,1,1)) > 0 )
{
clip(-1);
diffuseLighting = specularLighting = float3(0,0,0);
return;
}
else
{
flashlightColor = tex2D( FlashlightSampler, vProjCoords );
[branch]
if ( dot(flashlightColor.xyz, float3(1,1,1)) <= 0 )
{
clip(-1);
diffuseLighting = specularLighting = float3(0,0,0);
return;
}
}
#else
flashlightColor = tex2D( FlashlightSampler, vProjCoords );
#endif
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
flashlightColor *= cFlashlightColor.xyz; // Flashlight color
#endif
float3 delta = flashlightPos - worldPos;
float3 L = normalize( delta );
float distSquared = dot( delta, delta );
float dist = sqrt( distSquared );
float endFalloffFactor = RemapValClamped( dist, farZ, 0.6f * farZ, 0.0f, 1.0f );
// Attenuation for light and to fade out shadow over distance
float fAtten = saturate( dot( attenuationFactors, float3( 1.0f, 1.0f/dist, 1.0f/distSquared ) ) );
// Shadowing and coloring terms
#if (defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0))
if ( bDoShadows )
{
float flShadow = DoFlashlightShadow( FlashlightDepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, nShadowLevel, vShadowTweaks, bAllowHighQuality );
float flAttenuated = lerp( flShadow, 1.0f, vShadowTweaks.y ); // Blend between fully attenuated and not attenuated
flShadow = saturate( lerp( flAttenuated, flShadow, fAtten ) ); // Blend between shadow and above, according to light attenuation
flashlightColor *= flShadow; // Shadow term
}
#endif
diffuseLighting = fAtten;
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
diffuseLighting *= saturate( dot( L.xyz, worldNormal.xyz ) + flFlashlightNoLambertValue ); // Lambertian term
#else
diffuseLighting *= saturate( dot( L.xyz, worldNormal.xyz ) ); // Lambertian (not Half-Lambert) term
#endif
diffuseLighting *= flashlightColor;
diffuseLighting *= endFalloffFactor;
// Specular term (masked by diffuse)
specularLighting = diffuseLighting * SpecularLight ( worldNormal, L, fSpecularExponent, vEyeDir, bDoSpecularWarp, specularWarpSampler, fFresnel );
}
// Diffuse only version
float3 DoFlashlight( float3 flashlightPos, float3 worldPos, float4 flashlightSpacePosition, float3 worldNormal,
float3 attenuationFactors, float farZ, sampler FlashlightSampler, sampler FlashlightDepthSampler,
sampler RandomRotationSampler, int nShadowLevel, bool bDoShadows, bool bAllowHighQuality,
const float2 vScreenPos, bool bClip, float4 vShadowTweaks = float4(3/1024.0f, 0.0005f, 0.0f, 0.0f), bool bHasNormal = true )
{
float3 vProjCoords = flashlightSpacePosition.xyz / flashlightSpacePosition.w;
float3 flashlightColor = float3(1,1,1);
#if ( defined( _X360 ) )
float3 ltz = vProjCoords.xyz < float3( 0.0f, 0.0f, 0.0f );
float3 gto = vProjCoords.xyz > float3( 1.0f, 1.0f, 1.0f );
[branch]
if ( dot(ltz + gto, float3(1,1,1)) > 0 )
{
if ( bClip )
{
clip(-1);
}
return float3(0,0,0);
}
else
{
flashlightColor = tex2D( FlashlightSampler, vProjCoords );
[branch]
if ( dot(flashlightColor.xyz, float3(1,1,1)) <= 0 )
{
if ( bClip )
{
clip(-1);
}
return float3(0,0,0);
}
}
#else
flashlightColor = tex2D( FlashlightSampler, vProjCoords );
#endif
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
flashlightColor *= cFlashlightColor.xyz; // Flashlight color
#endif
float3 delta = flashlightPos - worldPos;
float3 L = normalize( delta );
float distSquared = dot( delta, delta );
float dist = sqrt( distSquared );
float endFalloffFactor = RemapValClamped( dist, farZ, 0.6f * farZ, 0.0f, 1.0f );
// Attenuation for light and to fade out shadow over distance
float fAtten = saturate( dot( attenuationFactors, float3( 1.0f, 1.0f/dist, 1.0f/distSquared ) ) );
// Shadowing and coloring terms
#if (defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0))
if ( bDoShadows )
{
float flShadow = DoFlashlightShadow( FlashlightDepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, nShadowLevel, vShadowTweaks, bAllowHighQuality );
float flAttenuated = lerp( flShadow, 1.0f, vShadowTweaks.y ); // Blend between fully attenuated and not attenuated
flShadow = saturate( lerp( flAttenuated, flShadow, fAtten ) ); // Blend between shadow and above, according to light attenuation
flashlightColor *= flShadow; // Shadow term
}
#endif
float3 diffuseLighting = fAtten;
float flLDotWorldNormal;
if ( bHasNormal )
{
flLDotWorldNormal = dot( L.xyz, worldNormal.xyz );
}
else
{
flLDotWorldNormal = 1.0f;
}
#if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
diffuseLighting *= saturate( flLDotWorldNormal + flFlashlightNoLambertValue ); // Lambertian term
#else
diffuseLighting *= saturate( flLDotWorldNormal ); // Lambertian (not Half-Lambert) term
#endif
diffuseLighting *= flashlightColor;
diffuseLighting *= endFalloffFactor;
return diffuseLighting;
}
#endif //#ifndef COMMON_FLASHLIGHT_FXC_H_