457 lines
16 KiB
C
Raw Permalink Normal View History

2021-07-24 21:11:47 -07:00
//========= Copyright <20> 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $Workfile: $
// $Date: $
// $NoKeywords: $
//=============================================================================//
#ifndef COLORSPACE_H
#define COLORSPACE_H
#ifdef _WIN32
#pragma once
#endif
#include "mathlib/mathlib.h"
#include "mathlib/ssemath.h"
#include "mathlib/bumpvects.h"
#include "tier0/dbg.h"
extern float g_LinearToVertex[4096]; // linear (0..4) to screen corrected vertex space (0..1?)
// FIXME!!! Get rid of this. . all of this should be in mathlib
namespace ColorSpace
{
void SetGamma( float screenGamma, float texGamma,
float overbright, bool allowCheats, bool linearFrameBuffer );
// convert texture to linear 0..1 value
float TextureToLinear( int c );
// convert texture to linear 0..1 value
int LinearToTexture( float f );
float TexLightToLinear( int c, int exponent );
// assume 0..4 range
void LinearToLightmap( unsigned char *pDstRGB, const float *pSrcRGB );
// assume 0..4 range
void LinearToBumpedLightmap( const float *linearColor, const float *linearBumpColor1,
const float *linearBumpColor2, const float *linearBumpColor3,
unsigned char *ret, unsigned char *retBump1,
unsigned char *retBump2, unsigned char *retBump3 );
//
void LinearToBumpedLightmapAlpha( const float *linearAlpha, const float *linearBumpAlpha1,
const float *linearBumpAlpha2, const float *linearBumpAlpha3,
unsigned char *ret, unsigned char *retBump1,
unsigned char *retBump2, unsigned char *retBump3 );
// converts 0..1 linear value to screen gamma (0..255)
int LinearToScreenGamma( float f );
FORCEINLINE void LinearToLightmap( unsigned char *pDstRGB, const float *pSrcRGB )
{
Vector tmpVect;
#if 1
int i, j;
for( j = 0; j < 3; j++ )
{
i = RoundFloatToInt( pSrcRGB[j] * 1024 ); // assume 0..4 range
if (i < 0)
{
i = 0;
}
if (i > 4091)
{
i = 4091;
}
tmpVect[j] = g_LinearToVertex[i];
}
#else
tmpVect[0] = LinearToVertexLight( pSrcRGB[0] );
tmpVect[1] = LinearToVertexLight( pSrcRGB[1] );
tmpVect[2] = LinearToVertexLight( pSrcRGB[2] );
#endif
ColorClamp( tmpVect );
pDstRGB[0] = RoundFloatToByte( tmpVect[0] * 255.0f );
pDstRGB[1] = RoundFloatToByte( tmpVect[1] * 255.0f );
pDstRGB[2] = RoundFloatToByte( tmpVect[2] * 255.0f );
}
// Clamp the three values for bumped lighting such that we trade off directionality for brightness.
FORCEINLINE void ColorClampBumped( Vector& color1, Vector& color2, Vector& color3 )
{
Vector maxs;
Vector *colors[3] = { &color1, &color2, &color3 };
maxs[0] = VectorMaximum( color1 );
maxs[1] = VectorMaximum( color2 );
maxs[2] = VectorMaximum( color3 );
// HACK! Clean this up, and add some else statements
#define CONDITION(a,b,c) do { if( maxs[a] >= maxs[b] && maxs[b] >= maxs[c] ) { order[0] = a; order[1] = b; order[2] = c; } } while( 0 )
int order[3];
CONDITION(0,1,2);
CONDITION(0,2,1);
CONDITION(1,0,2);
CONDITION(1,2,0);
CONDITION(2,0,1);
CONDITION(2,1,0);
int i;
for( i = 0; i < 3; i++ )
{
float max = VectorMaximum( *colors[order[i]] );
if( max <= 1.0f )
{
continue;
}
// This channel is too bright. . take half of the amount that we are over and
// add it to the other two channel.
float factorToRedist = ( max - 1.0f ) / max;
Vector colorToRedist = factorToRedist * *colors[order[i]];
*colors[order[i]] -= colorToRedist;
colorToRedist *= 0.5f;
*colors[order[(i+1)%3]] += colorToRedist;
*colors[order[(i+2)%3]] += colorToRedist;
}
ColorClamp( color1 );
ColorClamp( color2 );
ColorClamp( color3 );
if( color1[0] < 0.f ) color1[0] = 0.f;
if( color1[1] < 0.f ) color1[1] = 0.f;
if( color1[2] < 0.f ) color1[2] = 0.f;
if( color2[0] < 0.f ) color2[0] = 0.f;
if( color2[1] < 0.f ) color2[1] = 0.f;
if( color2[2] < 0.f ) color2[2] = 0.f;
if( color3[0] < 0.f ) color3[0] = 0.f;
if( color3[1] < 0.f ) color3[1] = 0.f;
if( color3[2] < 0.f ) color3[2] = 0.f;
}
FORCEINLINE void LinearToBumpedLightmap( const float *linearColor, const float *linearBumpColor1,
const float *linearBumpColor2, const float *linearBumpColor3,
unsigned char *ret, unsigned char *retBump1,
unsigned char *retBump2, unsigned char *retBump3 )
{
const Vector &linearBump1 = *( ( const Vector * )linearBumpColor1 );
const Vector &linearBump2 = *( ( const Vector * )linearBumpColor2 );
const Vector &linearBump3 = *( ( const Vector * )linearBumpColor3 );
Vector gammaGoal;
// gammaGoal is premultiplied by 1/overbright, which we want
gammaGoal[0] = LinearToVertexLight( linearColor[0] );
gammaGoal[1] = LinearToVertexLight( linearColor[1] );
gammaGoal[2] = LinearToVertexLight( linearColor[2] );
Vector bumpAverage = linearBump1;
bumpAverage += linearBump2;
bumpAverage += linearBump3;
bumpAverage *= ( 1.0f / 3.0f );
Vector correctionScale;
if( *( int * )&bumpAverage[0] != 0 && *( int * )&bumpAverage[1] != 0 && *( int * )&bumpAverage[2] != 0 )
{
// fast path when we know that we don't have to worry about divide by zero.
VectorDivide( gammaGoal, bumpAverage, correctionScale );
// correctionScale = gammaGoal / bumpSum;
}
else
{
correctionScale.Init( 0.0f, 0.0f, 0.0f );
if( bumpAverage[0] != 0.0f )
{
correctionScale[0] = gammaGoal[0] / bumpAverage[0];
}
if( bumpAverage[1] != 0.0f )
{
correctionScale[1] = gammaGoal[1] / bumpAverage[1];
}
if( bumpAverage[2] != 0.0f )
{
correctionScale[2] = gammaGoal[2] / bumpAverage[2];
}
}
Vector correctedBumpColor1;
Vector correctedBumpColor2;
Vector correctedBumpColor3;
VectorMultiply( linearBump1, correctionScale, correctedBumpColor1 );
VectorMultiply( linearBump2, correctionScale, correctedBumpColor2 );
VectorMultiply( linearBump3, correctionScale, correctedBumpColor3 );
Vector check = ( correctedBumpColor1 + correctedBumpColor2 + correctedBumpColor3 ) / 3.0f;
ColorClampBumped( correctedBumpColor1, correctedBumpColor2, correctedBumpColor3 );
ret[0] = RoundFloatToByte( gammaGoal[0] * 255.0f );
ret[1] = RoundFloatToByte( gammaGoal[1] * 255.0f );
ret[2] = RoundFloatToByte( gammaGoal[2] * 255.0f );
retBump1[0] = RoundFloatToByte( correctedBumpColor1[0] * 255.0f );
retBump1[1] = RoundFloatToByte( correctedBumpColor1[1] * 255.0f );
retBump1[2] = RoundFloatToByte( correctedBumpColor1[2] * 255.0f );
retBump2[0] = RoundFloatToByte( correctedBumpColor2[0] * 255.0f );
retBump2[1] = RoundFloatToByte( correctedBumpColor2[1] * 255.0f );
retBump2[2] = RoundFloatToByte( correctedBumpColor2[2] * 255.0f );
retBump3[0] = RoundFloatToByte( correctedBumpColor3[0] * 255.0f );
retBump3[1] = RoundFloatToByte( correctedBumpColor3[1] * 255.0f );
retBump3[2] = RoundFloatToByte( correctedBumpColor3[2] * 255.0f );
}
// For use with CSM correct blending output in alpha channel from vrad
// we can go out of 0..1 range if we normalize the bumped alpha data
// so we 'squash' the range to compensate.
// Note - We shouldn't be taking the simple average in the bump case when normalizing anyway
// (i.e. add together and multiply by 0.575 comments below).
// Keeping the averaging as is for consistency with RGB's, etc
FORCEINLINE void LinearToBumpedLightmapAlpha( const float *linearAlpha, const float *linearBumpAlpha1,
const float *linearBumpAlpha2, const float *linearBumpAlpha3,
unsigned char *ret, unsigned char *retBump1,
unsigned char *retBump2, unsigned char *retBump3 )
{
// divide by three to ensure we stay in a 0..1 range here, scale up result later (via scaling in CSM light color)
const float &linearUnbumped = *((const float *)linearAlpha) / 3.0f;
const float &linearBump1 = *((const float *)linearBumpAlpha1) / 3.0f;
const float &linearBump2 = *((const float *)linearBumpAlpha2) / 3.0f;
const float &linearBump3 = *((const float *)linearBumpAlpha3) / 3.0f;
float bumpAverage = linearBump1;
bumpAverage += linearBump2;
bumpAverage += linearBump3;
bumpAverage *= (1.0f / 3.0f);
float correctionScale;
if ( *(int *)&bumpAverage != 0 )
{
// fast path when we know that we don't have to worry about divide by zero.
correctionScale = linearUnbumped / bumpAverage;
}
else
{
correctionScale = 0.0f;
if ( bumpAverage != 0.0f )
{
correctionScale = linearUnbumped / bumpAverage;
}
}
float correctedBumpAlpha1;
float correctedBumpAlpha2;
float correctedBumpAlpha3;
correctedBumpAlpha1 = linearBump1 * correctionScale;
correctedBumpAlpha2 = linearBump2 * correctionScale;
correctedBumpAlpha3 = linearBump3 * correctionScale;
ret[0] = RoundFloatToByte( linearUnbumped * 255.0f );
retBump1[0] = RoundFloatToByte( correctedBumpAlpha1 * 255.0f );
retBump2[0] = RoundFloatToByte( correctedBumpAlpha2 * 255.0f );
retBump3[0] = RoundFloatToByte( correctedBumpAlpha3 * 255.0f );
}
uint16 LinearFloatToCorrectedShort( float in );
inline unsigned short LinearToUnsignedShort( float in, int nFractionalBits )
{
unsigned short out;
in = in * ( 1 << nFractionalBits );
in = MIN( in, 65535 );
out = ( unsigned short ) MAX( in, 0.0f );
return out;
}
inline void ClampToHDR( const Vector &in, unsigned short out[3] )
{
Vector tmp = in;
out[0] = LinearFloatToCorrectedShort( in.x );
out[1] = LinearFloatToCorrectedShort( in.y );
out[2] = LinearFloatToCorrectedShort( in.z );
}
// divide by 3 to ensure we stay in the same range as bumped alpha data
// this is important for overlays, since a bumped overlay on a non-bumped
// lightmap surface is still drawn with the bumped lightmap shader, with 3x the base lightmap
// and vice versa. If we're not consistent, we break the CSM blending
FORCEINLINE void LinearToLightmapAlpha( unsigned char *pDstA, const float srcA )
{
pDstA[0] = RoundFloatToByte( (srcA * 255.0f) / 3.0f );
}
FORCEINLINE void LinearToLightmapAlpha( int *pDstA, const float srcA )
{
pDstA[0] = LinearToUnsignedShort( srcA / 3.0f, 16 );
}
FORCEINLINE void LinearToLightmapAlpha( float *pA )
{
pA[0] = pA[0] / 3.0f;
}
FORCEINLINE void
LinearToBumpedLightmap( const float *linearColor, const float *linearBumpColor1,
const float *linearBumpColor2, const float *linearBumpColor3,
float *ret, float *retBump1,
float *retBump2, float *retBump3 )
{
const Vector &linearUnbumped = *( ( const Vector * )linearColor );
Vector linearBump1 = *( ( const Vector * )linearBumpColor1 );
Vector linearBump2 = *( ( const Vector * )linearBumpColor2 );
Vector linearBump3 = *( ( const Vector * )linearBumpColor3 );
// find a scale factor which makes the average of the 3 bumped mapped vectors match the
// straight up vector (if possible), so that flat bumpmapped areas match non-bumpmapped
// areas.
// Note: According to Alex, this code is completely wrong.
// Because the bump vectors constitute a orthonormal basis, one does not simply average
// them to get the straight-up result. In fact they are added together then multiplied
// by 0.575
Vector bumpAverage = linearBump1;
bumpAverage += linearBump2;
bumpAverage += linearBump3;
bumpAverage *= ( 1.0f / 3.0f );
Vector correctionScale;
if( *( int * )&bumpAverage[0] != 0 &&
*( int * )&bumpAverage[1] != 0 &&
*( int * )&bumpAverage[2] != 0 )
{
// fast path when we know that we don't have to worry about divide by zero.
VectorDivide( linearUnbumped, bumpAverage, correctionScale );
}
else
{
correctionScale.Init( 0.0f, 0.0f, 0.0f );
if( bumpAverage[0] != 0.0f )
{
correctionScale[0] = linearUnbumped[0] / bumpAverage[0];
}
if( bumpAverage[1] != 0.0f )
{
correctionScale[1] = linearUnbumped[1] / bumpAverage[1];
}
if( bumpAverage[2] != 0.0f )
{
correctionScale[2] = linearUnbumped[2] / bumpAverage[2];
}
}
linearBump1 *= correctionScale;
linearBump2 *= correctionScale;
linearBump3 *= correctionScale;
*((Vector *)ret) = linearUnbumped;
*((Vector *)retBump1) = linearBump1;
*((Vector *)retBump2) = linearBump2;
*((Vector *)retBump3) = linearBump3;
}
// For use with CSM correct blending output in alpha channel from vrad
// we can go out of 0..1 range if we normalize the bumped alpha data
// so we 'squash' the range to compensate.
// Note - We shouldn't be taking the simple average in the bump case when normalizing anyway
// (i.e. add together and multiply by 0.575 comments below).
// Keeping the averaging as is for consistency with RGB's, etc
FORCEINLINE void LinearToBumpedLightmapAlpha( const float *linearAlpha, const float *linearBumpAlpha1,
const float *linearBumpAlpha2, const float *linearBumpAlpha3,
float *ret, float *retBump1,
float *retBump2, float *retBump3 )
{
// divide by three to ensure we stay in a 0..1 range here, scale up result later (via scaling in CSM light color)
const float &linearUnbumped = *((const float *)linearAlpha) / 3.0f;
const float &linearBump1 = *((const float *)linearBumpAlpha1) / 3.0f;
const float &linearBump2 = *((const float *)linearBumpAlpha2) / 3.0f;
const float &linearBump3 = *((const float *)linearBumpAlpha3) / 3.0f;
float bumpAverage = linearBump1;
bumpAverage += linearBump2;
bumpAverage += linearBump3;
bumpAverage *= (1.0f / 3.0f);
float correctionScale;
if ( *(int *)&bumpAverage != 0 )
{
// fast path when we know that we don't have to worry about divide by zero.
correctionScale = linearUnbumped / bumpAverage;
}
else
{
correctionScale = 0.0f;
if ( bumpAverage != 0.0f )
{
correctionScale = linearUnbumped / bumpAverage;
}
}
float correctedBumpAlpha1;
float correctedBumpAlpha2;
float correctedBumpAlpha3;
correctedBumpAlpha1 = linearBump1 * correctionScale;
correctedBumpAlpha2 = linearBump2 * correctionScale;
correctedBumpAlpha3 = linearBump3 * correctionScale;
ret[0] = linearUnbumped;
retBump1[0] = correctedBumpAlpha1;
retBump2[0] = correctedBumpAlpha2;
retBump3[0] = correctedBumpAlpha3;
}
// The domain of the inputs is floats [0 .. 16.0f]
// the output range is also floats [0 .. 16.0f] (eg, compression to short does not happen)
FORCEINLINE void
LinearToBumpedLightmap( FLTX4 linearColor, FLTX4 linearBumpColor1,
FLTX4 linearBumpColor2, FLTX4 linearBumpColor3,
fltx4 &ret, fltx4 &retBump1, // I pray that with inlining
fltx4 &retBump2, fltx4 &retBump3 ) // these will be returned on registers
{
// preload 3.0f onto the returns so that we don't need to multiply the bumpAverage by it
// straight away (eg, reschedule this dependent op)
static const fltx4 vThree = { 3.0f, 3.0f, 3.0f, 0.0f };
fltx4 retValBump1 = MulSIMD( vThree, linearBumpColor1);
fltx4 retValBump2 = MulSIMD( vThree, linearBumpColor2);
fltx4 retValBump3 = MulSIMD( vThree, linearBumpColor3);
// find a scale factor which makes the average of the 3 bumped mapped vectors match the
// straight up vector (if possible), so that flat bumpmapped areas match non-bumpmapped
// areas.
fltx4 bumpAverage = AddSIMD(AddSIMD(linearBumpColor1, linearBumpColor2), linearBumpColor3); // actually average * 3
// find the zero terms so that we can quash their channels in the output
bi32x4 zeroTerms = CmpEqSIMD(bumpAverage, Four_Zeros);
fltx4 correctionScale = ReciprocalEstSIMD(bumpAverage); // each channel is now 1.0f / (average[x] * 3.0f)
// divide unbumped linear by the average to get the correction scale
correctionScale = MulSIMD( AndNotSIMD(zeroTerms, linearColor), // crush values that were zero in bumpAverage. (saves on dep latency)
correctionScale); // still has an extra 1/3 factor multiplied in
// multiply this against three to get the return values
ret = linearColor;
retBump1 = MulSIMD(retValBump1, correctionScale);
retBump2 = MulSIMD(retValBump2, correctionScale);
retBump3 = MulSIMD(retValBump3, correctionScale);
}
// input: floats [0 .. 16.0f]
// output: shorts [0 .. 65535]
FORCEINLINE void
LinearToBumpedLightmap( const float *linearColor, const float *linearBumpColor1,
const float *linearBumpColor2, const float *linearBumpColor3,
unsigned short *ret, unsigned short *retBump1,
unsigned short *retBump2, unsigned short *retBump3 )
{
Vector linearUnbumped, linearBump1, linearBump2, linearBump3;
LinearToBumpedLightmap( linearColor, linearBumpColor1, linearBumpColor2, linearBumpColor3, &linearUnbumped.x, &linearBump1.x, &linearBump2.x, &linearBump3.x );
ClampToHDR( linearUnbumped, ret );
ClampToHDR( linearBump1, retBump1 );
ClampToHDR( linearBump2, retBump2 );
ClampToHDR( linearBump3, retBump3 );
}
};
#endif // COLORSPACE_H