Description
DXC misses a common-factor optimization in some cases. Here is an example shader:
// Constant buffer bound to register b10. Its two members are the operands of the
// repeated product (outputDimensions.zw * viewportRelativeSize) used in Test().
cbuffer TemporalAAData : register ( b10 )
{
float2 viewportRelativeSize ;
float4 outputDimensions ;
}
// Input texture (t2) that Test() samples.
Texture2D < float4 > HistoryColor : register ( t2 ) ;
// Sampler (s1) passed to Test() together with HistoryColor.
SamplerState s_Linear : register ( s1 ) ;
// Output UAV (u0) written by cs_main.
RWTexture1D < float3 > outColorBuffer : register ( u0 ) ;
// Repro helper for the missed optimization.
// Derives three float2 positions from `uv`, scales each of them by the same product
// (outputDimensions.zw * viewportRelativeSize), then samples `tex` with `linearSampler`
// at the 3x3 combinations of their x and y components and returns the sum of the
// nine samples.
float4 Test ( in Texture2D < float4 > tex , in SamplerState linearSampler , in float2 uv )
{
float2 samplePos = uv;
// Base position from which the three positions below are derived.
float2 texPos1 = floor ( samplePos - 0.5f ) + 0.5f ;
float2 texPos0 = texPos1 - 1 ;
float2 texPos3 = texPos1 + 2 ;
float2 texPos12 = texPos1 + samplePos;
// DXC should recognize (outputDimensions . zw * viewportRelativeSize) is a common factor.
// The product is written out in full on each of the next three lines; this repeated
// form is the one that yields the redundant multiplies reported in this issue, so it
// must not be hoisted into a temporary here.
texPos0 *= outputDimensions . zw * viewportRelativeSize ;
texPos3 *= outputDimensions . zw * viewportRelativeSize ;
texPos12 *= outputDimensions . zw * viewportRelativeSize ;
float4 result = 0.0f ;
// Nine SampleLevel calls (LOD argument 0.0f): x cycles through texPos0/texPos12/texPos3
// within each row, y is texPos0, then texPos12, then texPos3.
result += tex . SampleLevel ( linearSampler , float2 ( texPos0 . x , texPos0 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos12 . x , texPos0 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos3 . x , texPos0 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos0 . x , texPos12 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos12 . x , texPos12 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos3 . x , texPos12 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos0 . x , texPos3 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos12 . x , texPos3 . y ) , 0.0f );
result += tex . SampleLevel ( linearSampler , float2 ( texPos3 . x , texPos3 . y ) , 0.0f );
return result ;
}
// Compute-shader entry point (8x8x1 thread group) for the repro.
// Calls Test() with outputDimensions.zw as the `uv` argument and stores the .rgb of the
// result into outColorBuffer, indexed by the x component of the group-thread ID.
[ numthreads ( 8 , 8 , 1 ) ] void cs_main ( uint3 GroupID : SV_GroupID , uint GroupIndex : SV_GroupIndex , uint3 GTID : SV_GroupThreadID , uint3 DispatchThreadID : SV_DispatchThreadID )
{
uint2 pixelCoord = GTID . xy ;
// Only pixelCoord.x is used as the index because outColorBuffer is a 1D texture.
outColorBuffer [ pixelCoord.x ] = Test ( HistoryColor , s_Linear , outputDimensions . zw ) . rgb;
}
Steps to Reproduce
Compile the HLSL code above with the following command line: dxc.exe -T cs_6_3 -E cs_main dxc-common-factor-optimization.hlsl
Actual Behavior
After compiling the example HLSL, the generated DXIL contains the following instructions:
%24 = fmul fast float %7, %14
%25 = fmul fast float %24, %22
%26 = fmul fast float %8, %15
%27 = fmul fast float %26, %23
%28 = fmul fast float %7, %16
%29 = fmul fast float %28, %22
%30 = fmul fast float %8, %17
%31 = fmul fast float %30, %23
%32 = fmul fast float %7, %18
%33 = fmul fast float %32, %22
%34 = fmul fast float %8, %20
%35 = fmul fast float %34, %23
The common factor %7 * %22 (and likewise %8 * %23 for the y component) is effectively calculated 3 times, for a total of 12 fmul instructions.
If we rewrite the relevant code like this:
float2 temp = outputDimensions . zw * viewportRelativeSize;
texPos0 *= temp ;
texPos3 *= temp ;
texPos12 *= temp ;
The generated DXIL is then:
%24 = fmul fast float %22, %7
%25 = fmul fast float %23, %8
%26 = fmul fast float %24, %14
%27 = fmul fast float %25, %15
%28 = fmul fast float %24, %16
%29 = fmul fast float %25, %17
%30 = fmul fast float %24, %18
%31 = fmul fast float %25, %20
The common factor %7 * %22 (and likewise %8 * %23) is calculated only once, for a total of 8 fmul instructions.
DXC should recognize the common factor on its own and generate the optimized DXIL without requiring the manual rewrite.
Environment
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4