// FXAA Ultra Quality Anti-Aliasing
// Based on the FXAA algorithm by Timothy Lottes (NVIDIA)

//!BGFX EFFECT
//!VERSION 1
//!NAME FXAA Ultra
//!CATEGORY Anti-Aliasing
//!DESCRIPTION Fast Approximate Anti-Aliasing with ultra quality settings. Maximum 32 search steps for the best edge quality at slightly higher performance cost.


//!TEXTURE
Texture2D INPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D OUTPUT;

//!SAMPLER
//!FILTER LINEAR
SamplerState linearSampler;


//!PASS 1
//!IN INPUT
//!OUT OUTPUT
//!BLOCK_SIZE 16
//!NUM_THREADS 64

#define FXAA_PRESET 5
#include "FXAA.hlsli"


// Pass 1 entry point. Each thread anti-aliases one 2x2 block of output pixels.
//
// blockStart : pixel coordinate of the first pixel covered by this thread group.
// threadId   : thread index; only .x is used, remapped through TileSwizzle8x8
//              to a position inside the group's tile.
//
// The thread first loads the 4x4 pixel neighbourhood around its 2x2 block with
// four gathers per colour channel, then runs ProcessFXAA once for each of the
// four centre pixels and writes the result with alpha forced to 1.
void Pass1(uint2 blockStart, uint3 threadId) {
	// Top-left pixel of the 2x2 block owned by this thread (swizzled index * 2).
	const uint2 origin = blockStart + (TileSwizzle8x8(threadId.x) << 1);

	// Threads whose block starts outside the output have nothing to do.
	const uint2 outSize = GetOutputSize();
	if (any(origin >= outSize)) {
		return;
	}

	// NOTE(review): presumably the size of one input texel in UV space - confirm
	// against GetInputPt's definition.
	float2 texelSize = GetInputPt();

	// 4x4 neighbourhood indexed as taps[x][y]; taps[1..2][1..2] are the pixels
	// this thread outputs, the outer ring is their surrounding context.
	float3 taps[4][4];
	[unroll]
	for (uint qx = 0; qx < 4; qx += 2) {
		[unroll]
		for (uint qy = 0; qy < 4; qy += 2) {
			// One gather per channel fetches a whole 2x2 quadrant of the neighbourhood.
			const float2 gatherPos = (origin + uint2(qx, qy)) * texelSize;
			const float4 r = INPUT.GatherRed(linearSampler, gatherPos);
			const float4 g = INPUT.GatherGreen(linearSampler, gatherPos);
			const float4 b = INPUT.GatherBlue(linearSampler, gatherPos);

			// Gather component layout within the 2x2 footprint:
			//   w z
			//   x y
			taps[qx][qy] = float3(r.w, g.w, b.w);
			taps[qx + 1][qy] = float3(r.z, g.z, b.z);
			taps[qx][qy + 1] = float3(r.x, g.x, b.x);
			taps[qx + 1][qy + 1] = float3(r.y, g.y, b.y);
		}
	}

	// Run FXAA for each of the four pixels in the block. The +1 offsets select
	// the pixel's slot inside the 4x4 neighbourhood.
	[unroll]
	for (uint dx = 0; dx < 2; ++dx) {
		[unroll]
		for (uint dy = 0; dy < 2; ++dy) {
			const uint2 pixel = origin + uint2(dx, dy);
			// Pixel-centre position scaled by the texel size.
			const float2 uv = (pixel + 0.5f) * texelSize;
			const float3 rgb = ProcessFXAA(taps, dx + 1, dy + 1, INPUT, linearSampler, uv, texelSize);
			OUTPUT[pixel] = float4(rgb, 1);
		}
	}
}
