// Anime4K Bilateral Median Denoiser
// Based on Anime4K project by bloc97

//!BGFX EFFECT
//!VERSION 1
//!NAME Anime4K Denoise Bilateral Median
//!CATEGORY Anime4K
//!DESCRIPTION Bilateral median filter for noise reduction. Preserves edges better than mean filter by using weighted median.


//!PARAMETER
//!LABEL Intensity Sigma
//!DESC Controls denoising strength. Higher values produce stronger smoothing but may blur details.
//!DEFAULT 0.1
//!MIN 0.01
//!MAX 5
//!STEP 0.01
float intensitySigma;

//!TEXTURE
Texture2D INPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D OUTPUT;

//!SAMPLER
//!FILTER POINT
SamplerState sam;


//!PASS 1
//!IN INPUT
//!OUT OUTPUT
//!BLOCK_SIZE 8
//!NUM_THREADS 64

// Intensity (range) sigma, bound to the user-facing `intensitySigma` parameter.
// Pass1 multiplies it by the powered center luma to get the per-pixel sigma of
// the luma-difference Gaussian. Higher values give stronger denoising.
#define INTENSITY_SIGMA intensitySigma
// Spatial sigma of the distance Gaussian. Its integer part (at least 1) also
// sets the kernel radius through KERNELSIZE, so it controls the blur radius.
#define SPATIAL_SIGMA 1.0
// Histogram regularization (0 = pure median, higher = closer to mean).
// Used as the sigma of the Gaussian that spreads weights between samples of
// similar luma; the regularization branch in Pass1 only runs when this is > 0.
#define HISTOGRAM_REGULARIZATION 0.0
// Power curve for intensity weighting: exponent applied to the center luma
// before it scales INTENSITY_SIGMA.
#define INTENSITY_POWER_CURVE 1.0

// Kernel dimension calculations.
// KERNELSIZE     - side length of the square kernel: 2 * radius + 1, where the
//                  radius is the integer part of SPATIAL_SIGMA, but at least 1.
// KERNELHALFSIZE - kernel radius as a signed int, used to center the offsets.
// KERNELLEN      - total number of samples in the kernel.
#define KERNELSIZE uint(max(uint(SPATIAL_SIGMA), 1) * 2 + 1)
#define KERNELHALFSIZE (int(KERNELSIZE/2))
#define KERNELLEN (KERNELSIZE * KERNELSIZE)

// Maps a linear sample index in [0, KERNELLEN) to its (x, y) texel offset from
// the kernel center, in row-major order. The argument is parenthesized so that
// compound expressions such as GETOFFSET(a + 1) bind as a whole before the
// % and / operators are applied.
#define GETOFFSET(i) int2(int((i) % KERNELSIZE) - KERNELHALFSIZE, int((i) / KERNELSIZE) - KERNELHALFSIZE)


// Converts an RGB color to a single luminance value as a weighted sum of the
// channels (weights 0.299 / 0.587 / 0.114 for R / G / B).
float GetLuma(float3 rgb) {
	const float3 lumaWeights = float3(0.299, 0.587, 0.114);
	return dot(lumaWeights, rgb);
}

// Unnormalized Gaussian: exp(-0.5 * ((x - m) / s)^2).
// x: point of evaluation, s: standard deviation, m: mean.
// Returns 1 when x == m and decays towards 0 as |x - m| grows.
float GaussianFunc(float x, float s, float m) {
	const float z = (x - m) / s;
	const float exponent = -0.5 * z * z;
	return exp(exponent);
}

// Finds the weighted median of the kernel samples.
//
// v: the sampled colors.
// w: the weight of each sample.
// n: the sum of all weights in w.
//
// A sample qualifies as the median when the weight of samples that are not
// above it is at least half of n and the weight of samples strictly below it
// is at most half of n. The first qualifying sample in index order is returned.
//
// NOTE(review): samples are ordered by their .x component only, not by luma -
// confirm this is intended.
//
// If no sample qualifies (n is zero or NaN, or rounding makes both sides of an
// exact 50/50 split fail), the center sample is returned so the pixel is left
// unfiltered instead of being written as black.
float3 ComputeMedian(float3 v[KERNELLEN], float w[KERNELLEN], float n) {
	// KERNELSIZE is odd, so KERNELLEN / 2 is the index of the center sample
	// (the one GETOFFSET maps to (0, 0)).
	float3 result = v[KERNELLEN / 2];

	[unroll]
	for (uint i = 0; i < KERNELLEN; i++) {
		// Accumulate the weight strictly above and strictly below candidate i;
		// samples equal to the candidate count towards neither side.
		float w_above = 0.0;
		float w_below = 0.0;
		[unroll]
		for (uint j = 0; j < KERNELLEN; j++) {
			if (v[j].x > v[i].x) {
				w_above += w[j];
			} else if (v[j].x < v[i].x) {
				w_below += w[j];
			}
		}

		if ((n - w_above) / n >= 0.5 && w_below / n <= 0.5) {
			result = v[i];
			break;
		}
	}

	return result;
}


// Pass 1: writes the bilateral weighted median of the neighborhood around each
// output pixel. Every sample is weighted by a Gaussian on its luma difference
// to the center pixel multiplied by a Gaussian on its distance to the center.
void Pass1(uint2 blockStart, uint3 threadId) {
	const uint2 pixel = blockStart + TileSwizzle8x8(threadId.x);

	// Threads that land outside the output have nothing to write.
	if (any(pixel >= GetOutputSize())) {
		return;
	}

	const float2 texel = GetInputPt();
	const float2 centerUV = (pixel + 0.5f) * texel;

	// Per-sample color, luma and weight, plus the running sum of weights.
	float3 samples[KERNELLEN];
	float lumas[KERNELLEN];
	float weights[KERNELLEN];
	float weightSum = 0.0;

	// Luma of the center pixel is the mean of the range Gaussian.
	const float centerLuma = GetLuma(INPUT.SampleLevel(sam, centerUV, 0).rgb);

	// The range sigma scales with the (powered) center luma; the small bias
	// keeps it non-zero for black pixels.
	const float rangeSigma = pow(centerLuma + 0.0001, INTENSITY_POWER_CURVE) * INTENSITY_SIGMA;
	const float spatialSigma = SPATIAL_SIGMA;

	// One index shared by the sequential loops below.
	uint k;

	// Gather the neighborhood and compute the bilateral weight of each sample.
	[unroll]
	for (k = 0; k < KERNELLEN; k++) {
		const int2 offset = GETOFFSET(k);
		const float3 color = INPUT.SampleLevel(sam, centerUV + offset * texel, 0).rgb;
		const float luma = GetLuma(color);
		const float rangeWeight = GaussianFunc(luma, rangeSigma, centerLuma);
		const float spatialWeight = GaussianFunc(length(offset), spatialSigma, 0.0);
		const float weight = rangeWeight * spatialWeight;

		samples[k] = color;
		lumas[k] = luma;
		weights[k] = weight;
		weightSum += weight;
	}

	if (HISTOGRAM_REGULARIZATION > 0.0) {
		// Regularized histogram: each sample also receives weight from every
		// other sample, scaled by a Gaussian on their luma difference.
		float smoothed[KERNELLEN];

		[unroll]
		for (k = 0; k < KERNELLEN; k++) {
			smoothed[k] = 0.0;
		}

		weightSum = 0.0;

		[unroll]
		for (k = 0; k < KERNELLEN; k++) {
			// Contribution of the sample to itself.
			smoothed[k] += GaussianFunc(0.0, HISTOGRAM_REGULARIZATION, 0.0) * weights[k];

			// Each unordered pair is visited once and applied in both directions.
			[unroll]
			for (uint m = (k + 1); m < KERNELLEN; m++) {
				const float affinity = GaussianFunc(lumas[m], HISTOGRAM_REGULARIZATION, lumas[k]);
				smoothed[m] += affinity * weights[k];
				smoothed[k] += affinity * weights[m];
			}

			// All pairs involving k have been applied by now, so smoothed[k] is final.
			weightSum += smoothed[k];
		}

		OUTPUT[pixel] = float4(ComputeMedian(samples, smoothed, weightSum), 1);
	} else {
		OUTPUT[pixel] = float4(ComputeMedian(samples, weights, weightSum), 1);
	}
}
