// GTU v050
// Author: aliaspider - aliaspider@gmail.com (GPLv3 License)
// Ported from https://github.com/libretro/common-shaders/tree/master/crt/shaders/gtu-v050
// Gaussian TV Upscaler with NTSC color simulation

//!BGFX EFFECT
//!VERSION 1
//!NAME GTU v050
//!CATEGORY CRT
//!DESCRIPTION Gaussian TV Upscaler v0.50 - Simulates analog TV signal processing with optional composite video artifacts, NTSC color space conversion, and authentic scanline rendering.

//!PARAMETER
//!LABEL Composite Connection
//!DESC Enable composite video signal simulation with color bleeding
//!DEFAULT 0
//!MIN 0
//!MAX 1
//!STEP 1
int compositeConnection;

//!PARAMETER
//!LABEL No Scanlines
//!DESC Disable scanline effect for cleaner output
//!DEFAULT 0
//!MIN 0
//!MAX 1
//!STEP 1
int noScanlines;

//!PARAMETER
//!LABEL Signal Resolution Y
//!DESC Horizontal luminance resolution in TV lines
//!DEFAULT 256
//!MIN 16
//!MAX 1024
//!STEP 16
int signalResolution;

//!PARAMETER
//!LABEL Signal Resolution I
//!DESC Horizontal resolution of I chrominance channel (NTSC)
//!DEFAULT 83
//!MIN 1
//!MAX 350
//!STEP 1
int signalResolutionI;

//!PARAMETER
//!LABEL Signal Resolution Q
//!DESC Horizontal resolution of Q chrominance channel (NTSC)
//!DEFAULT 25
//!MIN 1
//!MAX 350
//!STEP 1
int signalResolutionQ;

//!PARAMETER
//!LABEL TV Vertical Resolution
//!DESC Vertical resolution in TV lines
//!DEFAULT 250
//!MIN 20
//!MAX 1000
//!STEP 10
int tvVerticalResolution;

//!PARAMETER
//!LABEL Black Level
//!DESC Adjusts the black level (brightness floor)
//!DEFAULT 0.07
//!MIN -0.3
//!MAX 0.3
//!STEP 0.01
float blackLevel;

//!PARAMETER
//!LABEL Contrast
//!DESC Output contrast adjustment
//!DEFAULT 1
//!MIN 0
//!MAX 2
//!STEP 0.01
float contrast;


//!TEXTURE
Texture2D INPUT;

//!TEXTURE
Texture2D OUTPUT;

//!TEXTURE
//!WIDTH OUTPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex1;

//!SAMPLER
//!FILTER POINT
SamplerState sam;


//!PASS 1
//!STYLE PS
//!IN INPUT
//!OUT tex1

// Color space conversion matrices, meant to be used as mul(M, v) with v on the right.
// The float3x3 constructor fills the matrix row by row, so each literal below lists the
// coefficients column by column; transpose() turns that into the usual form:
//   RGB_to_YIQ:  Y = 0.299    R + 0.587    G + 0.114    B
//                I = 0.595716 R - 0.274453 G - 0.321263 B
//                Q = 0.211456 R - 0.522591 G + 0.311135 B
//   YIQ_to_RGB:  R = Y + 0.9563 I + 0.6210 Q
//                G = Y - 0.2721 I - 0.6474 Q
//                B = Y - 1.1070 I + 1.7046 Q
#define RGB_to_YIQ transpose(float3x3( 0.299 , 0.595716 , 0.211456 , 0.587 , -0.274453 , -0.522591 , 0.114 , -0.321263 , 0.311135 ))
#define YIQ_to_RGB transpose(float3x3( 1.0 , 1.0  , 1.0 , 0.9563 , -0.2721 , -1.1070 , 0.6210 , -0.6474 , 1.7046 ))
// Used by the kernel helpers d(), e() and STU() of this pass.
#define pi 3.14159265358

// Signal processing helper functions

// Upper integration limit used by STU(): the far edge of the texel, |x| + 0.5,
// capped at 1 / b and then scaled by pi * b. For b > 0 the result never exceeds pi.
float d(float x, float b) {
	float farEdge = min(abs(x) + 0.5, 1.0 / b);
	return pi * b * farEdge;
}

// Lower integration limit used by STU(): the near edge of the texel, |x| - 0.5,
// limited to the interval [-1 / b, 1 / b] and then scaled by pi * b.
float e(float x, float b) {
	float nearEdge = max(abs(x) - 0.5, -1.0 / b);
	nearEdge = min(nearEdge, 1.0 / b);
	return (pi * b * nearEdge);
}

// Filter weight of a source texel whose centre lies x texels away, for bandwidth b
// (cycles per texel). The expression equals the integral of (1 + cos u) / (2 * pi)
// for u running from e(x, b) to d(x, b), i.e. a raised-cosine lobe integrated over the
// one-texel-wide footprint and clipped to the lobe's extent of 1 / b on either side.
float STU(float x, float b) {
	float upper = d(x, b);
	float lower = e(x, b);
	return ((upper + sin(upper) - lower - sin(lower)) / (2.0 * pi));
}

// Pass 1: horizontal filtering of INPUT into tex1.
// Every output pixel is a weighted sum of the source texels on its row, with the
// weights given by STU(). In composite mode the texels are converted to YIQ and each
// of Y, I and Q is filtered with its own resolution before converting back to RGB;
// otherwise all three RGB channels share signalResolution.
// pos is the texture coordinate of the pixel being shaded; the result is clamped to
// [0, 1] and returned with alpha 1.
float4 Pass1(float2 pos) {
	float2 srcSize = GetInputSize();
	float2 srcPt = GetInputPt();

	// Fractional distance from the centre of the nearest texel on the left.
	float phase = frac((pos.x * srcSize.x) - 0.5);
	float3 acc = 0;
	float3 texel;
	float dist;
	float taps;
	float k;

	// Luma (or shared RGB) bandwidth in cycles per source texel.
	float bandY = (signalResolution * srcPt.x);

	if (compositeConnection) {
		// Composite mode: process Y, I, Q channels separately.
		// The lowest of the three resolutions has the widest kernel and so sets the tap range.
		int lowestRes = min(min(signalResolution, signalResolutionI), signalResolutionQ);
		taps = ceil(0.5 + srcSize.x / lowestRes);

		float bandI = (signalResolutionI * srcPt.x);
		float bandQ = (signalResolutionQ * srcPt.x);

		for (k = -taps; k < taps + 2.0; k++) {
			dist = (phase - (k));
			texel = INPUT.SampleLevel(sam, float2(pos.x - dist * srcPt.x, pos.y), 0).rgb;
			texel = mul(RGB_to_YIQ, texel);
			acc += float3(
				(texel.x * STU(dist, bandY)),
				(texel.y * STU(dist, bandI)),
				(texel.z * STU(dist, bandQ))
			);
		}

		acc = clamp(mul(YIQ_to_RGB, acc), 0.0, 1.0);
	} else {
		// RGB mode: process all channels with same resolution.
		taps = ceil(0.5 + srcSize.x / signalResolution);

		for (k = -taps; k < taps + 2.0; k++) {
			dist = (phase - (k));
			texel = INPUT.SampleLevel(sam, float2(pos.x - dist * srcPt.x, pos.y), 0).rgb;
			acc += (texel * STU(dist, bandY));
		}

		acc = clamp(acc, 0.0, 1.0);
	}

	return float4(acc, 1.0);
}


//!PASS 2
//!STYLE PS
//!IN tex1
//!OUT OUTPUT

// Passes are self-contained here, so pi is defined again for pass 2.
#define pi 3.14159265358
// Standard normal probability density: exp(-x^2 / 2) / sqrt(2 * pi).
// The argument is parenthesised, so expressions may be passed safely.
#define normalGauss(x) ((exp(-(x)*(x)*0.5))/sqrt(2.0*pi))

// Approximates the integral of normalGauss from 0 to x (the normal CDF minus 0.5).
// Uses a cubic polynomial in t = 1 / (1 + p * |x|) multiplied by the density; the
// constants appear to be those of Abramowitz & Stegun formula 26.2.16 — confirm against the reference.
// The magnitude is evaluated from |x| and the sign is restored with sign(x), so the
// function is odd and returns 0 at x = 0.
float normalGaussIntegral(float x) {
	const float p = 0.3326700;
	const float a1 = 0.4361836;
	const float a2 = -0.1201676;
	const float a3 = 0.9372980;

	float t = 1.0 / (1.0 + p * abs(x));
	// Horner evaluation of a1*t + a2*t^2 + a3*t^3.
	float poly = (t * (a1 + t * (a2 + a3 * t)));
	float upperTail = normalGauss(x) * poly;
	return (0.5 - upperTail) * sign(x);
}

// Apply scanline effect using integrated Gaussian.
// x is the vertical distance (in source rows) between the pixel being shaded and the
// source row that contributed colour c. The Gaussian beam profile is integrated over
// the window [x - rrr, x + rrr] and the colour is scaled by that amount.
// Returns the weighted colour, additionally multiplied by GetScale().y.
float3 scanlines(float x, float3 c) {
	float inputHeight = GetInputSize().y;
	float inputPtY = GetInputPt().y;
	float outputPtY = GetOutputPt().y;

	// Converts a distance in source rows into the argument of the unit Gaussian;
	// a higher tvVerticalResolution gives a narrower beam.
	float temp = sqrt(2 * pi) * (tvVerticalResolution * inputPtY);

	// Half-width of the integration window.
	// NOTE(review): presumably half the height of one output pixel measured in source rows — confirm GetOutputPt() is 1 / output size.
	float rrr = 0.5 * (inputHeight * outputPtY);
	float x1 = (x + rrr) * temp;
	float x2 = (x - rrr) * temp;

	// The weight is the same for all three channels, so compute it once.
	float weight = normalGaussIntegral(x1) - normalGaussIntegral(x2);
	c *= weight;

	// NOTE(review): presumably the vertical output/input scale factor, compensating for the narrow window — confirm.
	c *= GetScale().y;
	return c;
}


// Upper integration limit used by STU(): the far edge of the texel, |x| + 0.5,
// capped at 1 / b and then scaled by pi * b. For b > 0 the result never exceeds pi.
float d(float x, float b) {
	float farEdge = min(abs(x) + 0.5, 1.0 / b);
	return pi * b * farEdge;
}

// Lower integration limit used by STU(): the near edge of the texel, |x| - 0.5,
// limited to the interval [-1 / b, 1 / b] and then scaled by pi * b.
float e(float x, float b) {
	float nearEdge = max(abs(x) - 0.5, -1.0 / b);
	nearEdge = min(nearEdge, 1.0 / b);
	return (pi * b * nearEdge);
}

// Filter weight of a source row whose centre lies x rows away, for bandwidth b
// (cycles per row). The expression equals the integral of (1 + cos u) / (2 * pi)
// for u running from e(x, b) to d(x, b), i.e. a raised-cosine lobe integrated over the
// one-row-wide footprint and clipped to the lobe's extent of 1 / b on either side.
float STU(float x, float b) {
	float upper = d(x, b);
	float lower = e(x, b);
	return ((upper + sin(upper) - lower - sin(lower)) / (2.0 * pi));
}

// Pass 2: vertical filtering of tex1 into OUTPUT.
// Every output pixel is a weighted sum of the rows of tex1 above and below it. With
// noScanlines set the rows are weighted by STU(); otherwise each row is weighted by
// the integrated Gaussian beam profile in scanlines(). Black level and contrast are
// applied to the sum, which is returned unclamped with alpha 1.
// pos is the texture coordinate of the pixel being shaded.
float4 Pass2(float2 pos) {
	// Kept as float (as in Pass1) so that the division below is not an integer division.
	float2 inputSize = GetInputSize();
	float2 inputPt = GetInputPt();

	// Fractional distance from the centre of the nearest row above; only the vertical
	// component is needed in this pass.
	float offsetY = frac((pos.y * inputSize.y) - 0.5);
	float3 tempColor = 0;
	float3 Cj;

	// Number of rows to visit on each side. With integer operands the quotient would be
	// truncated before ceil(), giving a smaller range than the float computation.
	float range = ceil(0.5 + inputSize.y / tvVerticalResolution);

	float i;
	float dist;

	if (noScanlines) {
		// Clean mode without scanlines
		float bandwidth = (tvVerticalResolution * inputPt.y);

		for (i = -range; i < range + 2.0; i++) {
			dist = offsetY - (i);
			Cj = tex1.SampleLevel(sam, float2(pos.x, pos.y - dist * inputPt.y), 0).xyz;
			tempColor += Cj * STU(dist, bandwidth);
		}
	} else {
		// Apply scanline effect
		for (i = -range; i < range + 2.0; i++) {
			dist = offsetY - (i);
			Cj = tex1.SampleLevel(sam, float2(pos.x, pos.y - dist * inputPt.y), 0).xyz;
			tempColor += scanlines(dist, Cj);
		}
	}

	// Apply black level and contrast adjustment: blackLevel maps to 0 and, at contrast 1,
	// an input of 1 still maps to 1.
	tempColor -= blackLevel;
	tempColor *= contrast / (1.0 - blackLevel);
	return float4(tempColor, 1.0);
}
