// CRT Geom
// Based on CRT-interlaced shader by cgwg, Themaister and DOLLS (GPL License)
// Ported from https://github.com/libretro/common-shaders/blob/master/crt/shaders/crt-geom.cg
// Features screen curvature simulation and advanced scanline rendering

//!BGFX EFFECT
//!VERSION 1
//!NAME CRT Geom
//!CATEGORY CRT
//!DESCRIPTION A comprehensive CRT simulation with barrel distortion, screen curvature, corner rounding, and authentic scanline rendering. Provides a realistic curved CRT monitor appearance.
//!USE _DYNAMIC

//!PARAMETER
//!LABEL Target Gamma
//!DESC Gamma value of the simulated CRT display
//!DEFAULT 2.4
//!MIN 0.1
//!MAX 5
//!STEP 0.01
float CRTGamma;

//!PARAMETER
//!LABEL Monitor Gamma
//!DESC Gamma value of your actual display monitor
//!DEFAULT 2.2
//!MIN 0.1
//!MAX 5
//!STEP 0.01
float monitorGamma;

//!PARAMETER
//!LABEL Distance
//!DESC Simulated viewing distance from the CRT screen
//!DEFAULT 1.5
//!MIN 0.1
//!MAX 3.0
//!STEP 0.01
float distance;

//!PARAMETER
//!LABEL Curvature
//!DESC Enable or disable screen curvature effect
//!DEFAULT 1
//!MIN 0
//!MAX 1
//!STEP 1
int curvature;

//!PARAMETER
//!LABEL Curvature Radius
//!DESC Radius of the screen curvature (lower = more curved)
//!DEFAULT 2
//!MIN 0.1
//!MAX 10
//!STEP 0.1
float radius;

//!PARAMETER
//!LABEL Corner Size
//!DESC Size of the rounded corners
//!DEFAULT 0.03
//!MIN 0
//!MAX 1.0
//!STEP 0.01
float cornerSize;

//!PARAMETER
//!LABEL Corner Smoothness
//!DESC Smoothness of the corner transition
//!DEFAULT 1000
//!MIN 80
//!MAX 2000
//!STEP 1
int cornerSmooth;

//!PARAMETER
//!LABEL Horizontal Tilt
//!DESC Horizontal tilt angle of the simulated screen
//!DEFAULT 0
//!MIN -0.5
//!MAX 0.5
//!STEP 0.01
float xTilt;

//!PARAMETER
//!LABEL Vertical Tilt
//!DESC Vertical tilt angle of the simulated screen
//!DEFAULT 0
//!MIN -0.5
//!MAX 0.5
//!STEP 0.01
float yTilt;

//!PARAMETER
//!LABEL Horizontal Overscan
//!DESC Horizontal overscan percentage (100 = no overscan)
//!DEFAULT 100
//!MIN -125
//!MAX 125
//!STEP 1
int overScanX;

//!PARAMETER
//!LABEL Vertical Overscan
//!DESC Vertical overscan percentage (100 = no overscan)
//!DEFAULT 100
//!MIN -125
//!MAX 125
//!STEP 1
int overScanY;

//!PARAMETER
//!LABEL Dot Mask
//!DESC Intensity of the alternating dot mask pattern
//!DEFAULT 0.3
//!MIN 0
//!MAX 0.3
//!STEP 0.01
float dotMask;

//!PARAMETER
//!LABEL Sharpness
//!DESC Texture sampling sharpness multiplier
//!DEFAULT 1
//!MIN 1
//!MAX 3
//!STEP 1
int sharper;

//!PARAMETER
//!LABEL Scanline Weight
//!DESC Controls the width and intensity of scanlines
//!DEFAULT 0.3
//!MIN 0.1
//!MAX 0.5
//!STEP 0.01
float scanlineWeight;

//!PARAMETER
//!LABEL Luminance Boost
//!DESC Additional luminance added to the scanline calculation
//!DEFAULT 0
//!MIN 0
//!MAX 1
//!STEP 0.01
float lum;

//!PARAMETER
//!LABEL Interlacing
//!DESC Enable interlacing simulation (alternates scanlines each frame)
//!DEFAULT 0
//!MIN 0
//!MAX 1
//!STEP 1
int interlace;

//!TEXTURE
Texture2D INPUT;

//!TEXTURE
Texture2D OUTPUT;

//!SAMPLER
//!FILTER POINT
SamplerState sam;

//!PASS 1
//!STYLE PS
//!IN INPUT
//!OUT OUTPUT

#pragma warning(disable: 3571)

// Uncomment for faster gaussian-only beam profile
// #define USEGAUSSIAN

// Constants and macros
#define PI 3.141592653589
// FIX(c): magnitude of c, floored at 1e-5 so it can safely be used as a divisor.
#define FIX(c) max(abs(c), 1e-5)
// Per-axis scale applied to centred coordinates (x: 1.0, y: 0.75).
#define aspect float2(1.0, 0.75)

// TEX2D(c): samples INPUT at mip level 0 through `sam` and raises the result to
// the CRTGamma power, so every value obtained via TEX2D is already gamma-decoded.
#define TEX2D(c) pow(INPUT.SampleLevel(sam, (c), 0), CRTGamma)

// Ray/curved-screen intersection.
// Builds the quadratic A*t^2 + B*t + C = 0 from the screen-plane point `xy`, the
// `distance` and `radius` parameters and the tilt terms, and returns its smaller
// root, (-B - sqrt(B^2 - 4AC)) / (2A).
// sin_cos_angle = (sin xTilt, sin yTilt, cos xTilt, cos yTilt).
float intersect(float2 xy, float4 sin_cos_angle) {
	const float distSq = distance * distance;
	const float tiltedDist = distance * sin_cos_angle.z * sin_cos_angle.w;

	const float qa = dot(xy, xy) + distSq;
	const float qb = 2.0 * (radius * (dot(xy, sin_cos_angle.xy) - tiltedDist) - distSq);
	const float qc = distSq + 2.0 * radius * distance * sin_cos_angle.z * sin_cos_angle.w;

	const float discriminant = qb * qb - 4.0 * qa * qc;
	return (-qb - sqrt(discriminant)) / (2.0 * qa);
}

// Backward mapping: screen-plane point -> coordinates on the curved surface.
// Scales `xy` by the intersect() root, re-centres it using the tilt sines and
// `radius`, solves a second quadratic for the term `root`, and finally rescales
// the result by arc / sin(arc / radius), where arc = radius * acos(root).
// sin_cos_angle = (sin xTilt, sin yTilt, cos xTilt, cos yTilt).
float2 bkwtrans(float2 xy, float4 sin_cos_angle) {
	const float2 sines = sin_cos_angle.xy;
	const float2 cosines = sin_cos_angle.zw;

	float2 surfacePt = intersect(xy, sin_cos_angle) * xy;
	surfacePt += radius * sines;
	surfacePt /= radius;

	const float2 tangent = sines / cosines;
	const float2 scaledPt = surfacePt / cosines;

	const float qa = dot(tangent, tangent) + 1.0;
	const float qb = -2.0 * dot(scaledPt, tangent);
	const float qc = dot(scaledPt, scaledPt) - 1.0;
	const float root = (-qb + sqrt(qb * qb - 4.0 * qa * qc)) / (2.0 * qa);

	const float2 uv = (surfacePt - root * sines) / cosines;
	// FIX keeps the arc length away from zero so the division below stays finite.
	const float arc = FIX(radius * acos(root));
	return uv * arc / sin(arc / radius);
}

// Forward mapping: curved-surface coordinates -> screen-plane point.
// `uv` is rescaled by sin(len / radius) / len (len = |uv|, floored by FIX), then
// projected using the `distance` parameter and the tilt sines/cosines.
// sin_cos_angle = (sin xTilt, sin yTilt, cos xTilt, cos yTilt).
float2 fwtrans(float2 uv, float4 sin_cos_angle) {
	const float len = FIX(sqrt(dot(uv, uv)));
	const float angle = len / radius;
	uv *= sin(angle) / len;

	const float versine = 1.0 - cos(angle);
	const float denom = distance / radius + versine * sin_cos_angle.z * sin_cos_angle.w + dot(uv, sin_cos_angle.xy);
	return distance * (uv * sin_cos_angle.zw - versine * sin_cos_angle.xy) / denom;
}

// Offset and scale used to fit the curved image.
// Forward-transforms the four edge midpoints (at +/- half of `aspect`) through
// fwtrans() and returns:
//   .xy = midpoint of the low/high extents, in aspect-scaled units
//   .z  = the larger of the horizontal and vertical extents
float3 maxscale(float4 sin_cos_angle) {
	const float2 halfExtent = 0.5 * aspect;
	const float2 centre = bkwtrans(-radius * sin_cos_angle.xy / (1.0 + radius / distance * sin_cos_angle.z * sin_cos_angle.w), sin_cos_angle);

	const float loX = fwtrans(float2(-halfExtent.x, centre.y), sin_cos_angle).x;
	const float loY = fwtrans(float2(centre.x, -halfExtent.y), sin_cos_angle).y;
	const float hiX = fwtrans(float2(+halfExtent.x, centre.y), sin_cos_angle).x;
	const float hiY = fwtrans(float2(centre.x, +halfExtent.y), sin_cos_angle).y;

	const float2 lo = float2(loX, loY) / aspect;
	const float2 hi = float2(hiX, hiY) / aspect;

	return float3((hi + lo) * aspect * 0.5, max(hi.x - lo.x, hi.y - lo.y));
}

// Per-channel scanline weight for a pixel at `distance1` from a scanline centre.
//   distance1 - distance from the scanline centre (callers pass a non-negative value)
//   color     - colour of that scanline; the beam profile is derived from it per
//               channel, so brighter channels get a wider beam
// Returns the weight each colour channel receives from that scanline.
//
// USEGAUSSIAN selects a plain gaussian profile; the default profile uses a
// brightness-dependent exponent between 2 and 4.
float4 scanlineWeights(float distance1, float4 color) {
#ifdef USEGAUSSIAN
	float4 wid = 0.3 + 0.1 * pow(color, 3.0);
	// The beam width differs per channel, so the normalised distance must be a
	// float4 too. scanlineWeight / 0.3 rescales the width relative to the
	// parameter's default of 0.3.
	float4 weights = distance1 / (wid * scanlineWeight / 0.3);
	return (lum + 0.4) * exp(-weights * weights) / wid;
#else
	float4 wid = 2.0 + 2.0 * pow(color, 4.0);
	float v = distance1 / scanlineWeight;
	float4 weights = float4(v, v, v, v);
	return (lum + 1.4) * exp(-pow(weights * rsqrt(0.5 * wid), wid)) / (0.6 + 0.2 * wid);
#endif
}

// Pixel-shader entry point for pass 1.
//   pos - coordinate of the output pixel (presumably normalised to [0,1], since it
//         is multiplied by the output size to get a pixel column - confirm against
//         the host's PS-style pass contract)
// Returns the shaded colour with alpha fixed at 1.
//
// Steps: optional curvature remap -> rounded-corner mask -> horizontal Lanczos2
// filtering of two adjacent scanlines -> scanline beam weighting (3x oversampled)
// -> dot mask -> encode for the monitor's gamma.
float4 Pass1(float2 pos) {
	const uint2 outputSize = GetOutputSize();
	const uint2 inputSize = GetInputSize();

	float2 TextureSize = float2(sharper * inputSize.x, inputSize.y);

	// Current pixel x-coordinate for mask calculation
	float mod_factor = pos.x * outputSize.x;
	// Interlacing factor: when interlace=1, denominator is 200; when interlace=0, denominator is 1000
	float2 ilfac = { 1.0, clamp(floor(inputSize.y / (200.0 * (-4 * interlace + 5))), 1.0, 2.0)};
	float2 one = ilfac / TextureSize;

	// Calculate texture coordinates with curvature.
	// The tilt terms and maxscale() are only needed on this path, so they are
	// computed inside the branch.
	float2 xy = pos;
	if (curvature > 0) {
		float4 sin_cos_angle = { sin(float2(xTilt, yTilt)), cos(float2(xTilt, yTilt)) };
		float3 stretch = maxscale(sin_cos_angle);
		float2 cd = (pos - 0.5) * aspect * stretch.z + stretch.xy;
		// NOTE(review): an overscan value of 0 divides by zero here; the declared
		// parameter range (-125..125) permits it.
		xy = bkwtrans(cd, sin_cos_angle) / float2(overScanX / 100.0, overScanY / 100.0) / aspect + float2(0.5, 0.5);
	}

	// Corner darkening calculation: distance into the rounded-corner region,
	// turned into a 0..1 mask whose edge steepness is set by cornerSmooth.
	float2 cd2 = xy;
	cd2 = (cd2 - 0.5) * float2(overScanX, overScanY) / 100.0 + 0.5;
	cd2 = min(cd2, 1.0 - cd2) * aspect;
	float2 cdist = float2(cornerSize, cornerSize);
	cd2 = (cdist - min(cd2, cdist));
	float dist = sqrt(dot(cd2, cd2));
	float cval = clamp((cdist.x - dist) * cornerSmooth, 0.0, 1.0);

	float2 ratio_scale = (xy * TextureSize - 0.5) / ilfac;
	// Step used below to oversample the beam profile.
	float filter = rcp(GetScale().y);
	float2 uv_ratio = frac(ratio_scale);

	// Snap to texel center
	xy = (floor(ratio_scale) * ilfac + 0.5) / TextureSize;

	// Lanczos2 kernel coefficients
	float4 coeffs = PI * float4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
	coeffs = FIX(coeffs);
	coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);
	coeffs /= dot(coeffs, float4(1.0, 1.0, 1.0, 1.0));

	// Sample current and next scanline.
	// TEX2D already raises each sample to CRTGamma, so col/col2 are in linear
	// light after filtering. CRTGamma must NOT be applied again here: doing so
	// squares the exponent and darkens the image severely.
	float4 col = clamp(mul(coeffs, float4x4(
		TEX2D(xy + float2(-one.x, 0.0)),
		TEX2D(xy),
		TEX2D(xy + float2(one.x, 0.0)),
		TEX2D(xy + float2(2.0 * one.x, 0.0)))),
		0.0, 1.0);
	float4 col2 = clamp(mul(coeffs, float4x4(
		TEX2D(xy + float2(-one.x, one.y)),
		TEX2D(xy + float2(0.0, one.y)),
		TEX2D(xy + one),
		TEX2D(xy + float2(2.0 * one.x, one.y)))),
		0.0, 1.0);

	// Calculate scanline influence with subpixel filtering: the beam profile is
	// evaluated at three vertical offsets (0, +filter/3, -filter/3) and averaged.
	float4 weights = scanlineWeights(uv_ratio.y, col);
	float4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);

	uv_ratio.y = uv_ratio.y + 1.0 / 3.0 * filter;
	weights = (weights + scanlineWeights(uv_ratio.y, col)) / 3.0;
	weights2 = (weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2)) / 3.0;
	uv_ratio.y = uv_ratio.y - 2.0 / 3.0 * filter;
	weights = weights + scanlineWeights(abs(uv_ratio.y), col) / 3.0;
	weights2 = weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2) / 3.0;

	float3 mul_res = (col * weights + col2 * weights2).rgb;
	mul_res *= float3(cval, cval, cval);

	// Dot mask - alternating green and magenta tint on even/odd output columns
	float3 dotMaskWeights = lerp(
		float3(1.0, 1.0 - dotMask, 1.0),
		float3(1.0 - dotMask, 1.0, 1.0 - dotMask),
		floor(fmod(mod_factor, 2.0))
	);
	mul_res *= dotMaskWeights;

	// Final gamma correction for display
	mul_res = pow(mul_res, 1.0 / monitorGamma);

	return float4(mul_res, 1.0);
}
