Skip to content

Commit

Permalink
fix(win/video): don't offload chroma subsampling math to texture samp…
Browse files Browse the repository at this point in the history
…ler when downscaling (#3014)

* Don't use sampler math for chroma if downscaling

* Correct portrait rotation offsets
  • Loading branch information
ns6089 committed Sep 3, 2024
1 parent 9d7e90e commit 7ce8547
Show file tree
Hide file tree
Showing 13 changed files with 177 additions and 93 deletions.
189 changes: 112 additions & 77 deletions src/platform/windows/display_vram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ namespace platf::dxgi {
blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0_vs_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0s_vs_hlsl;
blob_t convert_yuv420_planar_y_ps_hlsl;
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
Expand Down Expand Up @@ -488,6 +492,110 @@ namespace platf::dxgi {
frame_texture->AddRef();
output_texture.reset(frame_texture);

HRESULT status = S_OK;

// Creates a vertex shader from the compiled blob `x` and stores it in `y`.
// The call's result code is assigned to `status`, which must be in scope where
// the macro is used. On failure it logs the blob's identifier (stringized) with
// the result code in hex and returns -1 from the enclosing function.
#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
// Pixel shader counterpart of the helper above: same contract, but calls
// CreatePixelShader and reports "pixel shader" in the error message.
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}

// True when the display is larger than the requested output in at least one
// dimension, i.e. the captured frame gets shrunk during conversion.
const bool downscaling = display->width > width || display->height > height;

// Create the conversion shaders matching the output surface format.
// - The 4:2:0 formats (NV12, P010) need a separate chroma (UV) pass; that pass
//   uses the "type0s" shader set when downscaling and the "type0" set otherwise.
// - The *_fp16_ps slot receives the perceptual quantizer variant when the
//   display is HDR (only for the formats that check is_hdr() below) and the
//   linear variant in every other case.
// Each helper macro returns -1 from this function if shader creation fails.
switch (format) {
case DXGI_FORMAT_NV12:
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;

case DXGI_FORMAT_P010:
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
break;

case DXGI_FORMAT_R16_UINT:
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
// No separate UV shaders are created for this format.
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

case DXGI_FORMAT_AYUV:
// Packed 8-bit YUV 4:4:4
// The fp16 slot always gets the linear variant here; is_hdr() is not consulted.
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;

case DXGI_FORMAT_Y410:
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

default:
// Any other format has no shader set here; report it and fail.
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;
}

#undef create_vertex_shader_helper
#undef create_pixel_shader_helper

auto out_width = width;
auto out_height = height;

Expand Down Expand Up @@ -676,83 +784,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}

#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}

switch (format) {
case DXGI_FORMAT_NV12:
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
break;

case DXGI_FORMAT_P010:
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;

case DXGI_FORMAT_R16_UINT:
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

case DXGI_FORMAT_AYUV:
// Packed 8-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;

case DXGI_FORMAT_Y410:
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

default:
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;
}

#undef create_vertex_shader_helper
#undef create_pixel_shader_helper

auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
Expand Down Expand Up @@ -1923,6 +1954,10 @@ namespace platf::dxgi {
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset.x, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Chroma (UV) pixel shader wrapper built on convert_base.hlsl.
// NOTE(review): the file name is not visible here; presumably this is
// convert_yuv420_packed_uv_type0s_ps.hlsl - confirm.
#include "include/convert_base.hlsl"

// Must be defined before the include below so that the shared pixel shader
// body compiles its LEFT_SUBSAMPLING_SCALE code path.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Chroma (UV) pixel shader wrapper built on convert_linear_base.hlsl.
// NOTE(review): the file name is not visible here; presumably this is
// convert_yuv420_packed_uv_type0s_ps_linear.hlsl - confirm.
#include "include/convert_linear_base.hlsl"

// Must be defined before the include below so that the shared pixel shader
// body compiles its LEFT_SUBSAMPLING_SCALE code path.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Chroma (UV) pixel shader wrapper built on convert_perceptual_quantizer_base.hlsl.
// NOTE(review): the file name is not visible here; presumably this is
// convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl - confirm.
#include "include/convert_perceptual_quantizer_base.hlsl"

// Must be defined before the include below so that the shared pixel shader
// body compiles its LEFT_SUBSAMPLING_SCALE code path.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Vertex shader for the chroma (UV) pass in its LEFT_SUBSAMPLING_SCALE variant.
// NOTE(review): presumably convert_yuv420_packed_uv_type0s_vs.hlsl - the file
// name is not visible here; confirm.

// Two-component texture coordinate offset, bound to constant buffer slot b0.
cbuffer subsample_offset_cbuffer : register(b0) {
float2 subsample_offset;
};

// Rotation step count, bound to constant buffer slot b1. base_vs.hlsl swaps the
// x/y components of the offset when this value is odd.
cbuffer rotate_texture_steps_cbuffer : register(b1) {
int rotate_texture_steps;
};

// Must be defined before including base_vs.hlsl: it selects the code path that
// halves the offset and emits right/center/left x-coordinates together with
// top/bottom y-coordinates.
#define LEFT_SUBSAMPLING_SCALE
#include "include/base_vs.hlsl"

// Entry point: forwards the vertex id and both constant buffer values to the
// shared fullscreen-triangle generator and returns its result unchanged.
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cbuffer color_matrix_cbuffer : register(b3) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, float2(0, 0), rotate_texture_steps);

output.viewport = vertex_id / 3;

Expand Down
2 changes: 1 addition & 1 deletion src_assets/windows/assets/shaders/directx/cursor_vs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b2) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
23 changes: 15 additions & 8 deletions src_assets/windows/assets/shaders/directx/include/base_vs.hlsl
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#include "include/base_vs_types.hlsl"

#if defined(LEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps)
#elif defined(TOPLEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps)
#else
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_steps)
#endif
{
vertex_t output;
float2 tex_coord;
Expand All @@ -30,11 +24,24 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
sin(rotation_radians), cos(rotation_radians) };
float2 rotation_center = { 0.5, 0.5 };
tex_coord = round(rotation_center + mul(rotation_matrix, tex_coord - rotation_center));

if (rotate_texture_steps % 2) {
subsample_offset.xy = subsample_offset.yx;
}
}

#if defined(LEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y);
#elif defined (TOPLEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float2 halfsample_offset = subsample_offset / 2;
float3 right_center_left = float3(tex_coord.x + halfsample_offset.x,
tex_coord.x - halfsample_offset.x,
tex_coord.x - 3 * halfsample_offset.x);
float2 top_bottom = float2(tex_coord.y - halfsample_offset.y,
tex_coord.y + halfsample_offset.y);
output.tex_right_center_left_top = float4(right_center_left, top_bottom.x);
output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y);
#elif defined(TOPLEFT_SUBSAMPLING)
output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y);
output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ struct vertex_t
float4 viewpoint_pos : SV_Position;
#if defined(LEFT_SUBSAMPLING)
float3 tex_right_left_center : TEXCOORD;
#elif defined (TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD;
float3 tex_right_left_bottom : TEXCOORD;
#elif defined(LEFT_SUBSAMPLING_SCALE)
float4 tex_right_center_left_top : TEXCOORD0;
float4 tex_right_center_left_bottom : TEXCOORD1;
#elif defined(TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD0;
float3 tex_right_left_bottom : TEXCOORD1;
#else
float2 tex_coord : TEXCOORD;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ float2 main_ps(vertex_t input) : SV_Target
float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb;
float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center
rgb *= 2;
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left
rgb = CONVERT_FUNCTION(rgb * (1./8));
#elif defined(TOPLEFT_SUBSAMPLING)
float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb;
float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb;
Expand Down

0 comments on commit 7ce8547

Please sign in to comment.