backend: gl_common: Use linear interpolation on GPU for blur kernels.

Make use of hardware linear interpolation in a GPU to sample 2 pixels
with a single texture access inside the blur shaders by sampling between
both pixels based on their relative weight.

This is significantly easier for a single dimension, as 2D bilinear
filtering would raise additional constraints on the kernels (no single
zero-entries, no zero-diagonals, ...) which require additional checks
with limited improvements. Therefore, using interpolation only along the
larger dimension should be a sufficient improvement.

Using this will effectively halve the number of texture accesses and
additions needed for a kernel. E.g. a 1D pass of the Gaussian blur
with radius 15 will only need 16 samples instead of 31.
This commit is contained in:
Bernd Busse 2020-06-07 12:41:32 +02:00
parent 4b0ff37b36
commit 88b1638487
No known key found for this signature in database
GPG Key ID: 6DD2A3C48E63A5AB
1 changed files with 55 additions and 12 deletions

View File

@ -996,7 +996,7 @@ void *gl_create_blur_context(backend_t *base, enum blur_method method, void *arg
}
);
static const char *FRAG_SHADER_BLUR_ADD = QUOTE(
sum += float(%.7g) * texture2D(tex_src, uv + pixel_norm * vec2(%d, %d));
sum += float(%.7g) * texture2D(tex_src, uv + pixel_norm * vec2(%.7g, %.7g));
);
// clang-format on
@ -1008,25 +1008,68 @@ void *gl_create_blur_context(backend_t *base, enum blur_method method, void *arg
// Build shader
int width = kern->w, height = kern->h;
int nele = width * height;
// '%.7g' is at most 14 characters, inserted 3 times
size_t body_len = (strlen(shader_add) + 42) * (uint)nele;
char *shader_body = ccalloc(body_len, char);
char *pc = shader_body;
// Make use of the linear interpolation hardware by sampling 2 pixels with
// one texture access by sampling between both pixels based on their
// relative weight. Easiest done in a single dimension as 2D bilinear
// filtering would raise additional constraints on the kernels. Therefore
// only use interpolation along the larger dimension.
double sum = 0.0;
if (width > height) {
// use interpolation in x dimension (width)
for (int j = 0; j < height; ++j) {
for (int k = 0; k < width; ++k) {
double val;
val = kern->data[j * width + k];
if (val == 0) {
for (int k = 0; k < width; k += 2) {
double val1, val2;
val1 = kern->data[j * width + k];
val2 = (k + 1 < width)
? kern->data[j * width + k + 1]
: 0;
double combined_weight = val1 + val2;
if (combined_weight == 0) {
continue;
}
sum += val;
pc += snprintf(pc, body_len - (ulong)(pc - shader_body),
FRAG_SHADER_BLUR_ADD, val, k - width / 2,
j - height / 2);
sum += combined_weight;
double offset_x =
k + (val2 / combined_weight) - (width / 2);
double offset_y = j - (height / 2);
pc += snprintf(
pc, body_len - (ulong)(pc - shader_body),
shader_add, combined_weight, offset_x, offset_y);
assert(pc < shader_body + body_len);
}
}
} else {
// use interpolation in y dimension (height)
for (int j = 0; j < height; j += 2) {
for (int k = 0; k < width; ++k) {
double val1, val2;
val1 = kern->data[j * width + k];
val2 = (j + 1 < height)
? kern->data[(j + 1) * width + k]
: 0;
double combined_weight = val1 + val2;
if (combined_weight == 0) {
continue;
}
sum += combined_weight;
double offset_x = k - (width / 2);
double offset_y =
j + (val2 / combined_weight) - (height / 2);
pc += snprintf(
pc, body_len - (ulong)(pc - shader_body),
shader_add, combined_weight, offset_x, offset_y);
assert(pc < shader_body + body_len);
}
}
}
auto pass = ctx->blur_shader + i;
size_t shader_len = strlen(FRAG_SHADER_BLUR) + strlen(extension) +