FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/vulkan_spirv.h"
24 #include "libavutil/opt.h"
25 #include "vulkan_filter.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
/* GLSL type of one integral-image element; emitted into the shaders as DTYPE. */
30 #define TYPE_NAME "vec4"
/* Number of lanes in TYPE_NAME; one x/y search offset is processed per lane. */
31 #define TYPE_ELEMS 4
/* Size of one element; emitted as T_ALIGN and used to compute the integral row
 * stride (presumably bytes, i.e. 4 bytes per float lane - confirm). */
32 #define TYPE_SIZE (TYPE_ELEMS*4)
/* Number of DTYPE elements in one GLSL "Block" handled at a time by the
 * horizontal prefix-sum shader. */
33 #define TYPE_BLOCK_ELEMS 16
/* Size of one Block; emitted as T_BLOCK_ALIGN. */
34 #define TYPE_BLOCK_SIZE (TYPE_SIZE * TYPE_BLOCK_ELEMS)
/* Value passed to ff_vk_shader_init() for the shader size arguments
 * (presumably the compute workgroup size per dimension). */
35 #define WG_SIZE 32
36 
/*
 * Private state of the filter.
 * NOTE(review): this listing is missing several member lines and the closing
 * "} NLMeansVulkanContext;" line; only the members visible here are documented.
 */
37 typedef struct NLMeansVulkanContext {
39 
43 
46 
48 
53 
/* X and Y components of every search offset, filled in init_filter() for the
 * whole research window except the centre (0, 0). */
54  int *xoffsets;
55  int *yoffsets;
/* Per-component exponent scale derived from the strength options in
 * init_filter(); 0.0 marks a component that is not filtered. */
57  float strength[4];
/* Per-component half patch size (patch size / 2). */
58  int patch[4];
59 
/* User options as parsed; init_filter() forces r and p to odd values and
 * clamps t. */
60  struct nlmeans_opts {
61  int r;        /* research window size */
62  double s;     /* default denoising strength */
63  double sc[4]; /* per-component strength override; NaN selects s */
64  int p;        /* default patch size */
65  int pc[4];    /* per-component patch size override; 0 selects p */
66  int t;        /* upper bound on offset groups handled per dispatch */
67  } opts;
69 
/*
 * Push-constant payload for the horizontal/vertical integral shaders.
 * The member order follows the GLSL pushConstants block emitted by
 * shared_shd_def().
 * NOTE(review): the closing "} IntegralPushData;" line is missing from this
 * listing.
 */
70 typedef struct IntegralPushData {
71  uint32_t width[4];          /* per-plane width, indexed by plane */
72  uint32_t height[4];         /* per-plane height, indexed by plane */
73  float strength[4];          /* per-component strength; 0.0 skips the component */
74  uint32_t comp_off[4];       /* channel index of each component inside its plane */
75  uint32_t comp_plane[4];     /* plane index of each component */
76  VkDeviceAddress integral_base; /* device address of the integral buffer */
77  uint64_t integral_size;     /* size of one integral image within that buffer */
78  uint64_t int_stride;        /* stride between rows of an integral image */
79  uint32_t xyoffs_start;      /* index of the first x/y offset for this dispatch */
80  uint32_t nb_components;     /* number of components being processed */
82 
/*
 * Appends the GLSL preamble shared by the horizontal and vertical integral
 * shaders to shd: the int64 extension, the DTYPE/alignment defines, the
 * DataBuffer and BlockBuffer buffer-reference types, and the push-constant
 * block matching IntegralPushData.
 */
83 static void shared_shd_def(FFVulkanShader *shd) {
84  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
85  GLSLC(0, );
86  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
87  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
88  GLSLF(0, #define T_BLOCK_ELEMS %i ,TYPE_BLOCK_ELEMS);
89  GLSLF(0, #define T_BLOCK_ALIGN %i ,TYPE_BLOCK_SIZE);
90  GLSLC(0, );
/* Element-wise view of the integral buffer. */
91  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
92  GLSLC(1, DTYPE v[]; );
93  GLSLC(0, }; );
/* Block-wise view (T_BLOCK_ELEMS elements per access) of the same memory. */
94  GLSLC(0, struct Block { );
95  GLSLC(1, DTYPE data[T_BLOCK_ELEMS]; );
96  GLSLC(0, }; );
97  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_BLOCK_ALIGN) buffer BlockBuffer { );
98  GLSLC(1, Block v[]; );
99  GLSLC(0, }; );
/* Must stay in the same order as the members of IntegralPushData. */
100  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
101  GLSLC(1, uvec4 width; );
102  GLSLC(1, uvec4 height; );
103  GLSLC(1, vec4 strength; );
104  GLSLC(1, uvec4 comp_off; );
105  GLSLC(1, uvec4 comp_plane; );
106  GLSLC(1, DataBuffer integral_base; );
107  GLSLC(1, uint64_t integral_size; );
108  GLSLC(1, uint64_t int_stride; );
109  GLSLC(1, uint xyoffs_start; );
110  GLSLC(1, uint nb_components; );
111  GLSLC(0, }; );
112  GLSLC(0, );
113 
/* NOTE(review): the first line of the following call (listing line 114) is
 * missing here; presumably it registers the push-constant range for the
 * compute stage - confirm against the real file. */
115  VK_SHADER_STAGE_COMPUTE_BIT);
116 }
117 
119  FFVulkanShader *shd_horizontal,
120  FFVulkanShader *shd_vertical,
121  FFVkSPIRVCompiler *spv,
122  const AVPixFmtDescriptor *desc, int planes)
123 {
/*
 * Builds, compiles, links and registers the two compute shaders that produce
 * the integral images: "nlmeans_horizontal" and "nlmeans_vertical".
 * The vertical shader writes per-column running sums of squared differences;
 * the horizontal shader then prefix-sums each row in place.
 * Returns the value left in err (presumably 0 on success, a negative error
 * code otherwise - RET() is defined outside this listing).
 * NOTE(review): the first line of the signature and the declaration of
 * desc_set (listing lines 118 and 129) are missing from this listing.
 */
124  int err;
125  uint8_t *spv_data;
126  size_t spv_len;
127  void *spv_opaque = NULL;
128  FFVulkanShader *shd;
130 
/* ---- Horizontal pass: one invocation per image row ---- */
131  shd = shd_horizontal;
132  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_horizontal",
133  VK_SHADER_STAGE_COMPUTE_BIT,
134  (const char *[]) { "GL_EXT_buffer_reference",
135  "GL_EXT_buffer_reference2" }, 2,
136  WG_SIZE, 1, 1,
137  0));
138  shared_shd_def(shd);
139 
140  GLSLC(0, );
141  GLSLC(0, void main() );
142  GLSLC(0, { );
143  GLSLC(1, uint64_t offset; );
144  GLSLC(1, DataBuffer dst; );
145  GLSLC(1, BlockBuffer b_dst; );
146  GLSLC(1, Block block; );
147  GLSLC(1, DTYPE s2; );
148  GLSLC(1, DTYPE prefix_sum; );
149  GLSLC(1, ivec2 pos; );
150  GLSLC(1, int k; );
151  GLSLC(1, int o; );
152  GLSLC(0, );
153  GLSLC(1, DataBuffer integral_data; );
154  GLSLC(0, );
155  GLSLC(1, uint c_plane; );
156  GLSLC(0, );
/* Workgroup Y selects the component, workgroup Z the offset group. */
157  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); );
158  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); );
159  GLSLC(0, );
/* A strength of exactly 0.0 disables filtering for this component. */
160  GLSLC(1, if (strength[comp_idx] == 0.0) );
161  GLSLC(2, return; );
162  GLSLC(0, );
/* Each (offset group, component) pair owns one integral image of
 * integral_size inside the shared buffer. */
163  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
164  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
165  GLSLC(0, );
166  GLSLC(1, c_plane = comp_plane[comp_idx]; );
167  GLSLC(0, );
/* The global X invocation index is used as the row number. */
168  GLSLC(1, pos.y = int(gl_GlobalInvocationID.x); );
169  GLSLC(1, if (pos.y < height[c_plane]) { );
170  GLSLC(2, prefix_sum = DTYPE(0); );
171  GLSLC(2, offset = int_stride * uint64_t(pos.y); );
172  GLSLC(2, b_dst = BlockBuffer(uint64_t(integral_data) + offset); );
173  GLSLC(0, );
/* In-place inclusive prefix sum along the row, T_BLOCK_ELEMS elements at a
 * time. NOTE(review): whole blocks are read and written, so this assumes each
 * row is padded to a multiple of T_BLOCK_ELEMS elements - confirm against
 * how int_stride and width are computed by the caller. */
174  GLSLC(2, for (k = 0; k * T_BLOCK_ELEMS < width[c_plane]; k++) { );
175  GLSLC(3, block = b_dst.v[k]; );
176  GLSLC(3, for (o = 0; o < T_BLOCK_ELEMS; o++) { );
177  GLSLC(4, s2 = block.data[o]; );
178  GLSLC(4, block.data[o] = s2 + prefix_sum; );
179  GLSLC(4, prefix_sum += s2; );
180  GLSLC(3, } );
181  GLSLC(3, b_dst.v[k] = block; );
182  GLSLC(2, } );
183  GLSLC(1, } );
184  GLSLC(0, } );
185 
186  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
187  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
188 
189  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
190 
/* ---- Vertical pass: one invocation per image column ---- */
191  shd = shd_vertical;
192  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_vertical",
193  VK_SHADER_STAGE_COMPUTE_BIT,
194  (const char *[]) { "GL_EXT_buffer_reference",
195  "GL_EXT_buffer_reference2" }, 2,
196  WG_SIZE, 1, 1,
197  0));
198  shared_shd_def(shd);
199 
/* First descriptor set added: the input image planes. */
200  desc_set = (FFVulkanDescriptorSetBinding []) {
201  {
202  .name = "input_img",
203  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
204  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
205  .mem_quali = "readonly",
206  .dimensions = 2,
207  .elems = planes,
208  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
209  },
210  };
211  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 0, 0));
212 
/* Second descriptor set added: the table of x/y search offsets. */
213  desc_set = (FFVulkanDescriptorSetBinding []) {
214  {
215  .name = "xyoffsets_buffer",
216  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
217  .mem_quali = "readonly",
218  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
219  .buf_content = "ivec2 xyoffsets[];",
220  },
221  };
222  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
223 
224  GLSLC(0, );
225  GLSLC(0, void main() );
226  GLSLC(0, { );
227  GLSLC(1, uint64_t offset; );
228  GLSLC(1, DataBuffer dst; );
229  GLSLC(1, float s1; );
230  GLSLC(1, DTYPE s2; );
231  GLSLC(1, DTYPE prefix_sum; );
232  GLSLC(1, uvec2 size; );
233  GLSLC(1, ivec2 pos; );
234  GLSLC(1, ivec2 pos_off; );
235  GLSLC(0, );
236  GLSLC(1, DataBuffer integral_data; );
237  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
238  GLSLC(0, );
239  GLSLC(1, uint c_off; );
240  GLSLC(1, uint c_plane; );
241  GLSLC(0, );
242  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); );
243  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); );
244  GLSLC(0, );
245  GLSLC(1, if (strength[comp_idx] == 0.0) );
246  GLSLC(2, return; );
247  GLSLC(0, );
248  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
249  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
/* Load the TYPE_ELEMS offsets handled by this offset group, one per lane. */
250  for (int i = 0; i < TYPE_ELEMS; i++)
251  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
252  GLSLC(0, );
253  GLSLC(1, c_off = comp_off[comp_idx]; );
254  GLSLC(1, c_plane = comp_plane[comp_idx]; );
255  GLSLC(1, size = imageSize(input_img[c_plane]); );
256  GLSLC(0, );
/* The global X invocation index is used as the column number. */
257  GLSLC(1, pos.x = int(gl_GlobalInvocationID.x); );
258  GLSLC(1, if (pos.x < width[c_plane]) { );
259  GLSLC(2, prefix_sum = DTYPE(0); );
260  GLSLC(2, for (pos.y = 0; pos.y < height[c_plane]; pos.y++) { );
261  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
262  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
263  GLSLC(4, s1 = imageLoad(input_img[c_plane], pos)[c_off]; );
/* For each lane, fetch the shifted sample; when the shifted position is
 * outside the image the centre sample is used, giving a zero difference. */
264  for (int i = 0; i < TYPE_ELEMS; i++) {
265  GLSLF(4, pos_off = pos + offs[%i]; ,i);
266  GLSLC(4, if (!IS_WITHIN(uvec2(pos_off), size)) );
267  GLSLF(5, s2[%i] = s1; ,i);
268  GLSLC(4, else );
269  GLSLF(5, s2[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; ,i);
270  }
/* Squared difference per lane, accumulated down the column. */
271  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
272  GLSLC(3, dst.v[pos.x] = s2 + prefix_sum; );
273  GLSLC(3, prefix_sum += s2; );
274  GLSLC(2, } );
275  GLSLC(1, } );
276  GLSLC(0, } );
277 
278  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
279  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
280 
281  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
282 
283 fail:
/* Reached on success as well as on failure: release the compiler's
 * per-shader state if one was produced. */
284  if (spv_opaque)
285  spv->free_shader(spv, &spv_opaque);
286 
287  return err;
288 }
289 
/*
 * Push-constant payload for the "nlmeans_weights" shader; the member order
 * follows the GLSL pushConstants block emitted in init_weights_pipeline().
 * NOTE(review): listing line 295 is missing between ws_stride and strength;
 * the GLSL block declares "ivec4 patch_size" at that position, so a matching
 * member presumably exists in the real file. The closing
 * "} WeightsPushData;" line is missing as well.
 */
290 typedef struct WeightsPushData {
291  uint32_t width[4];          /* per-plane width, indexed by plane */
292  uint32_t height[4];         /* per-plane height, indexed by plane */
293  uint32_t ws_offset[4];      /* start of each component inside the weights/sums arrays */
294  uint32_t ws_stride[4];      /* row stride of each component inside those arrays */
296  float strength[4];          /* per-component exponent scale; 0.0 skips the component */
297  uint32_t comp_off[4];       /* channel index of each component inside its plane */
298  uint32_t comp_plane[4];     /* plane index of each component */
299  VkDeviceAddress integral_base; /* device address of the integral buffer */
300  uint64_t integral_size;     /* size of one integral image within that buffer */
301  uint64_t int_stride;        /* stride between rows of an integral image */
302  uint32_t xyoffs_start;      /* index of the first x/y offset for this dispatch */
303  uint32_t ws_count;          /* number of weight/sum entries per offset group */
304  uint32_t nb_components;     /* number of components being processed */
306 
308  FFVulkanShader *shd,
309  FFVkSPIRVCompiler *spv,
310  const AVPixFmtDescriptor *desc,
311  int planes)
312 {
/*
 * Builds, compiles, links and registers the "nlmeans_weights" compute shader.
 * For every pixel and offset lane the shader reads the four corners of the
 * patch rectangle from the integral image, turns the patch difference into a
 * weight with exp(), and accumulates the weight and the weighted neighbour
 * sample into the weights/sums buffers.
 * Returns the value left in err (RET() is defined outside this listing).
 * NOTE(review): the first line of the signature, the declaration of desc_set
 * and the head of the push-constant registration call are missing from this
 * listing.
 */
313  int err;
314  uint8_t *spv_data;
315  size_t spv_len;
316  void *spv_opaque = NULL;
318 
319  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
320  VK_SHADER_STAGE_COMPUTE_BIT,
321  (const char *[]) { "GL_EXT_buffer_reference",
322  "GL_EXT_buffer_reference2" }, 2,
323  WG_SIZE, WG_SIZE, 1,
324  0));
325 
326  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
327  GLSLC(0, );
328  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
329  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
330  GLSLC(0, );
331  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
332  GLSLC(1, DTYPE v[]; );
333  GLSLC(0, }; );
/* Must stay in the same order as the members of WeightsPushData. */
334  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
335  GLSLC(1, uvec4 width; );
336  GLSLC(1, uvec4 height; );
337  GLSLC(1, uvec4 ws_offset; );
338  GLSLC(1, uvec4 ws_stride; );
339  GLSLC(1, ivec4 patch_size; );
340  GLSLC(1, vec4 strength; );
341  GLSLC(1, uvec4 comp_off; );
342  GLSLC(1, uvec4 comp_plane; );
343  GLSLC(1, DataBuffer integral_base; );
344  GLSLC(1, uint64_t integral_size; );
345  GLSLC(1, uint64_t int_stride; );
346  GLSLC(1, uint xyoffs_start; );
347  GLSLC(1, uint ws_count; );
348  GLSLC(1, uint nb_components; );
349  GLSLC(0, }; );
350  GLSLC(0, );
351 
353  VK_SHADER_STAGE_COMPUTE_BIT);
354 
/* First descriptor set added: input planes plus the read/write weights and
 * sums accumulators. */
355  desc_set = (FFVulkanDescriptorSetBinding []) {
356  {
357  .name = "input_img",
358  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
359  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
360  .mem_quali = "readonly",
361  .dimensions = 2,
362  .elems = planes,
363  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
364  },
365  {
366  .name = "weights_buffer",
367  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
368  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
369  .buf_content = "float weights[];",
370  },
371  {
372  .name = "sums_buffer",
373  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
374  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
375  .buf_content = "float sums[];",
376  },
377  };
378  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 3, 0, 0));
379 
/* Second descriptor set added: the table of x/y search offsets. */
380  desc_set = (FFVulkanDescriptorSetBinding []) {
381  {
382  .name = "xyoffsets_buffer",
383  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
384  .mem_quali = "readonly",
385  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
386  .buf_content = "ivec2 xyoffsets[];",
387  },
388  };
389  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
390 
391  GLSLC(0, );
392  GLSLC(0, void main() );
393  GLSLC(0, { );
394  GLSLC(1, uint64_t offset; );
395  GLSLC(1, DataBuffer dst; );
396  GLSLC(1, uvec2 size; );
397  GLSLC(1, ivec2 pos; );
398  GLSLC(1, ivec2 pos_off; );
399  GLSLC(1, int p; );
400  GLSLC(1, float s; );
401  GLSLC(0, );
402  GLSLC(1, DataBuffer integral_data; );
403  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
404  GLSLC(0, );
405  GLSLC(1, uint c_off; );
406  GLSLC(1, uint c_plane; );
407  GLSLC(1, uint ws_off; );
408  GLSLC(0, );
409  GLSLC(1, pos = ivec2(gl_GlobalInvocationID.xy); );
/* Workgroup Z packs both indices: component = z mod nb_components, offset
 * group = z / nb_components. The doubled percent sign is presumably needed
 * because GLSLC() feeds a printf-style formatter - confirm. */
410  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.z) %% nb_components; );
411  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z) / nb_components; );
412  GLSLC(0, );
413  GLSLC(1, c_off = comp_off[comp_idx]; );
414  GLSLC(1, c_plane = comp_plane[comp_idx]; );
415  GLSLC(1, p = patch_size[comp_idx]; );
416  GLSLC(1, s = strength[comp_idx]; );
/* Skip disabled components and pixels closer than p to any image edge, so
 * the corner reads below stay inside the integral image. */
417  GLSLC(1, if (s == 0.0 || pos.x < p || pos.y < p || pos.x >= width[c_plane] - p || pos.y >= height[c_plane] - p) );
418  GLSLC(2, return; );
419  GLSLC(0, );
420  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
421  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
422  for (int i = 0; i < TYPE_ELEMS; i++)
423  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
424  GLSLC(0, );
/* Each offset group accumulates into its own ws_count-sized slice. */
425  GLSLC(1, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; );
426  GLSLC(1, size = imageSize(input_img[c_plane]); );
427  GLSLC(0, );
428  GLSLC(1, DTYPE a; );
429  GLSLC(1, DTYPE b; );
430  GLSLC(1, DTYPE c; );
431  GLSLC(1, DTYPE d; );
432  GLSLC(0, );
433  GLSLC(1, DTYPE patch_diff; );
434  GLSLC(1, vec4 src; );
435  GLSLC(1, vec4 w; );
436  GLSLC(1, float w_sum; );
437  GLSLC(1, float sum; );
438  GLSLC(0, );
/* Fetch the neighbour sample for each of the four lanes, falling back to
 * the centre pixel when the shifted position is outside the image. */
439  for (int i = 0; i < 4; i++) {
440  GLSLF(1, pos_off = pos + offs[%i]; ,i);
441  GLSLC(1, if (!IS_WITHIN(uvec2(pos_off), size)) );
442  GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos)[c_off]; ,i);
443  GLSLC(1, else );
444  GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; ,i);
445  }
446  GLSLC(0, );
/* Corners of the patch rectangle in the integral image: a and c on row
 * pos.y - p, b and d on row pos.y + p. */
447  GLSLC(1, offset = int_stride * uint64_t(pos.y - p); );
448  GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); );
449  GLSLC(1, a = dst.v[pos.x - p]; );
450  GLSLC(1, c = dst.v[pos.x + p]; );
451  GLSLC(1, offset = int_stride * uint64_t(pos.y + p); );
452  GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); );
453  GLSLC(1, b = dst.v[pos.x - p]; );
454  GLSLC(1, d = dst.v[pos.x + p]; );
455  GLSLC(0, );
/* s is non-positive as computed in init_filter(), so larger patch
 * differences give smaller weights. */
456  GLSLC(1, patch_diff = d + a - b - c; );
457  GLSLC(1, w = exp(patch_diff * s); );
458  GLSLC(1, w_sum = w[0] + w[1] + w[2] + w[3]; );
459  GLSLC(1, sum = dot(w, src * 255); );
460  GLSLC(0, );
461  GLSLC(1, weights[ws_off] += w_sum; );
462  GLSLC(1, sums[ws_off] += sum; );
463  GLSLC(0, } );
464 
465  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
466  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
467 
468  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
469 
470 fail:
/* Reached on success as well as on failure. */
471  if (spv_opaque)
472  spv->free_shader(spv, &spv_opaque);
473 
474  return err;
475 }
476 
/*
 * Push-constant payload for the "nlmeans_denoise" shader; the member order
 * follows the GLSL pushConstants block emitted in init_denoise_pipeline().
 * NOTE(review): the closing "} DenoisePushData;" line is missing from this
 * listing.
 */
477 typedef struct DenoisePushData {
478  uint32_t comp_off[4];   /* channel index of each component inside its plane */
479  uint32_t comp_plane[4]; /* plane index of each component */
480  uint32_t ws_offset[4];  /* start of each component inside the weights/sums arrays */
481  uint32_t ws_stride[4];  /* row stride of each component inside those arrays */
482  uint32_t ws_count;      /* number of weight/sum entries per offset group */
483  uint32_t t;             /* number of offset-group slices to add up */
484  uint32_t nb_components; /* number of components being processed */
486 
489  const AVPixFmtDescriptor *desc, int planes)
490 {
/*
 * Builds, compiles, links and registers the "nlmeans_denoise" compute shader,
 * which combines the accumulated weights and sums with the source pixel and
 * writes the result to the output image.
 * Returns the value left in err (RET() is defined outside this listing).
 * NOTE(review): the first lines of the signature, the declaration of
 * desc_set and the head of the push-constant registration call are missing
 * from this listing.
 */
491  int err;
492  uint8_t *spv_data;
493  size_t spv_len;
494  void *spv_opaque = NULL;
496  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
497  VK_SHADER_STAGE_COMPUTE_BIT,
498  (const char *[]) { "GL_EXT_buffer_reference",
499  "GL_EXT_buffer_reference2" }, 2,
500  WG_SIZE, WG_SIZE, 1,
501  0));
502 
/* Must stay in the same order as the members of DenoisePushData. */
503  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
504  GLSLC(1, uvec4 comp_off; );
505  GLSLC(1, uvec4 comp_plane; );
506  GLSLC(1, uvec4 ws_offset; );
507  GLSLC(1, uvec4 ws_stride; );
508  GLSLC(1, uint32_t ws_count; );
509  GLSLC(1, uint32_t t; );
510  GLSLC(1, uint32_t nb_components; );
511  GLSLC(0, }; );
512 
514  VK_SHADER_STAGE_COMPUTE_BIT);
515 
/* First descriptor set added: input planes (read) and output planes (write). */
516  desc_set = (FFVulkanDescriptorSetBinding []) {
517  {
518  .name = "input_img",
519  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
520  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
521  .mem_quali = "readonly",
522  .dimensions = 2,
523  .elems = planes,
524  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
525  },
526  {
527  .name = "output_img",
528  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
529  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
530  .mem_quali = "writeonly",
531  .dimensions = 2,
532  .elems = planes,
533  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
534  },
535  };
536  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
537 
/* Second descriptor set added: the accumulated weights and sums, read-only. */
538  desc_set = (FFVulkanDescriptorSetBinding []) {
539  {
540  .name = "weights_buffer",
541  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
542  .mem_quali = "readonly",
543  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
544  .buf_content = "float weights[];",
545  },
546  {
547  .name = "sums_buffer",
548  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
549  .mem_quali = "readonly",
550  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
551  .buf_content = "float sums[];",
552  },
553  };
554 
555  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
556 
557  GLSLC(0, void main() );
558  GLSLC(0, { );
559  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
/* Workgroup Z selects the image plane. */
560  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
561  GLSLC(1, const uvec2 size = imageSize(output_img[plane]); );
562  GLSLC(0, );
563  GLSLC(1, uint c_off; );
564  GLSLC(1, uint c_plane; );
565  GLSLC(1, uint ws_off; );
566  GLSLC(0, );
567  GLSLC(1, float w_sum; );
568  GLSLC(1, float sum; );
569  GLSLC(1, vec4 src; );
570  GLSLC(1, vec4 r; );
571  GLSLC(1, uint invoc_idx; );
572  GLSLC(1, uint comp_idx; );
573  GLSLC(0, );
574  GLSLC(1, if (!IS_WITHIN(pos, size)) );
575  GLSLC(2, return; );
576  GLSLC(0, );
577  GLSLC(1, src = imageLoad(input_img[plane], pos); );
/* Visit every component stored in this plane. */
578  GLSLC(1, for (comp_idx = 0; comp_idx < nb_components; comp_idx++) { );
579  GLSLC(2, if (plane == comp_plane[comp_idx]) { );
580  GLSLC(3, w_sum = 0.0; );
581  GLSLC(3, sum = 0.0; );
/* Add up the t per-offset-group slices written by the weights pass. */
582  GLSLC(3, for (invoc_idx = 0; invoc_idx < t; invoc_idx++) { );
583  GLSLC(4, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; );
584  GLSLC(4, w_sum += weights[ws_off]; );
585  GLSLC(4, sum += sums[ws_off]; );
586  GLSLC(3, } );
587  GLSLC(3, c_off = comp_off[comp_idx]; );
/* The source pixel itself takes part with weight 1; values are scaled by
 * 255 to match the sums and scaled back afterwards. */
588  GLSLC(3, r[c_off] = (sum + src[c_off] * 255) / (1.0 + w_sum) / 255; );
589  GLSLC(2, } );
590  GLSLC(1, } );
/* NOTE(review): r is declared without an initializer and only the channels
 * matched in the loop above are assigned, so the remaining channels are
 * stored with undefined values. Check whether that matters for the output
 * formats in use, or whether r should start out as src. */
591  GLSLC(1, imageStore(output_img[plane], pos, r); );
592  GLSLC(0, } );
593 
594  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
595  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
596 
597  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
598 
599 fail:
/* Reached on success as well as on failure. */
600  if (spv_opaque)
601  spv->free_shader(spv, &spv_opaque);
602 
603  return err;
604 }
605 
607 {
/*
 * One-time setup performed on first use: normalises the options, builds the
 * table of search offsets and uploads it to a Vulkan buffer, creates the
 * execution pool and the four shaders, binds the offsets buffer to the
 * shaders that read it, and finally sets s->initialized.
 * Returns the value left in err, or an error code directly on the early
 * returns below.
 * NOTE(review): the signature line and the assignment to desc (listing lines
 * 606 and 618) are missing from this listing.
 */
608  int rad, err;
609  int xcnt = 0, ycnt = 0;
610  NLMeansVulkanContext *s = ctx->priv;
611  FFVulkanContext *vkctx = &s->vkctx;
612  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
613  FFVkSPIRVCompiler *spv = NULL;
614  int *offsets_buf;
615  int offsets_dispatched = 0, nb_dispatches = 0;
616 
617  const AVPixFmtDescriptor *desc;
619  if (!desc)
620  return AVERROR(EINVAL);
621 
/* Window sizes must be odd so that they have a centre pixel.
 * NOTE(review): the three warning messages below have no trailing newline,
 * unlike the other log calls in this function. */
622  if (!(s->opts.r & 1)) {
623  s->opts.r |= 1;
624  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
625  s->opts.r);
626  }
627 
628  if (!(s->opts.p & 1)) {
629  s->opts.p |= 1;
630  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
631  s->opts.p);
632  }
633 
/* Resolve the per-component overrides: a NaN strength or a zero patch size
 * falls back to the global option. */
634  for (int i = 0; i < 4; i++) {
635  double str = !isnan(s->opts.sc[i]) ? s->opts.sc[i] : s->opts.s;
636  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
637  if (str == 0.0) {
638  s->strength[i] = 0.0;
639  } else {
/* Stored value is -(255*255) / (10*str)^2, i.e. the negative factor the
 * weights shader multiplies the patch difference by before exp(). */
640  str = 10.0f*str;
641  str *= -str;
642  str = 255.0*255.0 / str;
643  s->strength[i] = str;
644  }
645  if (!(ps & 1)) {
646  ps |= 1;
647  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
648  ps);
649  }
/* Keep the half size; the shader uses it as the patch radius. */
650  s->patch[i] = ps / 2;
651  }
652 
/* Every position of the research window except the centre is an offset.
 * With r odd, (2*rad + 1) equals r and r*r - 1 is a multiple of 8, so the
 * count is always a multiple of TYPE_ELEMS. */
653  rad = s->opts.r/2;
654  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
/* NOTE(review): neither allocation is checked for NULL before the arrays
 * are written in the loop below. */
655  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
656  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
/* Reset and recount while filling the tables. */
657  s->nb_offsets = 0;
658 
659  for (int x = -rad; x <= rad; x++) {
660  for (int y = -rad; y <= rad; y++) {
661  if (!x && !y)
662  continue;
663 
664  s->xoffsets[xcnt++] = x;
665  s->yoffsets[ycnt++] = y;
666  s->nb_offsets++;
667  }
668  }
669 
/* Upload the offsets as interleaved (x, y) pairs, matching the shaders'
 * "ivec2 xyoffsets[]" declaration. */
670  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
671  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
672  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
673  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
674  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
675 
676  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
677  offsets_buf[i + 0] = s->xoffsets[i >> 1];
678  offsets_buf[i + 1] = s->yoffsets[i >> 1];
679  }
680 
681  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
682 
/* No point in more parallel offset groups than there are groups of
 * TYPE_ELEMS offsets. */
683  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
684 
685  spv = ff_vk_spirv_init();
686  if (!spv) {
687  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
688  return AVERROR_EXTERNAL;
689  }
690 
691  s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
692  if (!s->qf) {
693  av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
694  err = AVERROR(ENOTSUP);
695  goto fail;
696  }
697 
698  RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
699 
700  RET(init_integral_pipeline(vkctx, &s->e, &s->shd_horizontal, &s->shd_vertical,
701  spv, desc, planes));
702 
703  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, spv, desc, planes));
704 
705  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, spv, desc, planes));
706 
/* The offsets table never changes, so it is bound once here for the two
 * shaders that declare xyoffsets_buffer. */
707  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_vertical,
708  1, 0, 0,
709  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
710  VK_FORMAT_UNDEFINED));
711 
712  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
713  1, 0, 0,
714  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
715  VK_FORMAT_UNDEFINED));
716 
/* Dry run of the dispatch loop, used only to report the dispatch count in
 * the log message below. */
717  do {
718  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
719  offsets_dispatched += wg_invoc * TYPE_ELEMS;
720  nb_dispatches++;
721  } while (offsets_dispatched < s->nb_offsets);
722 
723  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
724  s->nb_offsets, nb_dispatches);
725 
726  s->initialized = 1;
727 
728 fail:
/* Reached on success as well as on failure: the compiler is only needed
 * during initialisation. */
729  if (spv)
730  spv->uninit(&spv);
731 
732  return err;
733 }
734 
736  FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4],
737  uint32_t ws_offset[4], uint32_t ws_stride[4],
738  uint32_t ws_count, uint32_t t, uint32_t nb_components)
739 {
/*
 * Records the final denoise dispatch into exec: binds the denoise shader,
 * uploads its push constants, inserts a buffer barrier on the weights/sums
 * buffer, and dispatches one workgroup grid per output plane.
 * Always returns 0.
 * NOTE(review): the first line of the signature (listing line 735) is
 * missing from this listing; s and exec are presumably declared there.
 */
740  FFVulkanContext *vkctx = &s->vkctx;
741  FFVulkanFunctions *vk = &vkctx->vkfn;
742 
/* Positional initializer: the order must match the DenoisePushData members. */
743  DenoisePushData pd = {
744  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
745  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
746  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
747  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
748  ws_count,
749  t,
750  nb_components,
751  };
752 
753  /* Denoise pass pipeline */
754  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
755 
756  /* Push data */
757  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
758  VK_SHADER_STAGE_COMPUTE_BIT,
759  0, sizeof(pd), &pd);
760 
/* Make the weights pass' shader reads/writes of the weights/sums buffer
 * visible to the shader reads of the denoise pass. */
761  VkBufferMemoryBarrier2 buf_bar;
762  ff_vk_buf_barrier(buf_bar, ws_vk,
763  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
764  SHADER_STORAGE_WRITE_BIT,
765  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
766  0, VK_WHOLE_SIZE);
767  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
768  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
769  .pBufferMemoryBarriers = &buf_bar,
770  .bufferMemoryBarrierCount = 1,
771  });
772 
773  /* End of denoise pass */
/* Grid: output size divided by the shader's lg_size, rounded up, in X and Y;
 * one Z layer per plane of the output format. */
774  vk->CmdDispatch(exec->buf,
775  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
776  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
777  av_pix_fmt_count_planes(s->vkctx.output_format));
778 
779  return 0;
780 }
781 
783 {
784  int err;
785  AVFrame *out = NULL;
786  AVFilterContext *ctx = link->dst;
787  NLMeansVulkanContext *s = ctx->priv;
788  AVFilterLink *outlink = ctx->outputs[0];
789  FFVulkanContext *vkctx = &s->vkctx;
790  FFVulkanFunctions *vk = &vkctx->vkfn;
791 
792  const AVPixFmtDescriptor *desc;
793  int comp_offs[4];
794  int comp_planes[4];
795  int plane_widths[4];
796  int plane_heights[4];
797 
798  int offsets_dispatched = 0;
799 
800  /* Integral */
801  AVBufferRef *integral_buf = NULL;
802  FFVkBuffer *integral_vk;
803  size_t int_stride;
804  size_t int_size;
805 
806  /* Weights/sums */
807  AVBufferRef *ws_buf = NULL;
808  FFVkBuffer *ws_vk;
809  uint32_t ws_count = 0;
810  uint32_t ws_offset[4];
811  uint32_t ws_stride[4];
812  size_t ws_size;
813 
814  FFVkExecContext *exec;
815  VkImageView in_views[AV_NUM_DATA_POINTERS];
816  VkImageView out_views[AV_NUM_DATA_POINTERS];
817  VkImageMemoryBarrier2 img_bar[8];
818  int nb_img_bar = 0;
819  VkBufferMemoryBarrier2 buf_bar[2];
820  int nb_buf_bar = 0;
821 
822  if (!s->initialized)
823  RET(init_filter(ctx));
824 
826  if (!desc)
827  return AVERROR(EINVAL);
828 
829  /* Integral image */
830  int_stride = FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) * TYPE_SIZE;
831  int_size = FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) * int_stride;
832 
833  /* Plane dimensions */
834  for (int i = 0; i < desc->nb_components; i++) {
835  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
836  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
837  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
838  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
839 
840  comp_offs[i] = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
841  comp_planes[i] = desc->comp[i].plane;
842 
843  ws_stride[i] = plane_widths[i];
844  ws_offset[i] = ws_count;
845  ws_count += ws_stride[i] * plane_heights[i];
846  }
847 
848  ws_size = ws_count * sizeof(float);
849 
850  /* Buffers */
851  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
852  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
853  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
854  NULL,
855  int_size * s->opts.t * desc->nb_components,
856  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
857  if (err < 0)
858  return err;
859  integral_vk = (FFVkBuffer *)integral_buf->data;
860 
861  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
862  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
863  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
864  NULL,
865  ws_size * s-> opts.t * 2,
866  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
867  if (err < 0)
868  return err;
869  ws_vk = (FFVkBuffer *)ws_buf->data;
870 
871  /* Output frame */
872  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
873  if (!out) {
874  err = AVERROR(ENOMEM);
875  goto fail;
876  }
877 
878  /* Execution context */
879  exec = ff_vk_exec_get(&s->vkctx, &s->e);
880  ff_vk_exec_start(vkctx, exec);
881 
882  /* Dependencies */
883  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
884  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
885  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
886  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
887  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
888  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
889 
890  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
891  integral_buf = NULL;
892 
893  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
894  ws_buf = NULL;
895 
896  /* Input frame prep */
897  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
898  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
899  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
900  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
901  VK_ACCESS_SHADER_READ_BIT,
902  VK_IMAGE_LAYOUT_GENERAL,
903  VK_QUEUE_FAMILY_IGNORED);
904 
905  /* Output frame prep */
906  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
907  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
908  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
909  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
910  VK_ACCESS_SHADER_WRITE_BIT,
911  VK_IMAGE_LAYOUT_GENERAL,
912  VK_QUEUE_FAMILY_IGNORED);
913 
914  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], ws_vk,
915  ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
916  TRANSFER_BIT, TRANSFER_WRITE_BIT, NONE_KHR,
917  0, VK_WHOLE_SIZE);
918  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
919  ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
920  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
921  0, VK_WHOLE_SIZE);
922  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
923  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
924  .pImageMemoryBarriers = img_bar,
925  .imageMemoryBarrierCount = nb_img_bar,
926  .pBufferMemoryBarriers = buf_bar,
927  .bufferMemoryBarrierCount = nb_buf_bar,
928  });
929  nb_buf_bar = 0;
930  nb_img_bar = 0;
931 
932  /* Buffer zeroing */
933  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
934 
935  /* Update integral descriptors */
936  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_vertical, in, in_views, 0, 0,
937  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
938  /* Update weights descriptors */
939  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
940  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
941  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1, 0,
942  ws_vk, 0, ws_size * s-> opts.t,
943  VK_FORMAT_UNDEFINED));
944  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 2, 0,
945  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
946  VK_FORMAT_UNDEFINED));
947 
948  /* Update denoise descriptors */
949  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
950  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
951  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
952  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
953  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 0, 0,
954  ws_vk, 0, ws_size * s-> opts.t,
955  VK_FORMAT_UNDEFINED));
956  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 1, 0,
957  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
958  VK_FORMAT_UNDEFINED));
959 
960  VkPipelineStageFlagBits2 ws_stage = VK_PIPELINE_STAGE_2_TRANSFER_BIT;
961  VkAccessFlagBits2 ws_access = VK_ACCESS_2_TRANSFER_WRITE_BIT;
962  do {
963  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
964  IntegralPushData pd = {
965  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
966  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
967  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
968  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
969  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
970  integral_vk->address,
971  (uint64_t)int_size,
972  (uint64_t)int_stride,
973  offsets_dispatched,
974  desc->nb_components,
975  };
976 
977  /* Vertical pass */
978  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
979  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
980  COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
981  0, VK_WHOLE_SIZE);
982  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
983  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
984  .pBufferMemoryBarriers = buf_bar,
985  .bufferMemoryBarrierCount = nb_buf_bar,
986  });
987  nb_buf_bar = 0;
988 
989  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_vertical);
990  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_vertical,
991  VK_SHADER_STAGE_COMPUTE_BIT,
992  0, sizeof(pd), &pd);
993  vk->CmdDispatch(exec->buf,
994  FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) /
995  s->shd_vertical.lg_size[0],
996  desc->nb_components,
997  wg_invoc);
998 
999  /* Horizontal pass */
1000  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
1001  COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
1002  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
1003  SHADER_STORAGE_WRITE_BIT,
1004  0, VK_WHOLE_SIZE);
1005  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1006  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1007  .pBufferMemoryBarriers = buf_bar,
1008  .bufferMemoryBarrierCount = nb_buf_bar,
1009  });
1010  nb_buf_bar = 0;
1011 
1012  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_horizontal);
1013  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_horizontal,
1014  VK_SHADER_STAGE_COMPUTE_BIT,
1015  0, sizeof(pd), &pd);
1016  vk->CmdDispatch(exec->buf,
1017  FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) /
1018  s->shd_horizontal.lg_size[0],
1019  desc->nb_components,
1020  wg_invoc);
1021 
1022  /* Weights pass */
1023  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
1024  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
1025  SHADER_STORAGE_WRITE_BIT,
1026  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
1027  0, VK_WHOLE_SIZE);
1028  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
1029  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1030  .srcStageMask = ws_stage,
1031  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1032  .srcAccessMask = ws_access,
1033  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1034  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1035  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1036  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1037  .buffer = ws_vk->buf,
1038  .size = ws_vk->size,
1039  .offset = 0,
1040  };
1041  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1042  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1043  .pBufferMemoryBarriers = buf_bar,
1044  .bufferMemoryBarrierCount = nb_buf_bar,
1045  });
1046  nb_buf_bar = 0;
1047  ws_stage = buf_bar[1].dstStageMask;
1048  ws_access = buf_bar[1].dstAccessMask;
1049 
1050  WeightsPushData wpd = {
1051  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
1052  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
1053  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
1054  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
1055  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
1056  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
1057  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
1058  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
1059  integral_vk->address,
1060  (uint64_t)int_size,
1061  (uint64_t)int_stride,
1062  offsets_dispatched,
1063  ws_count,
1064  desc->nb_components,
1065  };
1066  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
1067  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
1068  VK_SHADER_STAGE_COMPUTE_BIT,
1069  0, sizeof(wpd), &wpd);
1070  vk->CmdDispatch(exec->buf,
1071  FFALIGN(vkctx->output_width, s->shd_weights.lg_size[0]) /
1072  s->shd_weights.lg_size[0],
1073  FFALIGN(vkctx->output_height, s->shd_weights.lg_size[1]) /
1074  s->shd_weights.lg_size[1],
1075  wg_invoc * desc->nb_components);
1076 
1077  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1078  } while (offsets_dispatched < s->nb_offsets);
1079 
1080  RET(denoise_pass(s, exec, ws_vk, comp_offs, comp_planes, ws_offset, ws_stride,
1081  ws_count, s->opts.t, desc->nb_components));
1082 
1083  err = ff_vk_exec_submit(vkctx, exec);
1084  if (err < 0)
1085  return err;
1086 
1087  err = av_frame_copy_props(out, in);
1088  if (err < 0)
1089  goto fail;
1090 
1091  av_frame_free(&in);
1092 
1093  return ff_filter_frame(outlink, out);
1094 
1095 fail:
1096  av_buffer_unref(&integral_buf);
1097  av_buffer_unref(&ws_buf);
1098  av_frame_free(&in);
1099  av_frame_free(&out);
1100  return err;
1101 }
1102 
/*
 * Body of the filter's uninit callback. The header line (listing line 1103)
 * is absent from this listing; the cross-reference index of this page gives
 * it as: static void nlmeans_vulkan_uninit(AVFilterContext *avctx).
 *
 * Releases everything hanging off the NLMeansVulkanContext stored in
 * avctx->priv: the execution pool, the four compute shaders, the two buffer
 * pools, the main Vulkan context and the offset tables.
 */
1104 {
1105  NLMeansVulkanContext *s = avctx->priv;
1106  FFVulkanContext *vkctx = &s->vkctx;
1107 
/* The execution pool is freed before the shaders.
 * NOTE(review): presumably the order matters because shaders are registered
 * with the pool - confirm before reordering. */
1108  ff_vk_exec_pool_free(vkctx, &s->e);
1109  ff_vk_shader_free(vkctx, &s->shd_horizontal);
1110  ff_vk_shader_free(vkctx, &s->shd_vertical);
1111  ff_vk_shader_free(vkctx, &s->shd_weights);
1112  ff_vk_shader_free(vkctx, &s->shd_denoise);
1113 
/* Mark the integral and weights/sums buffer pools as available for freeing. */
1114  av_buffer_pool_uninit(&s->integral_buf_pool);
1115  av_buffer_pool_uninit(&s->ws_buf_pool);
1116 
/* Free the main Vulkan context; this runs after all objects above are gone. */
1117  ff_vk_uninit(&s->vkctx);
1118 
/* av_freep() releases the offset tables and resets the pointers. */
1119  av_freep(&s->xoffsets);
1120  av_freep(&s->yoffsets);
1121 
/* NOTE(review): presumably lets a later init run again from scratch - confirm
 * against the code that tests s->initialized. */
1122  s->initialized = 0;
1123 }
1124 
/* OFFSET(x) is the byte offset of field x inside NLMeansVulkanContext;
 * FLAGS marks every option as a video filtering parameter. */
1125 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1126 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/*
 * Entries of the option table. The table header (listing line 1127) is absent
 * from this listing; the cross-reference index of this page gives it as:
 * static const AVOption nlmeans_vulkan_options[].
 *
 * Each entry appears to follow the AVOption layout: name, help text, field
 * offset, type, default value, minimum, maximum, flags (see opt.h).
 *
 * Global options: strength "s" defaults to 1.0, patch size "p" to 3*2+1,
 * research window "r" to 7*2+1 and parallelism "t" to 8 (allowed 1..64).
 */
1128  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.0, 100.0, FLAGS },
1129  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1130  { "r", "research window size", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1131  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 8 }, 1, 64, FLAGS },
1132 
/* Per-component strengths default to NAN.
 * NOTE(review): presumably NAN means "unset, fall back to s" - confirm in
 * init_filter. */
1133  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1134  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1135  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1136  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1137 
/* Per-component patch sizes default to 0.
 * NOTE(review): presumably 0 means "unset, fall back to p" - confirm in
 * init_filter. */
1138  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1139  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1140  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1141  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1142 
/* A NULL-named entry terminates the table. */
1143  { NULL }
1144 };
1145 
/* Defines nlmeans_vulkan_class, which the filter definition references as its
 * priv_class. */
1146 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1147 
/*
 * Entries of the input pad array. The array header (listing line 1148) is
 * absent from this listing; the cross-reference index of this page gives it
 * as: static const AVFilterPad nlmeans_vulkan_inputs[].
 *
 * There is a single video input pad: frames are handed to
 * nlmeans_vulkan_filter_frame and the link is configured by
 * ff_vk_filter_config_input.
 */
1149  {
1150  .name = "default",
1151  .type = AVMEDIA_TYPE_VIDEO,
1152  .filter_frame = &nlmeans_vulkan_filter_frame,
1153  .config_props = &ff_vk_filter_config_input,
1154  },
1155 };
1156 
/*
 * Entries of the output pad array. The array header (listing line 1157) is
 * absent from this listing; the cross-reference index of this page gives it
 * as: static const AVFilterPad nlmeans_vulkan_outputs[].
 *
 * There is a single video output pad, configured by
 * ff_vk_filter_config_output.
 */
1158  {
1159  .name = "default",
1160  .type = AVMEDIA_TYPE_VIDEO,
1161  .config_props = &ff_vk_filter_config_output,
1162  },
1163 };
1164 
/*
 * Filter definition. The header (listing line 1165) is absent from this
 * listing; the cross-reference index of this page gives it as:
 * const FFFilter ff_vf_nlmeans_vulkan.
 *
 * The ".p." fields fill the public AVFilter embedded in FFFilter; the
 * remaining fields are internal to libavfilter.
 */
1166  .p.name = "nlmeans_vulkan",
1167  .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1168  .p.priv_class = &nlmeans_vulkan_class,
/* AVFILTER_FLAG_HWDEVICE: the filter can create hardware frames using
 * AVFilterContext.hw_device_ctx. */
1169  .p.flags = AVFILTER_FLAG_HWDEVICE,
1170  .priv_size = sizeof(NLMeansVulkanContext),
1171  .init = &ff_vk_filter_init,
/* NOTE(review): listing lines 1172-1175 are absent here; presumably they set
 * .uninit and the FILTER_INPUTS / FILTER_OUTPUTS / FILTER_SINGLE_PIXFMT
 * entries - confirm against the upstream file. */
/* FF_FILTER_FLAG_HWFRAME_AWARE: the filter is aware of hardware frames. */
1176  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1177 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:118
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:64
DenoisePushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:479
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:1020
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
WeightsPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:301
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(nlmeans_vulkan)
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:361
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_vf_nlmeans_vulkan
const FFFilter ff_vf_nlmeans_vulkan
Definition: vf_nlmeans_vulkan.c:1165
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2789
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:2101
out
static FILE * out
Definition: movenc.c:55
NLMeansVulkanContext::shd_weights
FFVulkanShader shd_weights
Definition: vf_nlmeans_vulkan.c:51
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
WeightsPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:296
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1067
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
RET
#define RET(x)
Definition: vulkan.h:68
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:355
WeightsPushData
Definition: vf_nlmeans_vulkan.c:290
IntegralPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:72
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:64
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:44
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:264
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:427
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:233
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:603
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:54
AVOption
AVOption.
Definition: opt.h:429
b
#define b
Definition: input.c:42
data
const char data[16]
Definition: mxf.c:149
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
NLMeansVulkanContext::shd_horizontal
FFVulkanShader shd_horizontal
Definition: vf_nlmeans_vulkan.c:49
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:130
NLMeansVulkanContext::shd_vertical
FFVulkanShader shd_vertical
Definition: vf_nlmeans_vulkan.c:50
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:546
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:57
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2815
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:32
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:220
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:40
IntegralPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:78
DenoisePushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:478
WeightsPushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:293
video.h
IntegralPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:71
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:778
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:126
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:55
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3496
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:289
fail
#define fail()
Definition: checkasm.h:218
vulkan_filter.h
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2717
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags2 src_stage, VkPipelineStageFlags2 dst_stage, VkAccessFlagBits2 new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:2029
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2582
NLMeansVulkanContext::shd_denoise
FFVulkanShader shd_denoise
Definition: vf_nlmeans_vulkan.c:52
DenoisePushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:480
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:40
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:45
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
av_cold
#define av_cold
Definition: attributes.h:106
main
int main
Definition: dovi_rpuenc.c:38
WeightsPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:300
FFFilter
Definition: filters.h:267
float
float
Definition: af_crystalizer.c:122
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:360
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:45
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
shared_shd_def
static void shared_shd_def(FFVulkanShader *shd)
Definition: vf_nlmeans_vulkan.c:83
WeightsPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:302
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
FLAGS
#define FLAGS
Definition: vf_nlmeans_vulkan.c:1126
filters.h
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:451
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:618
denoise_pass
static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4], uint32_t ws_offset[4], uint32_t ws_stride[4], uint32_t ws_count, uint32_t t, uint32_t nb_components)
Definition: vf_nlmeans_vulkan.c:735
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:299
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:265
NAN
#define NAN
Definition: mathematics.h:115
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
WeightsPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:304
WeightsPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:299
IntegralPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:76
opts
static AVDictionary * opts
Definition: movenc.c:51
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum FFVkShaderRepFormat rep_fmt)
Definition: vulkan.c:1605
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:599
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
WeightsPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:292
nlmeans_vulkan_options
static const AVOption nlmeans_vulkan_options[]
Definition: vf_nlmeans_vulkan.c:1127
isnan
#define isnan(x)
Definition: libm.h:342
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:328
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:209
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, const char *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2355
FFVkBuffer::size
size_t size
Definition: vulkan.h:129
IntegralPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:74
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:60
FFVulkanContext
Definition: vulkan.h:312
WeightsPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:297
WeightsPushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:294
exp
int8_t exp
Definition: eval.c:73
nlmeans_vulkan_inputs
static const AVFilterPad nlmeans_vulkan_inputs[]
Definition: vf_nlmeans_vulkan.c:1148
WG_SIZE
#define WG_SIZE
Definition: vf_nlmeans_vulkan.c:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:208
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:58
DenoisePushData
Definition: vf_nlmeans_vulkan.c:477
Block
Definition: flashsv2enc.c:70
NLMeansVulkanContext::qf
AVVulkanDeviceQueueFamily * qf
Definition: vf_nlmeans_vulkan.c:42
ff_vk_buf_barrier
#define ff_vk_buf_barrier(dst, vkb, s_stage, s_access, s_access2, d_stage, d_access, d_access2, offs, bsz)
Definition: vulkan.h:551
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:550
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:37
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2756
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:47
FFVulkanDescriptorSetBinding
Definition: vulkan.h:112
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
IntegralPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:75
height
#define height
Definition: dsp.h:89
IntegralPushData
Definition: vf_nlmeans_vulkan.c:70
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:188
nlmeans_vulkan_outputs
static const AVFilterPad nlmeans_vulkan_outputs[]
Definition: vf_nlmeans_vulkan.c:1157
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:66
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:428
DenoisePushData::t
uint32_t t
Definition: vf_nlmeans_vulkan.c:483
FFVulkanShader
Definition: vulkan.h:225
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:362
nlmeans_vulkan_uninit
static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
Definition: vf_nlmeans_vulkan.c:1103
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, FFVulkanShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:28
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:145
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2730
DenoisePushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:481
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:113
init_denoise_pipeline
static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:487
IntegralPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:79
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:32
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:26
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
nlmeans_vulkan_filter_frame
static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_nlmeans_vulkan.c:782
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:558
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
DenoisePushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:484
init_filter
static av_cold int init_filter(AVFilterContext *ctx)
Definition: vf_nlmeans_vulkan.c:606
TYPE_BLOCK_ELEMS
#define TYPE_BLOCK_ELEMS
Definition: vf_nlmeans_vulkan.c:33
weights
static const int weights[]
Definition: hevc_pel.c:32
init_integral_pipeline
static av_cold int init_integral_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd_horizontal, FFVulkanShader *shd_vertical, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:118
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:610
vulkan_spirv.h
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:46
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:31
WeightsPushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:303
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2766
WeightsPushData::patch_size
int32_t patch_size[4]
Definition: vf_nlmeans_vulkan.c:295
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:56
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:30
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1946
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:316
av_malloc
void * av_malloc(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:98
FFVkExecPool
Definition: vulkan.h:290
pos
unsigned int pos
Definition: spdifenc.c:414
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1479
OFFSET
#define OFFSET(x)
Definition: vf_nlmeans_vulkan.c:1125
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:286
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:156
init_weights_pipeline
static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:307
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:62
FFVulkanContext::input_format
enum AVPixelFormat input_format
Definition: vulkan.h:363
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:65
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, const FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2482
random_seed.h
buffer
the frame and frame reference mechanism is intended to as much as expensive copies of that data while still allowing the filters to produce correct results The data is stored in buffers represented by AVFrame structures Several references can point to the same frame buffer
Definition: filter_design.txt:49
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:55
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
planes
static const struct @554 planes[]
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AVFilterContext
An instance of a filter.
Definition: avfilter.h:274
IntegralPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:80
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:63
desc
const char * desc
Definition: libsvtav1.c:78
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:176
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:200
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:271
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
TYPE_BLOCK_SIZE
#define TYPE_BLOCK_SIZE
Definition: vf_nlmeans_vulkan.c:34
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:41
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
w
uint8_t w
Definition: llvidencdsp.c:39
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
DenoisePushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:482
IntegralPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:77
IntegralPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:73
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:31
FFVkBuffer
Definition: vulkan.h:125
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:903
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVVulkanDeviceQueueFamily
Definition: hwcontext_vulkan.h:33
width
#define width
Definition: dsp.h:89
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:61
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:254
FFVulkanFunctions
Definition: vulkan_functions.h:274
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1276
src
#define src
Definition: vp8dsp.c:248
WeightsPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:298
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:38
WeightsPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:291