avfilter/avfiltergraph: fix constant string comparison
[ffmpeg.git] / libavutil / vulkan.c
1 /*
2 * Copyright (c) Lynne
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "avassert.h"
22 #include "mem.h"
23
24 #include "vulkan.h"
25 #include "libavutil/vulkan_loader.h"
26
/* Identity component mapping (R, G, B and A all passed through unchanged);
 * shared default swizzle for image view creation. */
const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
33
34 /* Converts return values to strings */
35 const char *ff_vk_ret2str(VkResult res)
36 {
37 #define CASE(VAL) case VAL: return #VAL
38 switch (res) {
39 CASE(VK_SUCCESS);
40 CASE(VK_NOT_READY);
41 CASE(VK_TIMEOUT);
42 CASE(VK_EVENT_SET);
43 CASE(VK_EVENT_RESET);
44 CASE(VK_INCOMPLETE);
45 CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
46 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
47 CASE(VK_ERROR_INITIALIZATION_FAILED);
48 CASE(VK_ERROR_DEVICE_LOST);
49 CASE(VK_ERROR_MEMORY_MAP_FAILED);
50 CASE(VK_ERROR_LAYER_NOT_PRESENT);
51 CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
52 CASE(VK_ERROR_FEATURE_NOT_PRESENT);
53 CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
54 CASE(VK_ERROR_TOO_MANY_OBJECTS);
55 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
56 CASE(VK_ERROR_FRAGMENTED_POOL);
57 CASE(VK_ERROR_UNKNOWN);
58 CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
59 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
60 CASE(VK_ERROR_FRAGMENTATION);
61 CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
62 CASE(VK_PIPELINE_COMPILE_REQUIRED);
63 CASE(VK_ERROR_SURFACE_LOST_KHR);
64 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
65 CASE(VK_SUBOPTIMAL_KHR);
66 CASE(VK_ERROR_OUT_OF_DATE_KHR);
67 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
68 CASE(VK_ERROR_VALIDATION_FAILED_EXT);
69 CASE(VK_ERROR_INVALID_SHADER_NV);
70 CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
71 CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
72 CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
73 CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
74 CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
75 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
76 CASE(VK_ERROR_NOT_PERMITTED_KHR);
77 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
78 CASE(VK_THREAD_IDLE_KHR);
79 CASE(VK_THREAD_DONE_KHR);
80 CASE(VK_OPERATION_DEFERRED_KHR);
81 CASE(VK_OPERATION_NOT_DEFERRED_KHR);
82 default: return "Unknown error";
83 }
84 #undef CASE
85 }
86
87 /* Malitia pura, Khronos */
88 #define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \
89 dst_t ff_vk_map_ ##src_name## _to_ ##dst_name(src_t src) \
90 { \
91 dst_t dst = 0x0; \
92 MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \
93 VK_IMAGE_USAGE_SAMPLED_BIT); \
94 MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \
95 VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \
96 MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \
97 VK_IMAGE_USAGE_TRANSFER_DST_BIT); \
98 MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \
99 VK_IMAGE_USAGE_STORAGE_BIT); \
100 MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \
101 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \
102 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
103 VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \
104 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \
105 VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \
106 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \
107 VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \
108 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \
109 VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \
110 MAP_TO(VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT, \
111 VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT); \
112 return dst; \
113 }
114
115 #define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1;
116 FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage)
117 #undef MAP_TO
118 #define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2;
119 FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats)
120 #undef MAP_TO
121 #undef FN_MAP_TO
122
123 static void load_enabled_qfs(FFVulkanContext *s)
124 {
125 s->nb_qfs = 0;
126 for (int i = 0; i < s->hwctx->nb_qf; i++) {
127 /* Skip duplicates */
128 int skip = 0;
129 for (int j = 0; j < s->nb_qfs; j++) {
130 if (s->qfs[j] == s->hwctx->qf[i].idx) {
131 skip = 1;
132 break;
133 }
134 }
135 if (skip)
136 continue;
137
138 s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx;
139 }
140 }
141
142 int ff_vk_load_props(FFVulkanContext *s)
143 {
144 FFVulkanFunctions *vk = &s->vkfn;
145
146 s->props = (VkPhysicalDeviceProperties2) {
147 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
148 };
149
150 FF_VK_STRUCT_EXT(s, &s->props, &s->props_11, FF_VK_EXT_NO_FLAG,
151 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
152 FF_VK_STRUCT_EXT(s, &s->props, &s->driver_props, FF_VK_EXT_NO_FLAG,
153 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES);
154 FF_VK_STRUCT_EXT(s, &s->props, &s->subgroup_props, FF_VK_EXT_NO_FLAG,
155 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES);
156
157 FF_VK_STRUCT_EXT(s, &s->props, &s->push_desc_props, FF_VK_EXT_PUSH_DESCRIPTOR,
158 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR);
159 FF_VK_STRUCT_EXT(s, &s->props, &s->hprops, FF_VK_EXT_EXTERNAL_HOST_MEMORY,
160 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT);
161 FF_VK_STRUCT_EXT(s, &s->props, &s->coop_matrix_props, FF_VK_EXT_COOP_MATRIX,
162 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR);
163 FF_VK_STRUCT_EXT(s, &s->props, &s->desc_buf_props, FF_VK_EXT_DESCRIPTOR_BUFFER,
164 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT);
165 FF_VK_STRUCT_EXT(s, &s->props, &s->optical_flow_props, FF_VK_EXT_OPTICAL_FLOW,
166 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV);
167 FF_VK_STRUCT_EXT(s, &s->props, &s->host_image_props, FF_VK_EXT_HOST_IMAGE_COPY,
168 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT);
169
170 s->feats = (VkPhysicalDeviceFeatures2) {
171 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
172 };
173
174 FF_VK_STRUCT_EXT(s, &s->feats, &s->feats_12, FF_VK_EXT_NO_FLAG,
175 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
176 FF_VK_STRUCT_EXT(s, &s->feats, &s->atomic_float_feats, FF_VK_EXT_ATOMIC_FLOAT,
177 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);
178
179 /* Try allocating 1024 layouts */
180 s->host_image_copy_layouts = av_malloc(sizeof(*s->host_image_copy_layouts)*1024);
181 s->host_image_props.pCopySrcLayouts = s->host_image_copy_layouts;
182 s->host_image_props.copySrcLayoutCount = 512;
183 s->host_image_props.pCopyDstLayouts = s->host_image_copy_layouts + 512;
184 s->host_image_props.copyDstLayoutCount = 512;
185
186 vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
187
188 /* Check if we had enough memory for all layouts */
189 if (s->host_image_props.copySrcLayoutCount == 512 ||
190 s->host_image_props.copyDstLayoutCount == 512) {
191 VkImageLayout *new_array;
192 size_t new_size;
193 s->host_image_props.pCopySrcLayouts =
194 s->host_image_props.pCopyDstLayouts = NULL;
195 s->host_image_props.copySrcLayoutCount =
196 s->host_image_props.copyDstLayoutCount = 0;
197 vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
198
199 new_size = s->host_image_props.copySrcLayoutCount +
200 s->host_image_props.copyDstLayoutCount;
201 new_size *= sizeof(*s->host_image_copy_layouts);
202 new_array = av_realloc(s->host_image_copy_layouts, new_size);
203 if (!new_array)
204 return AVERROR(ENOMEM);
205
206 s->host_image_copy_layouts = new_array;
207 s->host_image_props.pCopySrcLayouts = new_array;
208 s->host_image_props.pCopyDstLayouts = new_array + s->host_image_props.copySrcLayoutCount;
209 vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
210 }
211
212 vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
213 vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats);
214
215 load_enabled_qfs(s);
216
217 if (s->qf_props)
218 return 0;
219
220 vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL);
221
222 s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props));
223 if (!s->qf_props)
224 return AVERROR(ENOMEM);
225
226 s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props));
227 if (!s->qf_props) {
228 av_freep(&s->qf_props);
229 return AVERROR(ENOMEM);
230 }
231
232 s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props));
233 if (!s->video_props) {
234 av_freep(&s->qf_props);
235 av_freep(&s->query_props);
236 return AVERROR(ENOMEM);
237 }
238
239 for (uint32_t i = 0; i < s->tot_nb_qfs; i++) {
240 s->qf_props[i] = (VkQueueFamilyProperties2) {
241 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
242 };
243
244 FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->query_props[i], FF_VK_EXT_VIDEO_QUEUE,
245 VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR);
246 FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->video_props[i], FF_VK_EXT_VIDEO_QUEUE,
247 VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR);
248 }
249
250 vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props);
251
252 if (s->extensions & FF_VK_EXT_COOP_MATRIX) {
253 vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
254 &s->coop_mat_props_nb, NULL);
255
256 if (s->coop_mat_props_nb) {
257 s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb,
258 sizeof(VkCooperativeMatrixPropertiesKHR));
259 for (int i = 0; i < s->coop_mat_props_nb; i++) {
260 s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) {
261 .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
262 };
263 }
264
265 vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
266 &s->coop_mat_props_nb,
267 s->coop_mat_props);
268 }
269 }
270
271 return 0;
272 }
273
274 AVVulkanDeviceQueueFamily *ff_vk_qf_find(FFVulkanContext *s,
275 VkQueueFlagBits dev_family,
276 VkVideoCodecOperationFlagBitsKHR vid_ops)
277 {
278 for (int i = 0; i < s->hwctx->nb_qf; i++) {
279 if ((s->hwctx->qf[i].flags & dev_family) &&
280 (s->hwctx->qf[i].video_caps & vid_ops) == vid_ops) {
281 return &s->hwctx->qf[i];
282 }
283 }
284 return NULL;
285 }
286
/**
 * Free an execution pool and everything it owns: per-context fences and
 * dependency arrays, registered shader descriptor state, command
 * pools/buffers, the query pool and the backing allocations.
 * Waits for previously-submitted work to complete before teardown.
 */
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pool->pool_size; i++) {
        FFVkExecContext *e = &pool->contexts[i];

        if (e->fence) {
            /* Only wait on fences that were actually used in a submission */
            if (e->had_submission)
                vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
            vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
        }

        /* Unref buffer/frame dependencies and unlock any locked frames */
        ff_vk_exec_discard_deps(s, e);

        av_free(e->frame_deps);
        av_free(e->sw_frame_deps);
        av_free(e->buf_deps);
        av_free(e->queue_family_dst);
        av_free(e->layout_dst);
        av_free(e->access_dst);
        av_free(e->frame_update);
        av_free(e->frame_locked);
        av_free(e->sem_sig);
        av_free(e->sem_sig_val_dst);
        av_free(e->sem_wait);
    }

    /* Free shader-specific data */
    for (int i = 0; i < pool->nb_reg_shd; i++) {
        FFVulkanShaderData *sd = &pool->reg_shd[i];

        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            /* Descriptor-buffer path: unmap and free each set's buffer */
            for (int j = 0; j < sd->nb_descriptor_sets; j++) {
                FFVulkanDescriptorSetData *set_data = &sd->desc_set_buf[j];
                if (set_data->buf.mem)
                    ff_vk_unmap_buffer(s, &set_data->buf, 0);
                ff_vk_free_buf(s, &set_data->buf);
            }
        }

        if (sd->desc_pool)
            vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool,
                                      s->hwctx->alloc);

        av_freep(&sd->desc_set_buf);
        av_freep(&sd->desc_bind);
        av_freep(&sd->desc_sets);
    }

    av_freep(&pool->reg_shd);

    /* Command buffers must be freed before their pool is destroyed */
    for (int i = 0; i < pool->pool_size; i++) {
        if (pool->cmd_buf_pools[i])
            vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pools[i],
                                   1, &pool->cmd_bufs[i]);

        if (pool->cmd_buf_pools[i])
            vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pools[i], s->hwctx->alloc);
    }
    if (pool->query_pool)
        vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);

    av_free(pool->query_data);
    av_free(pool->cmd_buf_pools);
    av_free(pool->cmd_bufs);
    av_free(pool->contexts);
}
355
356 int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf,
357 FFVkExecPool *pool, int nb_contexts,
358 int nb_queries, VkQueryType query_type, int query_64bit,
359 const void *query_create_pnext)
360 {
361 int err;
362 VkResult ret;
363 FFVulkanFunctions *vk = &s->vkfn;
364
365 VkCommandPoolCreateInfo cqueue_create;
366 VkCommandBufferAllocateInfo cbuf_create;
367
368 const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL;
369
370 atomic_init(&pool->idx, 0);
371
372 if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
373 ef = ff_vk_find_struct(query_create_pnext,
374 VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR);
375 if (!ef)
376 return AVERROR(EINVAL);
377 }
378
379 /* Allocate space for command buffer pools */
380 pool->cmd_buf_pools = av_malloc(nb_contexts*sizeof(*pool->cmd_buf_pools));
381 if (!pool->cmd_buf_pools) {
382 err = AVERROR(ENOMEM);
383 goto fail;
384 }
385
386 /* Allocate space for command buffers */
387 pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
388 if (!pool->cmd_bufs) {
389 err = AVERROR(ENOMEM);
390 goto fail;
391 }
392
393 for (int i = 0; i < nb_contexts; i++) {
394 /* Create command pool */
395 cqueue_create = (VkCommandPoolCreateInfo) {
396 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
397 .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
398 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
399 .queueFamilyIndex = qf->idx,
400 };
401
402 ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
403 s->hwctx->alloc, &pool->cmd_buf_pools[i]);
404 if (ret != VK_SUCCESS) {
405 av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
406 ff_vk_ret2str(ret));
407 err = AVERROR_EXTERNAL;
408 goto fail;
409 }
410
411 /* Allocate command buffer */
412 cbuf_create = (VkCommandBufferAllocateInfo) {
413 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
414 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
415 .commandPool = pool->cmd_buf_pools[i],
416 .commandBufferCount = 1,
417 };
418 ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
419 &pool->cmd_bufs[i]);
420 if (ret != VK_SUCCESS) {
421 av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
422 ff_vk_ret2str(ret));
423 err = AVERROR_EXTERNAL;
424 goto fail;
425 }
426 }
427
428 /* Query pool */
429 if (nb_queries) {
430 VkQueryPoolCreateInfo query_pool_info = {
431 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
432 .pNext = query_create_pnext,
433 .queryType = query_type,
434 .queryCount = nb_queries*nb_contexts,
435 };
436 ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
437 s->hwctx->alloc, &pool->query_pool);
438 if (ret != VK_SUCCESS) {
439 av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
440 ff_vk_ret2str(ret));
441 err = AVERROR_EXTERNAL;
442 goto fail;
443 }
444
445 pool->nb_queries = nb_queries;
446 pool->query_status_stride = 1 + 1; /* One result, one status by default */
447 pool->query_results = nb_queries;
448 pool->query_statuses = nb_queries;
449
450 /* Video encode queries produce two results per query */
451 if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
452 int nb_results = av_popcount(ef->encodeFeedbackFlags);
453 pool->query_status_stride = nb_results + 1;
454 pool->query_results *= nb_results;
455 } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
456 pool->query_status_stride = 1;
457 pool->query_results = 0;
458 }
459
460 pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);
461
462 /* Allocate space for the query data */
463 pool->query_data = av_calloc(nb_contexts, pool->qd_size);
464 if (!pool->query_data) {
465 err = AVERROR(ENOMEM);
466 goto fail;
467 }
468 }
469
470 /* Allocate space for the contexts */
471 pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts));
472 if (!pool->contexts) {
473 err = AVERROR(ENOMEM);
474 goto fail;
475 }
476
477 pool->pool_size = nb_contexts;
478
479 /* Init contexts */
480 for (int i = 0; i < pool->pool_size; i++) {
481 FFVkExecContext *e = &pool->contexts[i];
482 VkFenceCreateInfo fence_create = {
483 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
484 .flags = VK_FENCE_CREATE_SIGNALED_BIT,
485 };
486
487 /* Fence */
488 ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
489 &e->fence);
490 if (ret != VK_SUCCESS) {
491 av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
492 ff_vk_ret2str(ret));
493 return AVERROR_EXTERNAL;
494 }
495
496 e->idx = i;
497 e->parent = pool;
498
499 /* Query data */
500 e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
501 e->query_idx = nb_queries*i;
502
503 /* Command buffer */
504 e->buf = pool->cmd_bufs[i];
505
506 /* Queue index distribution */
507 e->qi = i % qf->num;
508 e->qf = qf->idx;
509 vk->GetDeviceQueue(s->hwctx->act_dev, qf->idx, e->qi, &e->queue);
510 }
511
512 return 0;
513
514 fail:
515 ff_vk_exec_pool_free(s, pool);
516 return err;
517 }
518
519 VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
520 void **data, VkQueryResultFlagBits flags)
521 {
522 FFVulkanFunctions *vk = &s->vkfn;
523 const FFVkExecPool *pool = e->parent;
524 VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT |
525 VK_QUERY_RESULT_WITH_STATUS_BIT_KHR);
526
527 if (!e->query_data) {
528 av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n");
529 return VK_INCOMPLETE;
530 }
531
532 qf |= pool->query_64bit ?
533 VK_QUERY_RESULT_64_BIT : 0x0;
534 qf |= pool->query_statuses ?
535 VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
536
537 if (data)
538 *data = e->query_data;
539
540 return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
541 e->query_idx,
542 pool->nb_queries,
543 pool->qd_size, e->query_data,
544 pool->qd_size, qf);
545 }
546
/* Hand out execution contexts round-robin; the atomic counter makes this
 * safe to call from multiple threads sharing the pool. */
FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
{
    return &pool->contexts[atomic_fetch_add(&pool->idx, 1) % pool->pool_size];
}
551
552 void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
553 {
554 FFVulkanFunctions *vk = &s->vkfn;
555 vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
556 ff_vk_exec_discard_deps(s, e);
557 }
558
559 int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
560 {
561 VkResult ret;
562 FFVulkanFunctions *vk = &s->vkfn;
563 const FFVkExecPool *pool = e->parent;
564
565 VkCommandBufferBeginInfo cmd_start = {
566 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
567 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
568 };
569
570 /* Wait for the fence to be signalled */
571 vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
572 vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
573
574 /* Discard queue dependencies */
575 ff_vk_exec_discard_deps(s, e);
576
577 ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
578 if (ret != VK_SUCCESS) {
579 av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
580 ff_vk_ret2str(ret));
581 return AVERROR_EXTERNAL;
582 }
583
584 if (pool->nb_queries)
585 vk->CmdResetQueryPool(e->buf, pool->query_pool,
586 e->query_idx, pool->nb_queries);
587
588 return 0;
589 }
590
591 void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
592 {
593 for (int j = 0; j < e->nb_buf_deps; j++)
594 av_buffer_unref(&e->buf_deps[j]);
595 e->nb_buf_deps = 0;
596
597 for (int j = 0; j < e->nb_sw_frame_deps; j++)
598 av_frame_free(&e->sw_frame_deps[j]);
599 e->nb_sw_frame_deps = 0;
600
601 for (int j = 0; j < e->nb_frame_deps; j++) {
602 AVFrame *f = e->frame_deps[j];
603 if (e->frame_locked[j]) {
604 AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
605 AVVulkanFramesContext *vkfc = hwfc->hwctx;
606 AVVkFrame *vkf = (AVVkFrame *)f->data[0];
607 vkfc->unlock_frame(hwfc, vkf);
608 e->frame_locked[j] = 0;
609 }
610 e->frame_update[j] = 0;
611 }
612 e->nb_frame_deps = 0;
613
614 e->sem_wait_cnt = 0;
615 e->sem_sig_cnt = 0;
616 e->sem_sig_val_dst_cnt = 0;
617 }
618
619 int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
620 AVBufferRef **deps, int nb_deps, int ref)
621 {
622 AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
623 (e->nb_buf_deps + nb_deps) * sizeof(*dst));
624 if (!dst) {
625 ff_vk_exec_discard_deps(s, e);
626 return AVERROR(ENOMEM);
627 }
628
629 e->buf_deps = dst;
630
631 for (int i = 0; i < nb_deps; i++) {
632 if (!deps[i])
633 continue;
634
635 e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
636 if (!e->buf_deps[e->nb_buf_deps]) {
637 ff_vk_exec_discard_deps(s, e);
638 return AVERROR(ENOMEM);
639 }
640 e->nb_buf_deps++;
641 }
642
643 return 0;
644 }
645
646 int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e,
647 AVFrame *f)
648 {
649 AVFrame **dst = av_fast_realloc(e->sw_frame_deps, &e->sw_frame_deps_alloc_size,
650 (e->nb_sw_frame_deps + 1) * sizeof(*dst));
651 if (!dst) {
652 ff_vk_exec_discard_deps(s, e);
653 return AVERROR(ENOMEM);
654 }
655
656 e->sw_frame_deps = dst;
657
658 e->sw_frame_deps[e->nb_sw_frame_deps] = av_frame_clone(f);
659 if (!e->sw_frame_deps[e->nb_sw_frame_deps]) {
660 ff_vk_exec_discard_deps(s, e);
661 return AVERROR(ENOMEM);
662 }
663
664 e->nb_sw_frame_deps++;
665
666 return 0;
667 }
668
/* Grow the dynamically-sized array member `arr` of `str` (capacity tracked
 * in `alloc_s`) to hold one more element via av_fast_realloc.
 * NOTE: on allocation failure this discards all pending dependencies on
 * `e` and returns AVERROR(ENOMEM) from the *enclosing* function; both `s`
 * and `e` must be in scope at the expansion site. The local variable name
 * passed as `arr` must match the struct member name. */
#define ARR_REALLOC(str, arr, alloc_s, cnt)                               \
    do {                                                                  \
        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
        if (!arr) {                                                       \
            ff_vk_exec_discard_deps(s, e);                                \
            return AVERROR(ENOMEM);                                       \
        }                                                                 \
        str->arr = arr;                                                   \
    } while (0)
678
/* Holds temporary binary semaphores whose lifetime is tied to a buffer
 * dependency of an execution context. */
typedef struct TempSyncCtx {
    int nb_sem;        /* number of entries in sem[] */
    VkSemaphore sem[]; /* owned semaphores, destroyed with the context */
} TempSyncCtx;

/* AVBuffer free callback: destroys all semaphores held by the TempSyncCtx
 * and frees it. `opaque` is the FFVulkanContext owning the device. */
static void destroy_tmp_semaphores(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    TempSyncCtx *ts = (TempSyncCtx *)data;

    for (int i = 0; i < ts->nb_sem; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc);

    av_free(ts);
}
695
696 int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e,
697 VkSemaphore sem, uint64_t val,
698 VkPipelineStageFlagBits2 stage)
699 {
700 VkSemaphoreSubmitInfo *sem_wait;
701 ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
702
703 e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
704 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
705 .semaphore = sem,
706 .value = val,
707 .stageMask = stage,
708 };
709
710 return 0;
711 }
712
/**
 * Add binary semaphores as either signal targets or wait dependencies of
 * the context's next submission.
 *
 * If wait is 0, the semaphores are only added to the signal list and
 * ownership stays with the caller (they are presumably being exported).
 * If wait is nonzero, ownership of the semaphores transfers to the
 * context: they are wrapped in a TempSyncCtx buffer dependency and
 * destroyed once the submission's dependencies are discarded. On the
 * early failure path they are destroyed immediately.
 *
 * @return 0 on success, negative AVERROR on allocation failure
 */
int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
                                VkSemaphore *sem, int nb,
                                VkPipelineStageFlagBits2 stage,
                                int wait)
{
    int err;
    size_t buf_size;
    AVBufferRef *buf;
    TempSyncCtx *ts;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Do not transfer ownership if we're signalling a binary semaphore,
     * since we're probably exporting it. */
    if (!wait) {
        for (int i = 0; i < nb; i++) {
            VkSemaphoreSubmitInfo *sem_sig;
            ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);

            e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
                .semaphore = sem[i],
                .stageMask = stage,
            };
        }

        return 0;
    }

    /* Wrap the semaphores in a context that destroys them when the
     * buffer dependency is released */
    buf_size = sizeof(*ts) + sizeof(VkSemaphore)*nb;
    ts = av_mallocz(buf_size);
    if (!ts) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    memcpy(ts->sem, sem, nb*sizeof(*sem));
    ts->nb_sem = nb;

    buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0);
    if (!buf) {
        av_free(ts);
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Ownership of buf (and thus the semaphores) passes to the context;
     * after this point cleanup happens via dependency discard */
    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
    if (err < 0) {
        av_buffer_unref(&buf);
        return err;
    }

    /* Binary semaphores wait with a timeline value of 0 */
    for (int i = 0; i < nb; i++) {
        err = ff_vk_exec_add_dep_wait_sem(s, e, sem[i], 0, stage);
        if (err < 0)
            return err;
    }

    return 0;

fail:
    /* Ownership was never transferred; destroy the semaphores here */
    for (int i = 0; i < nb; i++)
        vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc);

    return err;
}
778
/**
 * Register a hardware frame as a dependency of the context's next
 * submission: keeps a reference (when available), locks the frame, and
 * queues per-image timeline semaphore waits/signals.
 *
 * Returns 1 if the frame was already a dependency (nothing is added),
 * 0 on success, negative AVERROR on allocation failure (in which case
 * all pending dependencies are discarded).
 */
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                             VkPipelineStageFlagBits2 wait_stage,
                             VkPipelineStageFlagBits2 signal_stage)
{
    uint8_t *frame_locked;
    uint8_t *frame_update;
    AVFrame **frame_deps;
    AVBufferRef **buf_deps;
    VkImageLayout *layout_dst;
    uint32_t *queue_family_dst;
    VkAccessFlagBits *access_dst;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    int nb_images = ff_vk_count_images(vkf);

    /* Don't add duplicates */
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == f->data[0])
            return 1;

    /* Grow all per-frame tracking arrays in lockstep; each local name
     * must match its struct member for ARR_REALLOC */
    ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
    ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);

    ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
    ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);

    /* prepare_frame in hwcontext_vulkan.c uses the regular frame management
     * code but has no frame yet, and it doesn't need to actually store a ref
     * to the frame. */
    if (f->buf[0]) {
        ARR_REALLOC(e, buf_deps, &e->buf_deps_alloc_size, e->nb_buf_deps);
        e->buf_deps[e->nb_buf_deps] = av_buffer_ref(f->buf[0]);
        if (!e->buf_deps[e->nb_buf_deps]) {
            ff_vk_exec_discard_deps(s, e);
            return AVERROR(ENOMEM);
        }
        e->nb_buf_deps++;
    }

    e->frame_deps[e->nb_frame_deps] = f;

    /* Locked until submission (or dependency discard) */
    vkfc->lock_frame(hwfc, vkf);
    e->frame_locked[e->nb_frame_deps] = 1;
    e->frame_update[e->nb_frame_deps] = 0;
    e->nb_frame_deps++;

    /* One wait and one signal per image: wait on the current timeline
     * value, signal value+1 when the submission completes */
    for (int i = 0; i < nb_images; i++) {
        VkSemaphoreSubmitInfo *sem_wait;
        VkSemaphoreSubmitInfo *sem_sig;
        uint64_t **sem_sig_val_dst;

        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
        ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);

        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i],
            .stageMask = wait_stage,
        };

        e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i] + 1,
            .stageMask = signal_stage,
        };

        /* Remember where to bump the frame's semaphore value post-submit */
        e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
        e->sem_sig_val_dst_cnt++;
    }

    return 0;
}
858
859 void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
860 VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
861 {
862 int i;
863 for (i = 0; i < e->nb_frame_deps; i++)
864 if (e->frame_deps[i]->data[0] == f->data[0])
865 break;
866 av_assert0(i < e->nb_frame_deps);
867
868 /* Don't update duplicates */
869 if (nb_img_bar && !e->frame_update[i])
870 (*nb_img_bar)++;
871
872 e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
873 e->access_dst[i] = bar->dstAccessMask;
874 e->layout_dst[i] = bar->newLayout;
875 e->frame_update[i] = 1;
876 }
877
878 int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
879 VkSemaphore *dst, uint64_t *dst_val,
880 AVFrame *f)
881 {
882 uint64_t **sem_sig_val_dst;
883 AVVkFrame *vkf = (AVVkFrame *)f->data[0];
884
885 /* Reject unknown frames */
886 int i;
887 for (i = 0; i < e->nb_frame_deps; i++)
888 if (e->frame_deps[i]->data[0] == f->data[0])
889 break;
890 if (i == e->nb_frame_deps)
891 return AVERROR(EINVAL);
892
893 ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
894
895 *dst = vkf->sem[0];
896 *dst_val = vkf->sem_value[0];
897
898 e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
899 e->sem_sig_val_dst_cnt++;
900
901 return 0;
902 }
903
/**
 * End command recording and submit the context's command buffer, together
 * with its accumulated semaphore waits/signals, to the context's queue.
 * After a successful submit: all mirrored timeline values are bumped, and
 * every locked frame has its cached layout/access/queue-family applied
 * (if updated) before being unlocked.
 *
 * @return 0 on success, AVERROR_EXTERNAL on a Vulkan failure (pending
 *         dependencies are discarded in that case)
 */
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = e->buf,
    };
    VkSubmitInfo2 submit_info = (VkSubmitInfo2) {
        .sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .pCommandBufferInfos      = &cmd_buf_info,
        .commandBufferInfoCount   = 1,
        .pWaitSemaphoreInfos      = e->sem_wait,
        .waitSemaphoreInfoCount   = e->sem_wait_cnt,
        .pSignalSemaphoreInfos    = e->sem_sig,
        .signalSemaphoreInfoCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    /* Queue access must be serialized through the hwdevice's queue lock */
    s->hwctx->lock_queue(s->device, e->qf, e->qi);
    ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence);
    s->hwctx->unlock_queue(s->device, e->qf, e->qi);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        ff_vk_exec_discard_deps(s, e);
        return AVERROR_EXTERNAL;
    }

    /* Advance every mirrored timeline semaphore value */
    for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    /* Unlock all frames */
    for (int j = 0; j < e->nb_frame_deps; j++) {
        if (e->frame_locked[j]) {
            AVFrame *f = e->frame_deps[j];
            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
            AVVulkanFramesContext *vkfc = hwfc->hwctx;
            AVVkFrame *vkf = (AVVkFrame *)f->data[0];

            /* Apply the state recorded by ff_vk_exec_update_frame */
            if (e->frame_update[j]) {
                int nb_images = ff_vk_count_images(vkf);
                for (int i = 0; i < nb_images; i++) {
                    vkf->layout[i] = e->layout_dst[j];
                    vkf->access[i] = e->access_dst[j];
                    vkf->queue_family[i] = e->queue_family_dst[j];
                }
            }
            vkfc->unlock_frame(hwfc, vkf);
            e->frame_locked[j] = 0;
        }
    }

    /* From now on, ff_vk_exec_pool_free is allowed to wait on the fence */
    e->had_submission = 1;

    return 0;
}
969
970 int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
971 VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
972 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
973 {
974 VkResult ret;
975 int index = -1;
976 FFVulkanFunctions *vk = &s->vkfn;
977
978 VkMemoryAllocateInfo alloc_info = {
979 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
980 .pNext = alloc_extension,
981 };
982
983 alloc_info.allocationSize = req->size;
984
985 /* The vulkan spec requires memory types to be sorted in the "optimal"
986 * order, so the first matching type we find will be the best/fastest one */
987 for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
988 /* The memory type must be supported by the requirements (bitfield) */
989 if (!(req->memoryTypeBits & (1 << i)))
990 continue;
991
992 /* The memory type flags must include our properties */
993 if ((req_flags != UINT32_MAX) &&
994 ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
995 continue;
996
997 /* Found a suitable memory type */
998 index = i;
999 break;
1000 }
1001
1002 if (index < 0) {
1003 av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
1004 req_flags);
1005 return AVERROR(EINVAL);
1006 }
1007
1008 alloc_info.memoryTypeIndex = index;
1009
1010 ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
1011 s->hwctx->alloc, mem);
1012 if (ret != VK_SUCCESS)
1013 return AVERROR(ENOMEM);
1014
1015 if (mem_flags)
1016 *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
1017
1018 return 0;
1019 }
1020
1021 int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
1022 void *pNext, void *alloc_pNext,
1023 VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
1024 {
1025 int err;
1026 VkResult ret;
1027 int use_ded_mem;
1028 FFVulkanFunctions *vk = &s->vkfn;
1029
1030 /* Buffer usage flags corresponding to buffer descriptor types */
1031 const VkBufferUsageFlags desc_usage =
1032 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
1033 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1034 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
1035 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
1036
1037 if ((s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && (usage & desc_usage))
1038 usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1039
1040 VkBufferCreateInfo buf_spawn = {
1041 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1042 .pNext = pNext,
1043 .usage = usage,
1044 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1045 .size = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ?
1046 FFALIGN(size, s->props.properties.limits.minMemoryMapAlignment) :
1047 size,
1048 };
1049
1050 VkMemoryAllocateFlagsInfo alloc_flags = {
1051 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
1052 .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
1053 };
1054 VkBufferMemoryRequirementsInfo2 req_desc = {
1055 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
1056 };
1057 VkMemoryDedicatedAllocateInfo ded_alloc = {
1058 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
1059 .pNext = alloc_pNext,
1060 };
1061 VkMemoryDedicatedRequirements ded_req = {
1062 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
1063 };
1064 VkMemoryRequirements2 req = {
1065 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
1066 .pNext = &ded_req,
1067 };
1068
1069 av_log(s, AV_LOG_DEBUG, "Creating a buffer of %"SIZE_SPECIFIER" bytes, "
1070 "usage: 0x%x, flags: 0x%x\n",
1071 size, usage, flags);
1072
1073 ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
1074 if (ret != VK_SUCCESS) {
1075 av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
1076 ff_vk_ret2str(ret));
1077 return AVERROR_EXTERNAL;
1078 }
1079
1080 req_desc.buffer = buf->buf;
1081
1082 vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
1083
1084 /* In case the implementation prefers/requires dedicated allocation */
1085 use_ded_mem = ded_req.prefersDedicatedAllocation |
1086 ded_req.requiresDedicatedAllocation;
1087 if (use_ded_mem) {
1088 ded_alloc.buffer = buf->buf;
1089 ded_alloc.pNext = alloc_pNext;
1090 alloc_pNext = &ded_alloc;
1091 }
1092
1093 if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
1094 alloc_flags.pNext = alloc_pNext;
1095 alloc_pNext = &alloc_flags;
1096 }
1097
1098 err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
1099 &buf->flags, &buf->mem);
1100 if (err)
1101 return err;
1102
1103 ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
1104 if (ret != VK_SUCCESS) {
1105 av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
1106 ff_vk_ret2str(ret));
1107 return AVERROR_EXTERNAL;
1108 }
1109
1110 if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
1111 VkBufferDeviceAddressInfo address_info = {
1112 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
1113 .buffer = buf->buf,
1114 };
1115 buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
1116 }
1117
1118 buf->size = size;
1119
1120 return 0;
1121 }
1122
1123 int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
1124 int nb_buffers, int invalidate)
1125 {
1126 VkResult ret;
1127 FFVulkanFunctions *vk = &s->vkfn;
1128 VkMappedMemoryRange inval_list[64];
1129 int inval_count = 0;
1130
1131 for (int i = 0; i < nb_buffers; i++) {
1132 void *dst;
1133 ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
1134 VK_WHOLE_SIZE, 0, &dst);
1135 if (ret != VK_SUCCESS) {
1136 av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
1137 ff_vk_ret2str(ret));
1138 return AVERROR_EXTERNAL;
1139 }
1140 mem[i] = buf[i]->mapped_mem = dst;
1141 }
1142
1143 if (!invalidate)
1144 return 0;
1145
1146 for (int i = 0; i < nb_buffers; i++) {
1147 const VkMappedMemoryRange ival_buf = {
1148 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1149 .memory = buf[i]->mem,
1150 .size = VK_WHOLE_SIZE,
1151 };
1152 if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1153 continue;
1154 inval_list[inval_count++] = ival_buf;
1155 }
1156
1157 if (inval_count) {
1158 ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
1159 inval_list);
1160 if (ret != VK_SUCCESS) {
1161 av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
1162 ff_vk_ret2str(ret));
1163 return AVERROR_EXTERNAL;
1164 }
1165 }
1166
1167 return 0;
1168 }
1169
1170 int ff_vk_flush_buffer(FFVulkanContext *s, FFVkBuffer *buf,
1171 size_t offset, size_t mem_size,
1172 int flush)
1173 {
1174 VkResult ret;
1175 FFVulkanFunctions *vk = &s->vkfn;
1176
1177 if (buf->host_ref || buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1178 return 0;
1179
1180 const VkMappedMemoryRange flush_data = {
1181 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1182 .memory = buf->mem,
1183 .offset = offset,
1184 .size = mem_size,
1185 };
1186
1187 if (flush)
1188 ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, 1, &flush_data);
1189 else
1190 ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, 1, &flush_data);
1191
1192 if (ret != VK_SUCCESS) {
1193 av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1194 ff_vk_ret2str(ret));
1195 return AVERROR_EXTERNAL;
1196 }
1197
1198 return 0;
1199 }
1200
1201 int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
1202 int flush)
1203 {
1204 int err = 0;
1205 VkResult ret;
1206 FFVulkanFunctions *vk = &s->vkfn;
1207 VkMappedMemoryRange flush_list[64];
1208 int flush_count = 0;
1209
1210 if (flush) {
1211 for (int i = 0; i < nb_buffers; i++) {
1212 const VkMappedMemoryRange flush_buf = {
1213 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1214 .memory = buf[i]->mem,
1215 .size = VK_WHOLE_SIZE,
1216 };
1217
1218 av_assert0(!buf[i]->host_ref);
1219 if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1220 continue;
1221 flush_list[flush_count++] = flush_buf;
1222 }
1223 }
1224
1225 if (flush_count) {
1226 ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
1227 flush_list);
1228 if (ret != VK_SUCCESS) {
1229 av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1230 ff_vk_ret2str(ret));
1231 err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
1232 }
1233 }
1234
1235 for (int i = 0; i < nb_buffers; i++) {
1236 vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);
1237 buf[i]->mapped_mem = NULL;
1238 }
1239
1240 return err;
1241 }
1242
1243 void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
1244 {
1245 FFVulkanFunctions *vk = &s->vkfn;
1246
1247 if (!buf || !s->hwctx)
1248 return;
1249
1250 if (buf->mapped_mem && !buf->host_ref)
1251 ff_vk_unmap_buffer(s, buf, 0);
1252 if (buf->buf != VK_NULL_HANDLE)
1253 vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
1254 if (buf->mem != VK_NULL_HANDLE)
1255 vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
1256 if (buf->host_ref)
1257 av_buffer_unref(&buf->host_ref);
1258
1259 buf->buf = VK_NULL_HANDLE;
1260 buf->mem = VK_NULL_HANDLE;
1261 buf->mapped_mem = NULL;
1262 }
1263
1264 static void free_data_buf(void *opaque, uint8_t *data)
1265 {
1266 FFVulkanContext *ctx = opaque;
1267 FFVkBuffer *buf = (FFVkBuffer *)data;
1268 ff_vk_free_buf(ctx, buf);
1269 av_free(data);
1270 }
1271
1272 static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
1273 {
1274 AVBufferRef *ref;
1275 uint8_t *buf = av_mallocz(size);
1276 if (!buf)
1277 return NULL;
1278
1279 ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
1280 if (!ref)
1281 av_free(buf);
1282 return ref;
1283 }
1284
/* Get a buffer from the pool (lazily creating the pool), reusing the pooled
 * Vulkan buffer if it is large enough, otherwise recreating it at the
 * requested size. Host-visible buffers come back persistently mapped.
 * On success *buf owns the buffer; on failure *buf is NULL. */
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
                            AVBufferRef **buf, VkBufferUsageFlags usage,
                            void *create_pNext, size_t size,
                            VkMemoryPropertyFlagBits mem_props)
{
    int err;
    AVBufferRef *ref;
    FFVkBuffer *data;

    *buf = NULL;

    /* Lazily create the pool; entries are FFVkBuffer structs whose Vulkan
     * resources are created on first use below */
    if (!(*buf_pool)) {
        *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx,
                                         alloc_data_buf, NULL);
        if (!(*buf_pool))
            return AVERROR(ENOMEM);
    }

    *buf = ref = av_buffer_pool_get(*buf_pool);
    if (!ref)
        return AVERROR(ENOMEM);

    data = (FFVkBuffer *)ref->data;
    /* Reset sync state for the new user of this pooled buffer */
    data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    data->access = VK_ACCESS_2_NONE;

    /* Recycled buffer is big enough — reuse it as-is */
    if (data->size >= size)
        return 0;

    /* Too small (or never created): free any old resources and recreate */
    ff_vk_free_buf(ctx, data);
    memset(data, 0, sizeof(*data));

    err = ff_vk_create_buf(ctx, data, size,
                           create_pNext, NULL, usage,
                           mem_props);
    if (err < 0) {
        av_buffer_unref(&ref);
        *buf = NULL;
        return err;
    }

    /* Keep host-visible buffers persistently mapped for the caller */
    if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0);
        if (err < 0) {
            av_buffer_unref(&ref);
            *buf = NULL;
            return err;
        }
    }

    return 0;
}
1337
/* Create a VkBuffer backed by imported host memory (import_desc points at
 * pre-queried host-pointer properties). Note the order: memory is imported
 * first, then the buffer is created and bound; on failure everything
 * allocated here is released before returning. */
static int create_mapped_buffer(FFVulkanContext *s,
                                FFVkBuffer *vkb, VkBufferUsageFlags usage,
                                size_t size,
                                VkExternalMemoryBufferCreateInfo *create_desc,
                                VkImportMemoryHostPointerInfoEXT *import_desc,
                                VkMemoryHostPointerPropertiesEXT props)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = create_desc,
        .usage = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size = size,
    };
    /* Synthesized requirements: imported host memory must honour the
     * device's imported-pointer alignment and the queried type bits */
    VkMemoryRequirements req = {
        .size           = size,
        .alignment      = s->hprops.minImportedHostPointerAlignment,
        .memoryTypeBits = props.memoryTypeBits,
    };

    err = ff_vk_alloc_mem(s, &req,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                          import_desc, &vkb->flags, &vkb->mem);
    if (err < 0)
        return err;

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0);
    if (ret != VK_SUCCESS) {
        vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
        vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc);
        return AVERROR_EXTERNAL;
    }

    return 0;
}
1383
1384 static void destroy_avvkbuf(void *opaque, uint8_t *data)
1385 {
1386 FFVulkanContext *s = opaque;
1387 FFVkBuffer *buf = (FFVkBuffer *)data;
1388 ff_vk_free_buf(s, buf);
1389 av_free(buf);
1390 }
1391
/* Import the host memory behind src_data (which must lie inside src_buf)
 * as a Vulkan buffer, avoiding a copy. The imported region is aligned down
 * to minImportedHostPointerAlignment; vkb->virtual_offset records how far
 * src_data is past that aligned start. The returned *dst holds a reference
 * on src_buf for the lifetime of the Vulkan buffer. */
int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst,
                          uint8_t *src_data, const AVBufferRef *src_buf,
                          VkBufferUsageFlags usage)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkExternalMemoryBufferCreateInfo create_desc = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkMemoryAllocateFlagsInfo alloc_flags = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
    };
    VkImportMemoryHostPointerInfoEXT import_desc = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        /* Device-address allocation is only needed when the usage asks for it */
        .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? &alloc_flags : NULL,
    };
    VkMemoryHostPointerPropertiesEXT props;

    AVBufferRef *ref;
    FFVkBuffer *vkb;
    size_t offs;
    size_t buffer_size;

    *dst = NULL;

    /* Get the previous point at which mapping was possible and use it */
    offs = (uintptr_t)src_data % s->hprops.minImportedHostPointerAlignment;
    import_desc.pHostPointer = src_data - offs;

    props = (VkMemoryHostPointerPropertiesEXT) {
        VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
    };
    /* Ask the driver whether (and with which memory types) this host
     * pointer can be imported at all */
    ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev,
                                                import_desc.handleType,
                                                import_desc.pHostPointer,
                                                &props);
    if (!(ret == VK_SUCCESS && props.memoryTypeBits))
        return AVERROR(EINVAL);

    /* Ref the source buffer */
    ref = av_buffer_ref(src_buf);
    if (!ref)
        return AVERROR(ENOMEM);

    /* Add the offset at the start, which gets ignored */
    const ptrdiff_t src_offset = src_data - src_buf->data;
    buffer_size = offs + (src_buf->size - src_offset);
    buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment);
    buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment);

    /* Create a buffer struct */
    vkb = av_mallocz(sizeof(*vkb));
    if (!vkb) {
        av_buffer_unref(&ref);
        return AVERROR(ENOMEM);
    }

    err = create_mapped_buffer(s, vkb, usage,
                               buffer_size, &create_desc, &import_desc,
                               props);
    if (err < 0) {
        av_buffer_unref(&ref);
        av_free(vkb);
        return err;
    }

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        VkBufferDeviceAddressInfo address_info = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
            .buffer = vkb->buf,
        };
        vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
    }

    /* The buffer starts at the aligned-down address; bias the address and
     * size so users see exactly the region starting at src_data */
    vkb->host_ref = ref;
    vkb->virtual_offset = offs;
    vkb->address += offs;
    vkb->mapped_mem = src_data;
    vkb->size = buffer_size - offs;
    vkb->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    /* Create a ref */
    *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
                            destroy_avvkbuf, s, 0);
    if (!(*dst)) {
        destroy_avvkbuf(s, (uint8_t *)vkb);
        *dst = NULL;
        return AVERROR(ENOMEM);
    }

    return 0;
}
1489
1490 int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size,
1491 VkShaderStageFlagBits stage)
1492 {
1493 VkPushConstantRange *pc;
1494
1495 shd->push_consts = av_realloc_array(shd->push_consts,
1496 sizeof(*shd->push_consts),
1497 shd->push_consts_num + 1);
1498 if (!shd->push_consts)
1499 return AVERROR(ENOMEM);
1500
1501 pc = &shd->push_consts[shd->push_consts_num++];
1502 memset(pc, 0, sizeof(*pc));
1503
1504 pc->stageFlags = stage;
1505 pc->offset = offset;
1506 pc->size = size;
1507
1508 return 0;
1509 }
1510
1511 int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
1512 int unnorm_coords, VkFilter filt)
1513 {
1514 VkResult ret;
1515 FFVulkanFunctions *vk = &s->vkfn;
1516
1517 VkSamplerCreateInfo sampler_info = {
1518 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
1519 .magFilter = filt,
1520 .minFilter = sampler_info.magFilter,
1521 .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
1522 VK_SAMPLER_MIPMAP_MODE_LINEAR,
1523 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
1524 .addressModeV = sampler_info.addressModeU,
1525 .addressModeW = sampler_info.addressModeU,
1526 .anisotropyEnable = VK_FALSE,
1527 .compareOp = VK_COMPARE_OP_NEVER,
1528 .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
1529 .unnormalizedCoordinates = unnorm_coords,
1530 };
1531
1532 ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
1533 s->hwctx->alloc, sampler);
1534 if (ret != VK_SUCCESS) {
1535 av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
1536 ff_vk_ret2str(ret));
1537 return AVERROR_EXTERNAL;
1538 }
1539
1540 return 0;
1541 }
1542
1543 VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p)
1544 {
1545 AVVkFrame *vkf = (AVVkFrame *)f->data[0];
1546 AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
1547 int nb_images = ff_vk_count_images(vkf);
1548 int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
1549
1550 static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
1551 VK_IMAGE_ASPECT_PLANE_1_BIT,
1552 VK_IMAGE_ASPECT_PLANE_2_BIT, };
1553
1554 if (ff_vk_mt_is_np_rgb(hwfc->sw_format) || (nb_planes == nb_images))
1555 return VK_IMAGE_ASPECT_COLOR_BIT;
1556
1557 return plane_aspect[p];
1558 }
1559
1560 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
1561 {
1562 if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
1563 pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
1564 pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
1565 pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
1566 pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
1567 pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0 ||
1568 pix_fmt == AV_PIX_FMT_GBRP10 || pix_fmt == AV_PIX_FMT_GBRP12 ||
1569 pix_fmt == AV_PIX_FMT_GBRP14 || pix_fmt == AV_PIX_FMT_GBRP16 ||
1570 pix_fmt == AV_PIX_FMT_GBRAP || pix_fmt == AV_PIX_FMT_GBRAP10 ||
1571 pix_fmt == AV_PIX_FMT_GBRAP12 || pix_fmt == AV_PIX_FMT_GBRAP14 ||
1572 pix_fmt == AV_PIX_FMT_GBRAP16 || pix_fmt == AV_PIX_FMT_GBRAP32 ||
1573 pix_fmt == AV_PIX_FMT_GBRPF32 || pix_fmt == AV_PIX_FMT_GBRAPF32 ||
1574 pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
1575 pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
1576 pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 ||
1577 pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16)
1578 return 1;
1579 return 0;
1580 }
1581
1582 void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4], int inv)
1583 {
1584 switch (pix_fmt) {
1585 case AV_PIX_FMT_GBRP:
1586 case AV_PIX_FMT_GBRAP:
1587 case AV_PIX_FMT_GBRAP10:
1588 case AV_PIX_FMT_GBRAP12:
1589 case AV_PIX_FMT_GBRAP14:
1590 case AV_PIX_FMT_GBRAP16:
1591 case AV_PIX_FMT_GBRP10:
1592 case AV_PIX_FMT_GBRP12:
1593 case AV_PIX_FMT_GBRP14:
1594 case AV_PIX_FMT_GBRP16:
1595 case AV_PIX_FMT_GBRPF32:
1596 case AV_PIX_FMT_GBRAP32:
1597 case AV_PIX_FMT_GBRAPF32:
1598 lut[0] = 1;
1599 lut[1] = 2;
1600 lut[2] = 0;
1601 lut[3] = 3;
1602 break;
1603 case AV_PIX_FMT_X2BGR10:
1604 lut[0] = 0;
1605 lut[1] = 2;
1606 lut[2] = 1;
1607 lut[3] = 3;
1608 break;
1609 default:
1610 lut[0] = 0;
1611 lut[1] = 1;
1612 lut[2] = 2;
1613 lut[3] = 3;
1614 break;
1615 }
1616
1617 if (inv) {
1618 int lut_tmp[4] = { lut[0], lut[1], lut[2], lut[3] };
1619 for (int i = 0; i < 4; i++)
1620 lut[lut_tmp[i]] = i;
1621 }
1622
1623 return;
1624 }
1625
/* Map a pixel format and representation mode to the GLSL image format
 * qualifier string used per-plane in shaders. Entries are NULL where a
 * representation has no valid qualifier for that format; the caller is
 * expected to pass a supported rep_fmt. */
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
                                 enum FFVkShaderRepFormat rep_fmt)
{
    switch (pix_fmt) {
    /* 8-bit-per-component packed formats */
    case AV_PIX_FMT_RGBA:
    case AV_PIX_FMT_BGRA:
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
    case AV_PIX_FMT_BGR0:
    case AV_PIX_FMT_RGB0:
    case AV_PIX_FMT_RGB565:
    case AV_PIX_FMT_BGR565:
    case AV_PIX_FMT_UYVA:
    case AV_PIX_FMT_YUYV422:
    case AV_PIX_FMT_UYVY422: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba8ui",
            [FF_VK_REP_FLOAT] = "rgba8",
            [FF_VK_REP_INT] = "rgba8i",
            [FF_VK_REP_UINT] = "rgba8ui",
        };
        return rep_tab[rep_fmt];
    }
    /* 10:10:10:2 packed formats (no signed-int image format exists) */
    case AV_PIX_FMT_X2RGB10:
    case AV_PIX_FMT_X2BGR10:
    case AV_PIX_FMT_Y210:
    case AV_PIX_FMT_XV30: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgb10_a2ui",
            [FF_VK_REP_FLOAT] = "rgb10_a2",
            [FF_VK_REP_INT] = NULL,
            [FF_VK_REP_UINT] = "rgb10_a2ui",
        };
        return rep_tab[rep_fmt];
    }
    /* 16-bit-per-component packed formats */
    case AV_PIX_FMT_RGB48:
    case AV_PIX_FMT_RGBA64:
    case AV_PIX_FMT_Y212:
    case AV_PIX_FMT_Y216:
    case AV_PIX_FMT_XV36:
    case AV_PIX_FMT_XV48: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba16ui",
            [FF_VK_REP_FLOAT] = "rgba16",
            [FF_VK_REP_INT] = "rgba16i",
            [FF_VK_REP_UINT] = "rgba16ui",
        };
        return rep_tab[rep_fmt];
    }
    /* 32-bit float packed formats */
    case AV_PIX_FMT_RGBF32:
    case AV_PIX_FMT_RGBAF32: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba32f",
            [FF_VK_REP_FLOAT] = "rgba32f",
            [FF_VK_REP_INT] = "rgba32i",
            [FF_VK_REP_UINT] = "rgba32ui",
        };
        return rep_tab[rep_fmt];
    }
    /* 32-bit integer packed formats (no normalized float view) */
    case AV_PIX_FMT_RGB96:
    case AV_PIX_FMT_RGBA128: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgba32ui",
            [FF_VK_REP_FLOAT] = NULL,
            [FF_VK_REP_INT] = "rgba32i",
            [FF_VK_REP_UINT] = "rgba32ui",
        };
        return rep_tab[rep_fmt];
    }
    /* 8-bit single-component planes */
    case AV_PIX_FMT_GBRP:
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GBRAP:
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUVA420P:
    case AV_PIX_FMT_YUVA422P:
    case AV_PIX_FMT_YUVA444P: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r8ui",
            [FF_VK_REP_FLOAT] = "r8",
            [FF_VK_REP_INT] = "r8i",
            [FF_VK_REP_UINT] = "r8ui",
        };
        return rep_tab[rep_fmt];
    };
    /* 10-16 bit single-component planes */
    case AV_PIX_FMT_GRAY10:
    case AV_PIX_FMT_GRAY12:
    case AV_PIX_FMT_GRAY14:
    case AV_PIX_FMT_GRAY16:
    case AV_PIX_FMT_GBRAP10:
    case AV_PIX_FMT_GBRAP12:
    case AV_PIX_FMT_GBRAP14:
    case AV_PIX_FMT_GBRAP16:
    case AV_PIX_FMT_GBRP10:
    case AV_PIX_FMT_GBRP12:
    case AV_PIX_FMT_GBRP14:
    case AV_PIX_FMT_GBRP16:
    case AV_PIX_FMT_YUV420P10:
    case AV_PIX_FMT_YUV420P12:
    case AV_PIX_FMT_YUV420P16:
    case AV_PIX_FMT_YUV422P10:
    case AV_PIX_FMT_YUV422P12:
    case AV_PIX_FMT_YUV422P16:
    case AV_PIX_FMT_YUV444P10:
    case AV_PIX_FMT_YUV444P12:
    case AV_PIX_FMT_YUV444P16:
    case AV_PIX_FMT_YUVA420P10:
    case AV_PIX_FMT_YUVA420P16:
    case AV_PIX_FMT_YUVA422P10:
    case AV_PIX_FMT_YUVA422P12:
    case AV_PIX_FMT_YUVA422P16:
    case AV_PIX_FMT_YUVA444P10:
    case AV_PIX_FMT_YUVA444P12:
    case AV_PIX_FMT_YUVA444P16:
    case AV_PIX_FMT_BAYER_RGGB16: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r16ui",
            [FF_VK_REP_FLOAT] = "r16f",
            [FF_VK_REP_INT] = "r16i",
            [FF_VK_REP_UINT] = "r16ui",
        };
        return rep_tab[rep_fmt];
    };
    /* 32-bit single-component planes */
    case AV_PIX_FMT_GRAY32:
    case AV_PIX_FMT_GRAYF32:
    case AV_PIX_FMT_GBRPF32:
    case AV_PIX_FMT_GBRAPF32: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r32f",
            [FF_VK_REP_FLOAT] = "r32f",
            [FF_VK_REP_INT] = "r32i",
            [FF_VK_REP_UINT] = "r32ui",
        };
        return rep_tab[rep_fmt];
    };
    /* 32-bit unsigned integer planes (no normalized float view) */
    case AV_PIX_FMT_GBRAP32: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "r32ui",
            [FF_VK_REP_FLOAT] = NULL,
            [FF_VK_REP_INT] = "r32i",
            [FF_VK_REP_UINT] = "r32ui",
        };
        return rep_tab[rep_fmt];
    };
    /* Semi-planar 8-bit chroma planes */
    case AV_PIX_FMT_NV12:
    case AV_PIX_FMT_NV16:
    case AV_PIX_FMT_NV24: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rg8ui",
            [FF_VK_REP_FLOAT] = "rg8",
            [FF_VK_REP_INT] = "rg8i",
            [FF_VK_REP_UINT] = "rg8ui",
        };
        return rep_tab[rep_fmt];
    };
    /* Semi-planar 10-bit (data in top bits, viewed as 10:10:10:2) */
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P210:
    case AV_PIX_FMT_P410: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rgb10_a2ui",
            [FF_VK_REP_FLOAT] = "rgb10_a2",
            [FF_VK_REP_INT] = NULL,
            [FF_VK_REP_UINT] = "rgb10_a2ui",
        };
        return rep_tab[rep_fmt];
    };
    /* Semi-planar 12/16-bit chroma planes */
    case AV_PIX_FMT_P012:
    case AV_PIX_FMT_P016:
    case AV_PIX_FMT_P212:
    case AV_PIX_FMT_P216:
    case AV_PIX_FMT_P412:
    case AV_PIX_FMT_P416: {
        const char *rep_tab[] = {
            [FF_VK_REP_NATIVE] = "rg16ui",
            [FF_VK_REP_FLOAT] = "rg16",
            [FF_VK_REP_INT] = "rg16i",
            [FF_VK_REP_UINT] = "rg16ui",
        };
        return rep_tab[rep_fmt];
    };
    default:
        return "rgba32f";
    }
}
1811
/* Bundle of per-plane image views whose lifetime is tied to one AVBufferRef
 * (freed together by destroy_imageviews) */
typedef struct ImageViewCtx {
    int nb_views;           /* number of valid entries in views[] */
    VkImageView views[];    /* one view per plane; flexible array member */
} ImageViewCtx;
1816
1817 static void destroy_imageviews(void *opaque, uint8_t *data)
1818 {
1819 FFVulkanContext *s = opaque;
1820 FFVulkanFunctions *vk = &s->vkfn;
1821 ImageViewCtx *iv = (ImageViewCtx *)data;
1822
1823 for (int i = 0; i < iv->nb_views; i++)
1824 vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
1825
1826 av_free(iv);
1827 }
1828
/* Translate a plane's VkFormat to the variant matching the requested
 * representation (native/float/int/uint). Each table row lists all four
 * representations of one base format; a format is looked up by matching it
 * against any column of a row, then the rep_fmt column is returned.
 * VK_FORMAT_UNDEFINED marks representations that do not exist. */
static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
{
#define REPS_FMT(fmt) \
    [FF_VK_REP_NATIVE] = fmt ## _UINT, \
    [FF_VK_REP_FLOAT] = fmt ## _UNORM, \
    [FF_VK_REP_INT] = fmt ## _SINT, \
    [FF_VK_REP_UINT] = fmt ## _UINT,

#define REPS_FMT_PACK(fmt, num) \
    [FF_VK_REP_NATIVE] = fmt ## _UINT_PACK ## num, \
    [FF_VK_REP_FLOAT] = fmt ## _UNORM_PACK ## num, \
    [FF_VK_REP_INT] = fmt ## _SINT_PACK ## num, \
    [FF_VK_REP_UINT] = fmt ## _UINT_PACK ## num,

    const VkFormat fmts_map[][4] = {
        { REPS_FMT_PACK(VK_FORMAT_A2B10G10R10, 32) },
        { REPS_FMT_PACK(VK_FORMAT_A2R10G10B10, 32) },
        {
            VK_FORMAT_B5G6R5_UNORM_PACK16,
            VK_FORMAT_B5G6R5_UNORM_PACK16,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R5G6B5_UNORM_PACK16,
            VK_FORMAT_R5G6B5_UNORM_PACK16,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        { REPS_FMT(VK_FORMAT_B8G8R8) },
        { REPS_FMT(VK_FORMAT_B8G8R8A8) },
        { REPS_FMT(VK_FORMAT_R8) },
        { REPS_FMT(VK_FORMAT_R8G8) },
        { REPS_FMT(VK_FORMAT_R8G8B8) },
        { REPS_FMT(VK_FORMAT_R8G8B8A8) },
        { REPS_FMT(VK_FORMAT_R16) },
        { REPS_FMT(VK_FORMAT_R16G16) },
        { REPS_FMT(VK_FORMAT_R16G16B16) },
        { REPS_FMT(VK_FORMAT_R16G16B16A16) },
        {
            VK_FORMAT_R32_UINT,
            VK_FORMAT_R32_SFLOAT,
            VK_FORMAT_R32_SINT,
            VK_FORMAT_R32_UINT,
        },
        {
            VK_FORMAT_R32G32B32_SFLOAT,
            VK_FORMAT_R32G32B32_SFLOAT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R32G32B32A32_SFLOAT,
            VK_FORMAT_R32G32B32A32_SFLOAT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_UNDEFINED,
        },
        {
            VK_FORMAT_R32G32B32_UINT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_R32G32B32_SINT,
            VK_FORMAT_R32G32B32_UINT,
        },
        {
            VK_FORMAT_R32G32B32A32_UINT,
            VK_FORMAT_UNDEFINED,
            VK_FORMAT_R32G32B32A32_SINT,
            VK_FORMAT_R32G32B32A32_UINT,
        },
    };
#undef REPS_FMT_PACK
#undef REPS_FMT

    if (fmt == VK_FORMAT_UNDEFINED)
        return VK_FORMAT_UNDEFINED;

    /* Find the row containing fmt in any representation column */
    for (int i = 0; i < FF_ARRAY_ELEMS(fmts_map); i++) {
        if (fmts_map[i][FF_VK_REP_NATIVE] == fmt ||
            fmts_map[i][FF_VK_REP_FLOAT] == fmt ||
            fmts_map[i][FF_VK_REP_INT] == fmt ||
            fmts_map[i][FF_VK_REP_UINT] == fmt)
            return fmts_map[i][rep_fmt];
    }

    return VK_FORMAT_UNDEFINED;
}
1915
/* Create a single image view for one plane of a Vulkan frame, with the
 * plane's format converted to the requested representation. Writes the view
 * to *img_view and the aspect mask used to *aspect. The caller owns the
 * returned view and must destroy it. */
int ff_vk_create_imageview(FFVulkanContext *s,
                           VkImageView *img_view, VkImageAspectFlags *aspect,
                           AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
    const int nb_images = ff_vk_count_images(vkf);

    /* Video codec usages are not valid for image views; mask them out */
    VkImageViewUsageCreateInfo view_usage_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
        .usage = vkfc->usage &
                 (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
                    VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)),
    };
    VkImageViewCreateInfo view_create_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = &view_usage_info,
        /* Multi-plane single-image formats view the one shared image */
        .image = vkf->img[FFMIN(plane, nb_images - 1)],
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt),
        .components = ff_comp_identity_map,
        .subresourceRange = {
            .aspectMask = ff_vk_aspect_flag(f, plane),
            .levelCount = 1,
            .layerCount = 1,
        },
    };
    if (view_create_info.format == VK_FORMAT_UNDEFINED) {
        av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
                                "of format %i and mode %i\n",
               rep_fmts[plane], rep_fmt);
        return AVERROR(EINVAL);
    }

    ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
                              s->hwctx->alloc, img_view);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    *aspect = view_create_info.subresourceRange.aspectMask;

    return 0;
}
1966
1967 int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
1968 VkImageView views[AV_NUM_DATA_POINTERS],
1969 AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
1970 {
1971 int err;
1972 VkResult ret;
1973 AVBufferRef *buf;
1974 FFVulkanFunctions *vk = &s->vkfn;
1975 AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
1976 AVVulkanFramesContext *vkfc = hwfc->hwctx;
1977 const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
1978 AVVkFrame *vkf = (AVVkFrame *)f->data[0];
1979 const int nb_images = ff_vk_count_images(vkf);
1980 const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
1981
1982 ImageViewCtx *iv;
1983 const size_t buf_size = sizeof(*iv) + nb_planes*sizeof(VkImageView);
1984 iv = av_mallocz(buf_size);
1985 if (!iv)
1986 return AVERROR(ENOMEM);
1987
1988 for (int i = 0; i < nb_planes; i++) {
1989 VkImageViewUsageCreateInfo view_usage_info = {
1990 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1991 .usage = vkfc->usage &
1992 (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
1993 VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)),
1994 };
1995 VkImageViewCreateInfo view_create_info = {
1996 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1997 .pNext = &view_usage_info,
1998 .image = vkf->img[FFMIN(i, nb_images - 1)],
1999 .viewType = VK_IMAGE_VIEW_TYPE_2D,
2000 .format = map_fmt_to_rep(rep_fmts[i], rep_fmt),
2001 .components = ff_comp_identity_map,
2002 .subresourceRange = {
2003 .aspectMask = ff_vk_aspect_flag(f, i),
2004 .levelCount = 1,
2005 .layerCount = 1,
2006 },
2007 };
2008 if (view_create_info.format == VK_FORMAT_UNDEFINED) {
2009 av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
2010 "of format %i and mode %i\n",
2011 rep_fmts[i], rep_fmt);
2012 err = AVERROR(EINVAL);
2013 goto fail;
2014 }
2015
2016 ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
2017 s->hwctx->alloc, &iv->views[i]);
2018 if (ret != VK_SUCCESS) {
2019 av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
2020 ff_vk_ret2str(ret));
2021 err = AVERROR_EXTERNAL;
2022 goto fail;
2023 }
2024
2025 iv->nb_views++;
2026 }
2027
2028 buf = av_buffer_create((uint8_t *)iv, buf_size, destroy_imageviews, s, 0);
2029 if (!buf) {
2030 err = AVERROR(ENOMEM);
2031 goto fail;
2032 }
2033
2034 /* Add to queue dependencies */
2035 err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
2036 if (err < 0)
2037 av_buffer_unref(&buf);
2038
2039 memcpy(views, iv->views, nb_planes*sizeof(*views));
2040
2041 return err;
2042
2043 fail:
2044 for (int i = 0; i < iv->nb_views; i++)
2045 vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
2046 av_free(iv);
2047 return err;
2048 }
2049
/* Append image memory barriers (one per image of the frame) to bar[],
 * transitioning to the new access/layout/queue family, and record the new
 * state on the exec context. The source state comes from the exec context's
 * pending update for this frame if one exists, otherwise from the frame. */
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
                         VkPipelineStageFlags2 src_stage,
                         VkPipelineStageFlags2 dst_stage,
                         VkAccessFlagBits2 new_access,
                         VkImageLayout new_layout,
                         uint32_t new_qf)
{
    int found = -1;
    AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
    const int nb_images = ff_vk_count_images(vkf);
    /* Check whether this frame already has a pending state update recorded
     * on the exec context; if so, chain from that state instead */
    for (int i = 0; i < e->nb_frame_deps; i++)
        if (e->frame_deps[i]->data[0] == pic->data[0]) {
            if (e->frame_update[i])
                found = i;
            break;
        }

    for (int i = 0; i < nb_images; i++) {
        /* NOTE(review): srcAccessMask falls back to vkf->access[i] but
         * oldLayout/srcQueueFamilyIndex fall back to index [0] — presumably
         * all images share layout/queue family; confirm against AVVkFrame
         * semantics */
        bar[*nb_bar] = (VkImageMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
            .pNext = NULL,
            .srcStageMask = src_stage,
            .dstStageMask = dst_stage,
            .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
            .dstAccessMask = new_access,
            .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
            .newLayout = new_layout,
            .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
            .dstQueueFamilyIndex = new_qf,
            .image = vkf->img[i],
            .subresourceRange = (VkImageSubresourceRange) {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .layerCount = 1,
                .levelCount = 1,
            },
        };
        *nb_bar += 1;
    }

    /* Record the post-barrier state so it is applied to the frame on submit */
    ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
}
2092
/**
 * Initialize a shader and begin its GLSL source with a common prologue:
 * version pragma, utility macros, required extensions and the workgroup
 * (local group) size. The bind point is derived from the pipeline stage.
 * Returns 0 (cannot fail).
 */
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name,
                      VkPipelineStageFlags stage,
                      const char *extensions[], int nb_extensions,
                      int lg_x, int lg_y, int lg_z,
                      uint32_t required_subgroup_size)
{
    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->name = name;
    shd->stage = stage;
    shd->lg_size[0] = lg_x;
    shd->lg_size[1] = lg_y;
    shd->lg_size[2] = lg_z;

    /* Map the shader stage onto the pipeline bind point used later when
     * binding pipelines/descriptors. */
    switch (shd->stage) {
    case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
    case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
    case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
    case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
    case VK_SHADER_STAGE_MISS_BIT_KHR:
    case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
        shd->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
        break;
    case VK_SHADER_STAGE_COMPUTE_BIT:
        shd->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
        break;
    default:
        shd->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
        break;
    };

    /* Optional fixed subgroup size; checked via requiredSubgroupSize != 0
     * at pipeline/shader-object creation time. */
    if (required_subgroup_size) {
        shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO;
        shd->subgroup_info.requiredSubgroupSize = required_subgroup_size;
    }

    av_bprintf(&shd->src, "/* %s shader: %s */\n",
               (stage == VK_SHADER_STAGE_TASK_BIT_EXT ||
                stage == VK_SHADER_STAGE_MESH_BIT_EXT) ?
               "Mesh" :
               (shd->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ?
               "Raytrace" :
               (shd->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ?
               "Compute" : "Graphics",
               name);
    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0,                                                                  );

    /* Common utilities */
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );
    GLSLC(0, #extension GL_EXT_scalar_block_layout : require                  );
    GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require     );
    GLSLC(0, #extension GL_EXT_control_flow_attributes : require              );
    GLSLC(0, #extension GL_EXT_shader_image_load_formatted : require          );
    /* Provide no-op fallbacks when expect/assume hints are unavailable */
    if (s->extensions & FF_VK_EXT_EXPECT_ASSUME) {
        GLSLC(0, #extension GL_EXT_expect_assume : require                    );
    } else {
        GLSLC(0, #define assumeEXT(x) (x)                                     );
        GLSLC(0, #define expectEXT(x, c) (x)                                  );
    }
    if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) &&
        (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) {
        GLSLC(0, #extension GL_EXT_debug_printf : require                     );
        GLSLC(0, #define DEBUG                                                );
    }

    if (stage == VK_SHADER_STAGE_TASK_BIT_EXT ||
        stage == VK_SHADER_STAGE_MESH_BIT_EXT)
        GLSLC(0, #extension GL_EXT_mesh_shader : require                      );

    /* Caller-requested GLSL extensions */
    for (int i = 0; i < nb_extensions; i++)
        GLSLF(0, #extension %s : %s                                           ,extensions[i], "require");
    GLSLC(0,                                                                  );

    GLSLF(0, layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in;
                                                                              , shd->lg_size[0], shd->lg_size[1], shd->lg_size[2]);
    GLSLC(0,                                                                  );

    return 0;
}
2174
2175 void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio)
2176 {
2177 int line = 0;
2178 const char *p = shd->src.str;
2179 const char *start = p;
2180 const size_t len = strlen(p);
2181
2182 AVBPrint buf;
2183 av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
2184
2185 for (int i = 0; i < len; i++) {
2186 if (p[i] == '\n') {
2187 av_bprintf(&buf, "%i\t", ++line);
2188 av_bprint_append_data(&buf, start, &p[i] - start + 1);
2189 start = &p[i + 1];
2190 }
2191 }
2192
2193 av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
2194 av_bprint_finalize(&buf, NULL);
2195 }
2196
2197 static int init_pipeline_layout(FFVulkanContext *s, FFVulkanShader *shd)
2198 {
2199 VkResult ret;
2200 FFVulkanFunctions *vk = &s->vkfn;
2201 VkPipelineLayoutCreateInfo pipeline_layout_info;
2202
2203 /* Finally create the pipeline layout */
2204 pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
2205 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2206 .pSetLayouts = shd->desc_layout,
2207 .setLayoutCount = shd->nb_descriptor_sets,
2208 .pushConstantRangeCount = shd->push_consts_num,
2209 .pPushConstantRanges = shd->push_consts,
2210 };
2211
2212 ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
2213 s->hwctx->alloc, &shd->pipeline_layout);
2214 if (ret != VK_SUCCESS) {
2215 av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
2216 ff_vk_ret2str(ret));
2217 return AVERROR_EXTERNAL;
2218 }
2219
2220 return 0;
2221 }
2222
2223 static int create_shader_module(FFVulkanContext *s, FFVulkanShader *shd,
2224 VkShaderModule *mod,
2225 uint8_t *spirv, size_t spirv_len)
2226 {
2227 VkResult ret;
2228 FFVulkanFunctions *vk = &s->vkfn;
2229
2230 VkShaderModuleCreateInfo shader_module_info = {
2231 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
2232 .pNext = NULL,
2233 .flags = 0x0,
2234 .pCode = (void *)spirv,
2235 .codeSize = spirv_len,
2236 };
2237
2238 ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_module_info,
2239 s->hwctx->alloc, mod);
2240 if (ret != VK_SUCCESS) {
2241 av_log(s, AV_LOG_ERROR, "Error creating shader module: %s\n",
2242 ff_vk_ret2str(ret));
2243 return AVERROR_EXTERNAL;
2244 }
2245
2246 return 0;
2247 }
2248
2249 static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd,
2250 VkShaderModule mod, const char *entrypoint)
2251 {
2252 VkResult ret;
2253 FFVulkanFunctions *vk = &s->vkfn;
2254
2255 VkComputePipelineCreateInfo pipeline_create_info = {
2256 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
2257 .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
2258 VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0,
2259 .layout = shd->pipeline_layout,
2260 .stage = (VkPipelineShaderStageCreateInfo) {
2261 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
2262 .pNext = shd->subgroup_info.requiredSubgroupSize ?
2263 &shd->subgroup_info : NULL,
2264 .pName = entrypoint,
2265 .flags = shd->subgroup_info.requiredSubgroupSize ?
2266 VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0,
2267 .stage = shd->stage,
2268 .module = mod,
2269 },
2270 };
2271
2272 ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
2273 &pipeline_create_info,
2274 s->hwctx->alloc, &shd->pipeline);
2275 if (ret != VK_SUCCESS) {
2276 av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
2277 ff_vk_ret2str(ret));
2278 return AVERROR_EXTERNAL;
2279 }
2280
2281 return 0;
2282 }
2283
/* Create a shader object (VK_EXT_shader_object path) directly from SPIR-V,
 * bypassing pipeline creation entirely. On success, logs the driver-compiled
 * binary size for diagnostics. */
static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd,
                                uint8_t *spirv, size_t spirv_len,
                                const char *entrypoint)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    size_t shader_size = 0;

    VkShaderCreateInfoEXT shader_obj_create = {
        .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
        /* Same full-subgroups requirement as the pipeline path */
        .flags = shd->subgroup_info.requiredSubgroupSize ?
                 VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT : 0x0,
        .stage = shd->stage,
        .nextStage = 0,
        .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
        .pCode = spirv,
        .codeSize = spirv_len,
        .pName = entrypoint,
        /* Shader objects carry the resource interface themselves */
        .pSetLayouts = shd->desc_layout,
        .setLayoutCount = shd->nb_descriptor_sets,
        .pushConstantRangeCount = shd->push_consts_num,
        .pPushConstantRanges = shd->push_consts,
        .pSpecializationInfo = NULL,
    };

    ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create,
                               s->hwctx->alloc, &shd->object);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader object: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Purely informational; failure to query the size is not an error */
    if (vk->GetShaderBinaryDataEXT(s->hwctx->act_dev, shd->object,
                                   &shader_size, NULL) == VK_SUCCESS)
        av_log(s, AV_LOG_VERBOSE, "Shader %s size: %zu binary (%zu SPIR-V)\n",
               shd->name, shader_size, spirv_len);

    return 0;
}
2324
2325 static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd)
2326 {
2327 VkResult ret;
2328 FFVulkanFunctions *vk = &s->vkfn;
2329
2330 shd->desc_layout = av_malloc_array(shd->nb_descriptor_sets,
2331 sizeof(*shd->desc_layout));
2332 if (!shd->desc_layout)
2333 return AVERROR(ENOMEM);
2334
2335 if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
2336 int has_singular = 0;
2337 int max_descriptors = 0;
2338 for (int i = 0; i < shd->nb_descriptor_sets; i++) {
2339 max_descriptors = FFMAX(max_descriptors, shd->desc_set[i].nb_bindings);
2340 if (shd->desc_set[i].singular)
2341 has_singular = 1;
2342 }
2343 shd->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
2344 (max_descriptors <= s->push_desc_props.maxPushDescriptors) &&
2345 (shd->nb_descriptor_sets == 1) &&
2346 (has_singular == 0);
2347 }
2348
2349 for (int i = 0; i < shd->nb_descriptor_sets; i++) {
2350 FFVulkanDescriptorSet *set = &shd->desc_set[i];
2351 VkDescriptorSetLayoutCreateInfo desc_layout_create = {
2352 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2353 .bindingCount = set->nb_bindings,
2354 .pBindings = set->binding,
2355 .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
2356 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
2357 (shd->use_push) ?
2358 VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
2359 0x0,
2360 };
2361
2362 ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
2363 &desc_layout_create,
2364 s->hwctx->alloc,
2365 &shd->desc_layout[i]);
2366 if (ret != VK_SUCCESS) {
2367 av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s",
2368 ff_vk_ret2str(ret));
2369 return AVERROR_EXTERNAL;
2370 }
2371
2372 if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
2373 vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, shd->desc_layout[i],
2374 &set->layout_size);
2375
2376 set->aligned_size = FFALIGN(set->layout_size,
2377 s->desc_buf_props.descriptorBufferOffsetAlignment);
2378
2379 for (int j = 0; j < set->nb_bindings; j++)
2380 vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
2381 shd->desc_layout[i],
2382 j,
2383 &set->binding_offset[j]);
2384 }
2385 }
2386
2387 return 0;
2388 }
2389
2390 int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd,
2391 uint8_t *spirv, size_t spirv_len,
2392 const char *entrypoint)
2393 {
2394 int err;
2395 FFVulkanFunctions *vk = &s->vkfn;
2396
2397 err = init_descriptors(s, shd);
2398 if (err < 0)
2399 return err;
2400
2401 err = init_pipeline_layout(s, shd);
2402 if (err < 0)
2403 return err;
2404
2405 if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
2406 shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets,
2407 sizeof(*shd->bound_buffer_indices));
2408 if (!shd->bound_buffer_indices)
2409 return AVERROR(ENOMEM);
2410
2411 for (int i = 0; i < shd->nb_descriptor_sets; i++)
2412 shd->bound_buffer_indices[i] = i;
2413 }
2414
2415 if (s->extensions & FF_VK_EXT_SHADER_OBJECT) {
2416 err = create_shader_object(s, shd, spirv, spirv_len, entrypoint);
2417 if (err < 0)
2418 return err;
2419 } else {
2420 VkShaderModule mod;
2421 err = create_shader_module(s, shd, &mod, spirv, spirv_len);
2422 if (err < 0)
2423 return err;
2424
2425 switch (shd->bind_point) {
2426 case VK_PIPELINE_BIND_POINT_COMPUTE:
2427 err = init_compute_pipeline(s, shd, mod, entrypoint);
2428 break;
2429 default:
2430 av_log(s, AV_LOG_ERROR, "Unsupported shader type: %i\n",
2431 shd->bind_point);
2432 err = AVERROR(EINVAL);
2433 break;
2434 };
2435
2436 vk->DestroyShaderModule(s->hwctx->act_dev, mod, s->hwctx->alloc);
2437 if (err < 0)
2438 return err;
2439 }
2440
2441 return 0;
2442 }
2443
/* Per-VkDescriptorType properties, indexed directly by VkDescriptorType.
 * Used both when emitting GLSL declarations (ff_vk_shader_add_descriptor_set)
 * and implicitly documents which opaque struct updates each descriptor. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;   /* GLSL keyword; NULL when none is emitted (uniform buffers) */
    int is_uniform;     /* Declared with the "uniform" qualifier */
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
2464
/**
 * Add a descriptor set to a shader: allocates the backing layout bindings
 * (unless print_to_shader_only is set) and always emits the matching GLSL
 * "layout (set = N, binding = M) ..." declarations into the shader source.
 *
 * @param desc      array of binding descriptions
 * @param nb        number of bindings in desc
 * @param singular  set shared across all execution contexts (one copy)
 * @param print_to_shader_only  only write GLSL; no layout state is created
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd,
                                    FFVulkanDescriptorSetBinding *desc, int nb,
                                    int singular, int print_to_shader_only)
{
    int has_sampler = 0;
    FFVulkanDescriptorSet *set;

    if (print_to_shader_only)
        goto print;

    /* Actual layout allocated for the pipeline */
    set = av_realloc_array(shd->desc_set,
                           sizeof(*shd->desc_set),
                           shd->nb_descriptor_sets + 1);
    if (!set)
        return AVERROR(ENOMEM);
    shd->desc_set = set;

    /* Point at (and zero) the newly grown slot */
    set = &set[shd->nb_descriptor_sets];
    memset(set, 0, sizeof(*set));

    set->binding = av_calloc(nb, sizeof(*set->binding));
    if (!set->binding)
        return AVERROR(ENOMEM);

    set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset));
    if (!set->binding_offset) {
        av_freep(&set->binding);
        return AVERROR(ENOMEM);
    }

    for (int i = 0; i < nb; i++) {
        set->binding[i].binding            = i;
        set->binding[i].descriptorType     = desc[i].type;
        /* elems == 0 means a single (non-array) descriptor */
        set->binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
        set->binding[i].stageFlags         = desc[i].stages;
        set->binding[i].pImmutableSamplers = desc[i].samplers;

        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
            has_sampler |= 1;
    }

    /* Usage flags for the descriptor buffer that may back this set */
    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    if (has_sampler)
        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;

    /* Without descriptor buffers, accumulate pool sizes per descriptor type
     * for the descriptor pool created in ff_vk_shader_register_exec() */
    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
        for (int i = 0; i < nb; i++) {
            int j;
            VkDescriptorPoolSize *desc_pool_size;
            for (j = 0; j < shd->nb_desc_pool_size; j++)
                if (shd->desc_pool_size[j].type == desc[i].type)
                    break;
            if (j >= shd->nb_desc_pool_size) {
                desc_pool_size = av_realloc_array(shd->desc_pool_size,
                                                  sizeof(*desc_pool_size),
                                                  shd->nb_desc_pool_size + 1);
                if (!desc_pool_size)
                    return AVERROR(ENOMEM);

                shd->desc_pool_size = desc_pool_size;
                shd->nb_desc_pool_size++;
                memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize));
            }
            shd->desc_pool_size[j].type             = desc[i].type;
            shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1);
        }
    }

    set->singular = singular;
    set->nb_bindings = nb;
    shd->nb_descriptor_sets++;

print:
    /* Write shader info */
    for (int i = 0; i < nb; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", FFMAX(shd->nb_descriptor_sets - 1, 0), i);

        /* For storage images the mem_layout goes into the type prefix below
         * rather than the layout qualifier */
        if (desc[i].mem_layout &&
            (desc[i].type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE))
            GLSLA(", %s", desc[i].mem_layout);

        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type) {
            GLSLA(" ");
            if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
                if (desc[i].mem_layout) {
                    /* Derive the "u"/"i" image type prefix from the
                     * mem_layout suffix ("...ui" / "...i") */
                    int len = strlen(desc[i].mem_layout);
                    if (desc[i].mem_layout[len - 1] == 'i' &&
                        desc[i].mem_layout[len - 2] == 'u') {
                        GLSLA("u");
                    } else if (desc[i].mem_layout[len - 1] == 'i') {
                        GLSLA("i");
                    }
                }
            }
            GLSLA("%s", prop->type);
        }

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        /* Buffer-backed descriptors declare their contents inline */
        if (prop->buf_content) {
            GLSLA(" {\n    ");
            if (desc[i].buf_elems) {
                GLSLA("%s", desc[i].buf_content);
                GLSLA("[%i];", desc[i].buf_elems);
            } else {
                GLSLA("%s", desc[i].buf_content);
            }
            GLSLA("\n}");
        }

        if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";");
        GLSLA("\n");
    }
    GLSLA("\n");

    return 0;
}
2600
2601 int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool,
2602 FFVulkanShader *shd)
2603 {
2604 int err;
2605 FFVulkanShaderData *sd;
2606
2607 if (!shd->nb_descriptor_sets)
2608 return 0;
2609
2610 sd = av_realloc_array(pool->reg_shd,
2611 sizeof(*pool->reg_shd),
2612 pool->nb_reg_shd + 1);
2613 if (!sd)
2614 return AVERROR(ENOMEM);
2615
2616 pool->reg_shd = sd;
2617 sd = &sd[pool->nb_reg_shd++];
2618 memset(sd, 0, sizeof(*sd));
2619
2620 sd->shd = shd;
2621 sd->nb_descriptor_sets = shd->nb_descriptor_sets;
2622
2623 if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
2624 sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind));
2625 if (!sd->desc_bind)
2626 return AVERROR(ENOMEM);
2627
2628 sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf));
2629 if (!sd->desc_set_buf)
2630 return AVERROR(ENOMEM);
2631
2632 for (int i = 0; i < sd->nb_descriptor_sets; i++) {
2633 FFVulkanDescriptorSet *set = &shd->desc_set[i];
2634 FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i];
2635 int nb = set->singular ? 1 : pool->pool_size;
2636
2637 err = ff_vk_create_buf(s, &sdb->buf,
2638 set->aligned_size*nb,
2639 NULL, NULL, set->usage,
2640 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
2641 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
2642 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
2643 if (err < 0)
2644 return err;
2645
2646 err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0);
2647 if (err < 0)
2648 return err;
2649
2650 sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
2651 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
2652 .usage = set->usage,
2653 .address = sdb->buf.address,
2654 };
2655 }
2656 } else if (!shd->use_push) {
2657 VkResult ret;
2658 FFVulkanFunctions *vk = &s->vkfn;
2659 VkDescriptorSetLayout *tmp_layouts;
2660 VkDescriptorSetAllocateInfo set_alloc_info;
2661 VkDescriptorPoolCreateInfo pool_create_info;
2662
2663 for (int i = 0; i < shd->nb_desc_pool_size; i++)
2664 shd->desc_pool_size[i].descriptorCount *= pool->pool_size;
2665
2666 pool_create_info = (VkDescriptorPoolCreateInfo) {
2667 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2668 .flags = 0,
2669 .pPoolSizes = shd->desc_pool_size,
2670 .poolSizeCount = shd->nb_desc_pool_size,
2671 .maxSets = sd->nb_descriptor_sets*pool->pool_size,
2672 };
2673
2674 ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
2675 s->hwctx->alloc, &sd->desc_pool);
2676 if (ret != VK_SUCCESS) {
2677 av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n",
2678 ff_vk_ret2str(ret));
2679 return AVERROR_EXTERNAL;
2680 }
2681
2682 tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts));
2683 if (!tmp_layouts)
2684 return AVERROR(ENOMEM);
2685
2686 /* Colate each execution context's descriptor set layouts */
2687 for (int i = 0; i < pool->pool_size; i++)
2688 for (int j = 0; j < sd->nb_descriptor_sets; j++)
2689 tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j];
2690
2691 set_alloc_info = (VkDescriptorSetAllocateInfo) {
2692 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2693 .descriptorPool = sd->desc_pool,
2694 .pSetLayouts = tmp_layouts,
2695 .descriptorSetCount = pool_create_info.maxSets,
2696 };
2697
2698 sd->desc_sets = av_malloc_array(pool_create_info.maxSets,
2699 sizeof(*tmp_layouts));
2700 if (!sd->desc_sets) {
2701 av_free(tmp_layouts);
2702 return AVERROR(ENOMEM);
2703 }
2704 ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info,
2705 sd->desc_sets);
2706 av_free(tmp_layouts);
2707 if (ret != VK_SUCCESS) {
2708 av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
2709 ff_vk_ret2str(ret));
2710 av_freep(&sd->desc_sets);
2711 return AVERROR_EXTERNAL;
2712 }
2713 }
2714
2715 return 0;
2716 }
2717
2718 static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e,
2719 FFVulkanShader *shd)
2720 {
2721 for (int i = 0; i < e->parent->nb_reg_shd; i++)
2722 if (e->parent->reg_shd[i].shd == shd)
2723 return &e->parent->reg_shd[i];
2724 return NULL;
2725 }
2726
2727 static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
2728 FFVulkanShader *shd, int set,
2729 int bind_idx, int array_idx,
2730 VkDescriptorGetInfoEXT *desc_get_info,
2731 size_t desc_size)
2732 {
2733 FFVulkanFunctions *vk = &s->vkfn;
2734 FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
2735 FFVulkanShaderData *sd = get_shd_data(e, shd);
2736 const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx;
2737
2738 void *desc = sd->desc_set_buf[set].desc_mem + /* Base */
2739 exec_offset + /* Execution context */
2740 desc_set->binding_offset[bind_idx] + /* Descriptor binding */
2741 array_idx*desc_size; /* Array position */
2742
2743 vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
2744 }
2745
2746 static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e,
2747 FFVulkanShader *shd, int set,
2748 VkWriteDescriptorSet *write_info)
2749 {
2750 FFVulkanFunctions *vk = &s->vkfn;
2751 FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
2752 FFVulkanShaderData *sd = get_shd_data(e, shd);
2753
2754 if (desc_set->singular) {
2755 for (int i = 0; i < e->parent->pool_size; i++) {
2756 write_info->dstSet = sd->desc_sets[i*sd->nb_descriptor_sets + set];
2757 vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
2758 }
2759 } else {
2760 if (shd->use_push) {
2761 vk->CmdPushDescriptorSetKHR(e->buf,
2762 shd->bind_point,
2763 shd->pipeline_layout,
2764 set, 1,
2765 write_info);
2766 } else {
2767 write_info->dstSet = sd->desc_sets[e->idx*sd->nb_descriptor_sets + set];
2768 vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
2769 }
2770 }
2771 }
2772
/**
 * Write an image/sampler descriptor at (set, bind, array element offs).
 * Uses the descriptor-buffer path when available; otherwise falls back to
 * descriptor pool writes (or push descriptors, handled inside
 * update_set_pool_write()).
 *
 * @return 0 on success, AVERROR(EINVAL) for a non-image descriptor type
 */
int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e,
                            FFVulkanShader *shd, int set, int bind, int offs,
                            VkImageView view, VkImageLayout layout,
                            VkSampler sampler)
{
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        VkDescriptorGetInfoEXT desc_get_info = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
            .type = desc_set->binding[bind].descriptorType,
        };
        VkDescriptorImageInfo desc_img_info = {
            .imageView = view,
            .sampler = sampler,
            .imageLayout = layout,
        };
        size_t desc_size;

        /* The union member and the descriptor's byte size both depend on
         * the binding's declared descriptor type */
        switch (desc_get_info.type) {
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
            desc_get_info.data.pSampledImage = &desc_img_info;
            desc_size = s->desc_buf_props.sampledImageDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
            desc_get_info.data.pStorageImage = &desc_img_info;
            desc_size = s->desc_buf_props.storageImageDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
            desc_get_info.data.pInputAttachmentImage = &desc_img_info;
            desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            desc_get_info.data.pCombinedImageSampler = &desc_img_info;
            desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
            break;
        default:
            av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
                   set, bind, desc_get_info.type);
            return AVERROR(EINVAL);
            break;
        };

        update_set_descriptor(s, e, shd, set, bind, offs,
                              &desc_get_info, desc_size);
    } else {
        /* Legacy path: vkUpdateDescriptorSets / push descriptor write */
        VkDescriptorImageInfo desc_pool_write_info_img = {
            .sampler = sampler,
            .imageView = view,
            .imageLayout = layout,
        };
        VkWriteDescriptorSet desc_pool_write_info = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = bind,
            .descriptorCount = 1,
            .dstArrayElement = offs,
            .descriptorType = desc_set->binding[bind].descriptorType,
            .pImageInfo = &desc_pool_write_info_img,
        };
        update_set_pool_write(s, e, shd, set, &desc_pool_write_info);
    }

    return 0;
}
2837
2838 void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
2839 FFVulkanShader *shd, AVFrame *f,
2840 VkImageView *views, int set, int binding,
2841 VkImageLayout layout, VkSampler sampler)
2842 {
2843 AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
2844 const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
2845
2846 for (int i = 0; i < nb_planes; i++)
2847 ff_vk_shader_update_img(s, e, shd, set, binding, i,
2848 views[i], layout, sampler);
2849 }
2850
/**
 * Write a buffer descriptor at (set, bind, array element elem), covering
 * [offset, offset+len) of the given buffer. fmt is only meaningful for
 * texel buffer descriptor types.
 *
 * @return 0 on success, AVERROR(EINVAL) for a non-buffer descriptor type
 */
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
                                    FFVulkanShader *shd,
                                    int set, int bind, int elem,
                                    FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len,
                                    VkFormat fmt)
{
    FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        VkDescriptorGetInfoEXT desc_get_info = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
            .type = desc_set->binding[bind].descriptorType,
        };
        /* Descriptor buffers address the target by device address */
        VkDescriptorAddressInfoEXT desc_buf_info = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
            .address = buf->address + offset,
            .range = len,
            .format = fmt,
        };
        size_t desc_size;

        /* The union member and the descriptor's byte size both depend on
         * the binding's declared descriptor type */
        switch (desc_get_info.type) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            desc_get_info.data.pUniformBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
            desc_get_info.data.pStorageBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.storageBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
            desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
            desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
            desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
            break;
        default:
            av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
                   set, bind, desc_get_info.type);
            return AVERROR(EINVAL);
            break;
        };

        update_set_descriptor(s, e, shd, set, bind, elem, &desc_get_info, desc_size);
    } else {
        /* Legacy path addresses the target as buffer handle + offset */
        VkDescriptorBufferInfo desc_pool_write_info_buf = {
            .buffer = buf->buf,
            .offset = buf->virtual_offset + offset,
            .range = len,
        };
        VkWriteDescriptorSet desc_pool_write_info = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = bind,
            .descriptorCount = 1,
            .dstArrayElement = elem,
            .descriptorType = desc_set->binding[bind].descriptorType,
            .pBufferInfo = &desc_pool_write_info_buf,
        };
        update_set_pool_write(s, e, shd, set, &desc_pool_write_info);
    }

    return 0;
}
2916
2917 void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e,
2918 FFVulkanShader *shd,
2919 VkShaderStageFlagBits stage,
2920 int offset, size_t size, void *src)
2921 {
2922 FFVulkanFunctions *vk = &s->vkfn;
2923 vk->CmdPushConstants(e->buf, shd->pipeline_layout,
2924 stage, offset, size, src);
2925 }
2926
/**
 * Bind a shader (pipeline or shader object) and its descriptor state into
 * an execution context's command buffer.
 */
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e,
                            FFVulkanShader *shd)
{
    FFVulkanFunctions *vk = &s->vkfn;
    /* NOTE(review): fixed-size scratch — assumes nb_descriptor_sets <= 1024;
     * confirm no shader exceeds this. */
    VkDeviceSize offsets[1024];
    FFVulkanShaderData *sd = get_shd_data(e, shd);

    if (s->extensions & FF_VK_EXT_SHADER_OBJECT) {
        VkShaderStageFlagBits stages = shd->stage;
        vk->CmdBindShadersEXT(e->buf, 1, &stages, &shd->object);
    } else {
        vk->CmdBindPipeline(e->buf, shd->bind_point, shd->pipeline);
    }

    if (sd && sd->nb_descriptor_sets) {
        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            /* Singular sets use one shared copy at offset 0; all others use
             * this execution context's slice of the descriptor buffer */
            for (int i = 0; i < sd->nb_descriptor_sets; i++)
                offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx;

            /* Bind descriptor buffers */
            vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind);
            /* Binding offsets */
            vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout,
                                                 0, sd->nb_descriptor_sets,
                                                 shd->bound_buffer_indices, offsets);
        } else if (!shd->use_push) {
            /* Push descriptors were already recorded during the update calls;
             * only pool-allocated sets need explicit binding here */
            vk->CmdBindDescriptorSets(e->buf, shd->bind_point, shd->pipeline_layout,
                                      0, sd->nb_descriptor_sets,
                                      &sd->desc_sets[e->idx*sd->nb_descriptor_sets],
                                      0, NULL);
        }
    }
}
2960
2961 void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
2962 {
2963 FFVulkanFunctions *vk = &s->vkfn;
2964
2965 av_bprint_finalize(&shd->src, NULL);
2966
2967 #if 0
2968 if (shd->shader.module)
2969 vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
2970 s->hwctx->alloc);
2971 #endif
2972
2973 if (shd->object)
2974 vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc);
2975 if (shd->pipeline)
2976 vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc);
2977 if (shd->pipeline_layout)
2978 vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout,
2979 s->hwctx->alloc);
2980
2981 for (int i = 0; i < shd->nb_descriptor_sets; i++) {
2982 FFVulkanDescriptorSet *set = &shd->desc_set[i];
2983 av_free(set->binding);
2984 av_free(set->binding_offset);
2985 }
2986
2987 if (shd->desc_layout) {
2988 for (int i = 0; i < shd->nb_descriptor_sets; i++)
2989 if (shd->desc_layout[i])
2990 vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i],
2991 s->hwctx->alloc);
2992 }
2993
2994 av_freep(&shd->desc_pool_size);
2995 av_freep(&shd->desc_layout);
2996 av_freep(&shd->desc_set);
2997 av_freep(&shd->bound_buffer_indices);
2998 av_freep(&shd->push_consts);
2999 shd->push_consts_num = 0;
3000 }
3001
3002 void ff_vk_uninit(FFVulkanContext *s)
3003 {
3004 av_freep(&s->query_props);
3005 av_freep(&s->qf_props);
3006 av_freep(&s->video_props);
3007 av_freep(&s->coop_mat_props);
3008 av_freep(&s->host_image_copy_layouts);
3009
3010 av_buffer_unref(&s->device_ref);
3011 av_buffer_unref(&s->frames_ref);
3012 }
3013
/**
 * Initialize an FFVulkanContext from a device and/or frames context.
 * If frames_ref is given, the device is taken from it and the device_ref
 * argument is ignored. Loads function pointers and device properties.
 *
 * @return 0 on success, negative AVERROR code on failure (context is
 *         uninitialized via ff_vk_uninit() on failure)
 */
int ff_vk_init(FFVulkanContext *s, void *log_parent,
               AVBufferRef *device_ref, AVBufferRef *frames_ref)
{
    int err;

    static const AVClass vulkan_context_class = {
        .class_name       = "vk",
        .version          = LIBAVUTIL_VERSION_INT,
        .parent_log_context_offset = offsetof(FFVulkanContext, log_parent),
    };

    /* Start from a clean slate; all fields are owned by this context */
    memset(s, 0, sizeof(*s));
    s->log_parent = log_parent;
    s->class      = &vulkan_context_class;

    if (frames_ref) {
        s->frames_ref = av_buffer_ref(frames_ref);
        if (!s->frames_ref)
            return AVERROR(ENOMEM);

        s->frames = (AVHWFramesContext *)s->frames_ref->data;
        s->hwfc = s->frames->hwctx;

        /* The frames context dictates the device */
        device_ref = s->frames->device_ref;
    }

    s->device_ref = av_buffer_ref(device_ref);
    if (!s->device_ref) {
        ff_vk_uninit(s);
        return AVERROR(ENOMEM);
    }

    s->device = (AVHWDeviceContext *)s->device_ref->data;
    s->hwctx = s->device->hwctx;

    /* Combine device and instance extensions into one feature mask */
    s->extensions  = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
                                              s->hwctx->nb_enabled_dev_extensions);
    s->extensions |= ff_vk_extensions_to_mask(s->hwctx->enabled_inst_extensions,
                                              s->hwctx->nb_enabled_inst_extensions);

    err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    err = ff_vk_load_props(s);
    if (err < 0) {
        ff_vk_uninit(s);
        return err;
    }

    return 0;
}