4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/vulkan_loader.h"
27 const VkComponentMapping ff_comp_identity_map
= {
28 .r
= VK_COMPONENT_SWIZZLE_IDENTITY
,
29 .g
= VK_COMPONENT_SWIZZLE_IDENTITY
,
30 .b
= VK_COMPONENT_SWIZZLE_IDENTITY
,
31 .a
= VK_COMPONENT_SWIZZLE_IDENTITY
,
34 /* Converts return values to strings */
35 const char *ff_vk_ret2str(VkResult res
)
37 #define CASE(VAL) case VAL: return #VAL
45 CASE(VK_ERROR_OUT_OF_HOST_MEMORY
);
46 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY
);
47 CASE(VK_ERROR_INITIALIZATION_FAILED
);
48 CASE(VK_ERROR_DEVICE_LOST
);
49 CASE(VK_ERROR_MEMORY_MAP_FAILED
);
50 CASE(VK_ERROR_LAYER_NOT_PRESENT
);
51 CASE(VK_ERROR_EXTENSION_NOT_PRESENT
);
52 CASE(VK_ERROR_FEATURE_NOT_PRESENT
);
53 CASE(VK_ERROR_INCOMPATIBLE_DRIVER
);
54 CASE(VK_ERROR_TOO_MANY_OBJECTS
);
55 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED
);
56 CASE(VK_ERROR_FRAGMENTED_POOL
);
57 CASE(VK_ERROR_UNKNOWN
);
58 CASE(VK_ERROR_OUT_OF_POOL_MEMORY
);
59 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE
);
60 CASE(VK_ERROR_FRAGMENTATION
);
61 CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS
);
62 CASE(VK_PIPELINE_COMPILE_REQUIRED
);
63 CASE(VK_ERROR_SURFACE_LOST_KHR
);
64 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR
);
65 CASE(VK_SUBOPTIMAL_KHR
);
66 CASE(VK_ERROR_OUT_OF_DATE_KHR
);
67 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR
);
68 CASE(VK_ERROR_VALIDATION_FAILED_EXT
);
69 CASE(VK_ERROR_INVALID_SHADER_NV
);
70 CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR
);
71 CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR
);
72 CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR
);
73 CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR
);
74 CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR
);
75 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT
);
76 CASE(VK_ERROR_NOT_PERMITTED_KHR
);
77 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
);
78 CASE(VK_THREAD_IDLE_KHR
);
79 CASE(VK_THREAD_DONE_KHR
);
80 CASE(VK_OPERATION_DEFERRED_KHR
);
81 CASE(VK_OPERATION_NOT_DEFERRED_KHR
);
82 default: return "Unknown error";
87 /* Malitia pura, Khronos */
88 #define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \
89 dst_t ff_vk_map_ ##src_name## _to_ ##dst_name(src_t src) \
92 MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \
93 VK_IMAGE_USAGE_SAMPLED_BIT); \
94 MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \
95 VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \
96 MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \
97 VK_IMAGE_USAGE_TRANSFER_DST_BIT); \
98 MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \
99 VK_IMAGE_USAGE_STORAGE_BIT); \
100 MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \
101 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \
102 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
103 VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \
104 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \
105 VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \
106 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \
107 VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \
108 MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \
109 VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \
110 MAP_TO(VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT, \
111 VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT); \
115 #define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1;
116 FN_MAP_TO(VkFormatFeatureFlagBits2
, feats
, VkImageUsageFlags
, usage
)
118 #define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2;
119 FN_MAP_TO(VkImageUsageFlags
, usage
, VkFormatFeatureFlagBits2
, feats
)
123 static void load_enabled_qfs(FFVulkanContext
*s
)
126 for (int i
= 0; i
< s
->hwctx
->nb_qf
; i
++) {
127 /* Skip duplicates */
129 for (int j
= 0; j
< s
->nb_qfs
; j
++) {
130 if (s
->qfs
[j
] == s
->hwctx
->qf
[i
].idx
) {
138 s
->qfs
[s
->nb_qfs
++] = s
->hwctx
->qf
[i
].idx
;
142 int ff_vk_load_props(FFVulkanContext
*s
)
144 FFVulkanFunctions
*vk
= &s
->vkfn
;
146 s
->props
= (VkPhysicalDeviceProperties2
) {
147 .sType
= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
,
150 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->props_11
, FF_VK_EXT_NO_FLAG
,
151 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES
);
152 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->driver_props
, FF_VK_EXT_NO_FLAG
,
153 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES
);
154 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->subgroup_props
, FF_VK_EXT_NO_FLAG
,
155 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES
);
157 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->push_desc_props
, FF_VK_EXT_PUSH_DESCRIPTOR
,
158 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR
);
159 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->hprops
, FF_VK_EXT_EXTERNAL_HOST_MEMORY
,
160 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT
);
161 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->coop_matrix_props
, FF_VK_EXT_COOP_MATRIX
,
162 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR
);
163 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->desc_buf_props
, FF_VK_EXT_DESCRIPTOR_BUFFER
,
164 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT
);
165 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->optical_flow_props
, FF_VK_EXT_OPTICAL_FLOW
,
166 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV
);
167 FF_VK_STRUCT_EXT(s
, &s
->props
, &s
->host_image_props
, FF_VK_EXT_HOST_IMAGE_COPY
,
168 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT
);
170 s
->feats
= (VkPhysicalDeviceFeatures2
) {
171 .sType
= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2
,
174 FF_VK_STRUCT_EXT(s
, &s
->feats
, &s
->feats_12
, FF_VK_EXT_NO_FLAG
,
175 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES
);
176 FF_VK_STRUCT_EXT(s
, &s
->feats
, &s
->atomic_float_feats
, FF_VK_EXT_ATOMIC_FLOAT
,
177 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT
);
179 /* Try allocating 1024 layouts */
180 s
->host_image_copy_layouts
= av_malloc(sizeof(*s
->host_image_copy_layouts
)*1024);
181 s
->host_image_props
.pCopySrcLayouts
= s
->host_image_copy_layouts
;
182 s
->host_image_props
.copySrcLayoutCount
= 512;
183 s
->host_image_props
.pCopyDstLayouts
= s
->host_image_copy_layouts
+ 512;
184 s
->host_image_props
.copyDstLayoutCount
= 512;
186 vk
->GetPhysicalDeviceProperties2(s
->hwctx
->phys_dev
, &s
->props
);
188 /* Check if we had enough memory for all layouts */
189 if (s
->host_image_props
.copySrcLayoutCount
== 512 ||
190 s
->host_image_props
.copyDstLayoutCount
== 512) {
191 VkImageLayout
*new_array
;
193 s
->host_image_props
.pCopySrcLayouts
=
194 s
->host_image_props
.pCopyDstLayouts
= NULL
;
195 s
->host_image_props
.copySrcLayoutCount
=
196 s
->host_image_props
.copyDstLayoutCount
= 0;
197 vk
->GetPhysicalDeviceProperties2(s
->hwctx
->phys_dev
, &s
->props
);
199 new_size
= s
->host_image_props
.copySrcLayoutCount
+
200 s
->host_image_props
.copyDstLayoutCount
;
201 new_size
*= sizeof(*s
->host_image_copy_layouts
);
202 new_array
= av_realloc(s
->host_image_copy_layouts
, new_size
);
204 return AVERROR(ENOMEM
);
206 s
->host_image_copy_layouts
= new_array
;
207 s
->host_image_props
.pCopySrcLayouts
= new_array
;
208 s
->host_image_props
.pCopyDstLayouts
= new_array
+ s
->host_image_props
.copySrcLayoutCount
;
209 vk
->GetPhysicalDeviceProperties2(s
->hwctx
->phys_dev
, &s
->props
);
212 vk
->GetPhysicalDeviceMemoryProperties(s
->hwctx
->phys_dev
, &s
->mprops
);
213 vk
->GetPhysicalDeviceFeatures2(s
->hwctx
->phys_dev
, &s
->feats
);
220 vk
->GetPhysicalDeviceQueueFamilyProperties2(s
->hwctx
->phys_dev
, &s
->tot_nb_qfs
, NULL
);
222 s
->qf_props
= av_calloc(s
->tot_nb_qfs
, sizeof(*s
->qf_props
));
224 return AVERROR(ENOMEM
);
226 s
->query_props
= av_calloc(s
->tot_nb_qfs
, sizeof(*s
->query_props
));
228 av_freep(&s
->qf_props
);
229 return AVERROR(ENOMEM
);
232 s
->video_props
= av_calloc(s
->tot_nb_qfs
, sizeof(*s
->video_props
));
233 if (!s
->video_props
) {
234 av_freep(&s
->qf_props
);
235 av_freep(&s
->query_props
);
236 return AVERROR(ENOMEM
);
239 for (uint32_t i
= 0; i
< s
->tot_nb_qfs
; i
++) {
240 s
->qf_props
[i
] = (VkQueueFamilyProperties2
) {
241 .sType
= VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2
,
244 FF_VK_STRUCT_EXT(s
, &s
->qf_props
[i
], &s
->query_props
[i
], FF_VK_EXT_VIDEO_QUEUE
,
245 VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR
);
246 FF_VK_STRUCT_EXT(s
, &s
->qf_props
[i
], &s
->video_props
[i
], FF_VK_EXT_VIDEO_QUEUE
,
247 VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR
);
250 vk
->GetPhysicalDeviceQueueFamilyProperties2(s
->hwctx
->phys_dev
, &s
->tot_nb_qfs
, s
->qf_props
);
252 if (s
->extensions
& FF_VK_EXT_COOP_MATRIX
) {
253 vk
->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s
->hwctx
->phys_dev
,
254 &s
->coop_mat_props_nb
, NULL
);
256 if (s
->coop_mat_props_nb
) {
257 s
->coop_mat_props
= av_malloc_array(s
->coop_mat_props_nb
,
258 sizeof(VkCooperativeMatrixPropertiesKHR
));
259 for (int i
= 0; i
< s
->coop_mat_props_nb
; i
++) {
260 s
->coop_mat_props
[i
] = (VkCooperativeMatrixPropertiesKHR
) {
261 .sType
= VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR
,
265 vk
->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s
->hwctx
->phys_dev
,
266 &s
->coop_mat_props_nb
,
274 AVVulkanDeviceQueueFamily
*ff_vk_qf_find(FFVulkanContext
*s
,
275 VkQueueFlagBits dev_family
,
276 VkVideoCodecOperationFlagBitsKHR vid_ops
)
278 for (int i
= 0; i
< s
->hwctx
->nb_qf
; i
++) {
279 if ((s
->hwctx
->qf
[i
].flags
& dev_family
) &&
280 (s
->hwctx
->qf
[i
].video_caps
& vid_ops
) == vid_ops
) {
281 return &s
->hwctx
->qf
[i
];
287 void ff_vk_exec_pool_free(FFVulkanContext
*s
, FFVkExecPool
*pool
)
289 FFVulkanFunctions
*vk
= &s
->vkfn
;
291 for (int i
= 0; i
< pool
->pool_size
; i
++) {
292 FFVkExecContext
*e
= &pool
->contexts
[i
];
295 if (e
->had_submission
)
296 vk
->WaitForFences(s
->hwctx
->act_dev
, 1, &e
->fence
, VK_TRUE
, UINT64_MAX
);
297 vk
->DestroyFence(s
->hwctx
->act_dev
, e
->fence
, s
->hwctx
->alloc
);
300 ff_vk_exec_discard_deps(s
, e
);
302 av_free(e
->frame_deps
);
303 av_free(e
->sw_frame_deps
);
304 av_free(e
->buf_deps
);
305 av_free(e
->queue_family_dst
);
306 av_free(e
->layout_dst
);
307 av_free(e
->access_dst
);
308 av_free(e
->frame_update
);
309 av_free(e
->frame_locked
);
311 av_free(e
->sem_sig_val_dst
);
312 av_free(e
->sem_wait
);
315 /* Free shader-specific data */
316 for (int i
= 0; i
< pool
->nb_reg_shd
; i
++) {
317 FFVulkanShaderData
*sd
= &pool
->reg_shd
[i
];
319 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
320 for (int j
= 0; j
< sd
->nb_descriptor_sets
; j
++) {
321 FFVulkanDescriptorSetData
*set_data
= &sd
->desc_set_buf
[j
];
322 if (set_data
->buf
.mem
)
323 ff_vk_unmap_buffer(s
, &set_data
->buf
, 0);
324 ff_vk_free_buf(s
, &set_data
->buf
);
329 vk
->DestroyDescriptorPool(s
->hwctx
->act_dev
, sd
->desc_pool
,
332 av_freep(&sd
->desc_set_buf
);
333 av_freep(&sd
->desc_bind
);
334 av_freep(&sd
->desc_sets
);
337 av_freep(&pool
->reg_shd
);
339 for (int i
= 0; i
< pool
->pool_size
; i
++) {
340 if (pool
->cmd_buf_pools
[i
])
341 vk
->FreeCommandBuffers(s
->hwctx
->act_dev
, pool
->cmd_buf_pools
[i
],
342 1, &pool
->cmd_bufs
[i
]);
344 if (pool
->cmd_buf_pools
[i
])
345 vk
->DestroyCommandPool(s
->hwctx
->act_dev
, pool
->cmd_buf_pools
[i
], s
->hwctx
->alloc
);
347 if (pool
->query_pool
)
348 vk
->DestroyQueryPool(s
->hwctx
->act_dev
, pool
->query_pool
, s
->hwctx
->alloc
);
350 av_free(pool
->query_data
);
351 av_free(pool
->cmd_buf_pools
);
352 av_free(pool
->cmd_bufs
);
353 av_free(pool
->contexts
);
356 int ff_vk_exec_pool_init(FFVulkanContext
*s
, AVVulkanDeviceQueueFamily
*qf
,
357 FFVkExecPool
*pool
, int nb_contexts
,
358 int nb_queries
, VkQueryType query_type
, int query_64bit
,
359 const void *query_create_pnext
)
363 FFVulkanFunctions
*vk
= &s
->vkfn
;
365 VkCommandPoolCreateInfo cqueue_create
;
366 VkCommandBufferAllocateInfo cbuf_create
;
368 const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR
*ef
= NULL
;
370 atomic_init(&pool
->idx
, 0);
372 if (query_type
== VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR
) {
373 ef
= ff_vk_find_struct(query_create_pnext
,
374 VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR
);
376 return AVERROR(EINVAL
);
379 /* Allocate space for command buffer pools */
380 pool
->cmd_buf_pools
= av_malloc(nb_contexts
*sizeof(*pool
->cmd_buf_pools
));
381 if (!pool
->cmd_buf_pools
) {
382 err
= AVERROR(ENOMEM
);
386 /* Allocate space for command buffers */
387 pool
->cmd_bufs
= av_malloc(nb_contexts
*sizeof(*pool
->cmd_bufs
));
388 if (!pool
->cmd_bufs
) {
389 err
= AVERROR(ENOMEM
);
393 for (int i
= 0; i
< nb_contexts
; i
++) {
394 /* Create command pool */
395 cqueue_create
= (VkCommandPoolCreateInfo
) {
396 .sType
= VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO
,
397 .flags
= VK_COMMAND_POOL_CREATE_TRANSIENT_BIT
|
398 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
,
399 .queueFamilyIndex
= qf
->idx
,
402 ret
= vk
->CreateCommandPool(s
->hwctx
->act_dev
, &cqueue_create
,
403 s
->hwctx
->alloc
, &pool
->cmd_buf_pools
[i
]);
404 if (ret
!= VK_SUCCESS
) {
405 av_log(s
, AV_LOG_ERROR
, "Command pool creation failure: %s\n",
407 err
= AVERROR_EXTERNAL
;
411 /* Allocate command buffer */
412 cbuf_create
= (VkCommandBufferAllocateInfo
) {
413 .sType
= VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO
,
414 .level
= VK_COMMAND_BUFFER_LEVEL_PRIMARY
,
415 .commandPool
= pool
->cmd_buf_pools
[i
],
416 .commandBufferCount
= 1,
418 ret
= vk
->AllocateCommandBuffers(s
->hwctx
->act_dev
, &cbuf_create
,
420 if (ret
!= VK_SUCCESS
) {
421 av_log(s
, AV_LOG_ERROR
, "Command buffer alloc failure: %s\n",
423 err
= AVERROR_EXTERNAL
;
430 VkQueryPoolCreateInfo query_pool_info
= {
431 .sType
= VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO
,
432 .pNext
= query_create_pnext
,
433 .queryType
= query_type
,
434 .queryCount
= nb_queries
*nb_contexts
,
436 ret
= vk
->CreateQueryPool(s
->hwctx
->act_dev
, &query_pool_info
,
437 s
->hwctx
->alloc
, &pool
->query_pool
);
438 if (ret
!= VK_SUCCESS
) {
439 av_log(s
, AV_LOG_ERROR
, "Query pool alloc failure: %s\n",
441 err
= AVERROR_EXTERNAL
;
445 pool
->nb_queries
= nb_queries
;
446 pool
->query_status_stride
= 1 + 1; /* One result, one status by default */
447 pool
->query_results
= nb_queries
;
448 pool
->query_statuses
= nb_queries
;
450 /* Video encode queries produce two results per query */
451 if (query_type
== VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR
) {
452 int nb_results
= av_popcount(ef
->encodeFeedbackFlags
);
453 pool
->query_status_stride
= nb_results
+ 1;
454 pool
->query_results
*= nb_results
;
455 } else if (query_type
== VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR
) {
456 pool
->query_status_stride
= 1;
457 pool
->query_results
= 0;
460 pool
->qd_size
= (pool
->query_results
+ pool
->query_statuses
)*(query_64bit
? 8 : 4);
462 /* Allocate space for the query data */
463 pool
->query_data
= av_calloc(nb_contexts
, pool
->qd_size
);
464 if (!pool
->query_data
) {
465 err
= AVERROR(ENOMEM
);
470 /* Allocate space for the contexts */
471 pool
->contexts
= av_calloc(nb_contexts
, sizeof(*pool
->contexts
));
472 if (!pool
->contexts
) {
473 err
= AVERROR(ENOMEM
);
477 pool
->pool_size
= nb_contexts
;
480 for (int i
= 0; i
< pool
->pool_size
; i
++) {
481 FFVkExecContext
*e
= &pool
->contexts
[i
];
482 VkFenceCreateInfo fence_create
= {
483 .sType
= VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
,
484 .flags
= VK_FENCE_CREATE_SIGNALED_BIT
,
488 ret
= vk
->CreateFence(s
->hwctx
->act_dev
, &fence_create
, s
->hwctx
->alloc
,
490 if (ret
!= VK_SUCCESS
) {
491 av_log(s
, AV_LOG_ERROR
, "Failed to create submission fence: %s\n",
493 return AVERROR_EXTERNAL
;
500 e
->query_data
= ((uint8_t *)pool
->query_data
) + pool
->qd_size
*i
;
501 e
->query_idx
= nb_queries
*i
;
504 e
->buf
= pool
->cmd_bufs
[i
];
506 /* Queue index distribution */
509 vk
->GetDeviceQueue(s
->hwctx
->act_dev
, qf
->idx
, e
->qi
, &e
->queue
);
515 ff_vk_exec_pool_free(s
, pool
);
519 VkResult
ff_vk_exec_get_query(FFVulkanContext
*s
, FFVkExecContext
*e
,
520 void **data
, VkQueryResultFlagBits flags
)
522 FFVulkanFunctions
*vk
= &s
->vkfn
;
523 const FFVkExecPool
*pool
= e
->parent
;
524 VkQueryResultFlags qf
= flags
& ~(VK_QUERY_RESULT_64_BIT
|
525 VK_QUERY_RESULT_WITH_STATUS_BIT_KHR
);
527 if (!e
->query_data
) {
528 av_log(s
, AV_LOG_ERROR
, "Requested a query with a NULL query_data pointer!\n");
529 return VK_INCOMPLETE
;
532 qf
|= pool
->query_64bit
?
533 VK_QUERY_RESULT_64_BIT
: 0x0;
534 qf
|= pool
->query_statuses
?
535 VK_QUERY_RESULT_WITH_STATUS_BIT_KHR
: 0x0;
538 *data
= e
->query_data
;
540 return vk
->GetQueryPoolResults(s
->hwctx
->act_dev
, pool
->query_pool
,
543 pool
->qd_size
, e
->query_data
,
547 FFVkExecContext
*ff_vk_exec_get(FFVulkanContext
*s
, FFVkExecPool
*pool
)
549 return &pool
->contexts
[atomic_fetch_add(&pool
->idx
, 1) % pool
->pool_size
];
552 void ff_vk_exec_wait(FFVulkanContext
*s
, FFVkExecContext
*e
)
554 FFVulkanFunctions
*vk
= &s
->vkfn
;
555 vk
->WaitForFences(s
->hwctx
->act_dev
, 1, &e
->fence
, VK_TRUE
, UINT64_MAX
);
556 ff_vk_exec_discard_deps(s
, e
);
559 int ff_vk_exec_start(FFVulkanContext
*s
, FFVkExecContext
*e
)
562 FFVulkanFunctions
*vk
= &s
->vkfn
;
563 const FFVkExecPool
*pool
= e
->parent
;
565 VkCommandBufferBeginInfo cmd_start
= {
566 .sType
= VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO
,
567 .flags
= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
,
570 /* Wait for the fence to be signalled */
571 vk
->WaitForFences(s
->hwctx
->act_dev
, 1, &e
->fence
, VK_TRUE
, UINT64_MAX
);
572 vk
->ResetFences(s
->hwctx
->act_dev
, 1, &e
->fence
);
574 /* Discard queue dependencies */
575 ff_vk_exec_discard_deps(s
, e
);
577 ret
= vk
->BeginCommandBuffer(e
->buf
, &cmd_start
);
578 if (ret
!= VK_SUCCESS
) {
579 av_log(s
, AV_LOG_ERROR
, "Failed to start command recoding: %s\n",
581 return AVERROR_EXTERNAL
;
584 if (pool
->nb_queries
)
585 vk
->CmdResetQueryPool(e
->buf
, pool
->query_pool
,
586 e
->query_idx
, pool
->nb_queries
);
591 void ff_vk_exec_discard_deps(FFVulkanContext
*s
, FFVkExecContext
*e
)
593 for (int j
= 0; j
< e
->nb_buf_deps
; j
++)
594 av_buffer_unref(&e
->buf_deps
[j
]);
597 for (int j
= 0; j
< e
->nb_sw_frame_deps
; j
++)
598 av_frame_free(&e
->sw_frame_deps
[j
]);
599 e
->nb_sw_frame_deps
= 0;
601 for (int j
= 0; j
< e
->nb_frame_deps
; j
++) {
602 AVFrame
*f
= e
->frame_deps
[j
];
603 if (e
->frame_locked
[j
]) {
604 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
605 AVVulkanFramesContext
*vkfc
= hwfc
->hwctx
;
606 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
607 vkfc
->unlock_frame(hwfc
, vkf
);
608 e
->frame_locked
[j
] = 0;
610 e
->frame_update
[j
] = 0;
612 e
->nb_frame_deps
= 0;
616 e
->sem_sig_val_dst_cnt
= 0;
619 int ff_vk_exec_add_dep_buf(FFVulkanContext
*s
, FFVkExecContext
*e
,
620 AVBufferRef
**deps
, int nb_deps
, int ref
)
622 AVBufferRef
**dst
= av_fast_realloc(e
->buf_deps
, &e
->buf_deps_alloc_size
,
623 (e
->nb_buf_deps
+ nb_deps
) * sizeof(*dst
));
625 ff_vk_exec_discard_deps(s
, e
);
626 return AVERROR(ENOMEM
);
631 for (int i
= 0; i
< nb_deps
; i
++) {
635 e
->buf_deps
[e
->nb_buf_deps
] = ref
? av_buffer_ref(deps
[i
]) : deps
[i
];
636 if (!e
->buf_deps
[e
->nb_buf_deps
]) {
637 ff_vk_exec_discard_deps(s
, e
);
638 return AVERROR(ENOMEM
);
646 int ff_vk_exec_add_dep_sw_frame(FFVulkanContext
*s
, FFVkExecContext
*e
,
649 AVFrame
**dst
= av_fast_realloc(e
->sw_frame_deps
, &e
->sw_frame_deps_alloc_size
,
650 (e
->nb_sw_frame_deps
+ 1) * sizeof(*dst
));
652 ff_vk_exec_discard_deps(s
, e
);
653 return AVERROR(ENOMEM
);
656 e
->sw_frame_deps
= dst
;
658 e
->sw_frame_deps
[e
->nb_sw_frame_deps
] = av_frame_clone(f
);
659 if (!e
->sw_frame_deps
[e
->nb_sw_frame_deps
]) {
660 ff_vk_exec_discard_deps(s
, e
);
661 return AVERROR(ENOMEM
);
664 e
->nb_sw_frame_deps
++;
/* Grows a dynamic array member of 'str' by one element via av_fast_realloc.
 * On failure, discards the execution context's dependencies and returns
 * AVERROR(ENOMEM) from the enclosing function (requires 's' and 'e' in
 * scope). Wrapped in do/while(0) so it behaves as a single statement. */
#define ARR_REALLOC(str, arr, alloc_s, cnt)                               \
    do {                                                                  \
        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
        if (!arr) {                                                       \
            ff_vk_exec_discard_deps(s, e);                                \
            return AVERROR(ENOMEM);                                       \
        }                                                                 \
        str->arr = arr;                                                   \
    } while (0)
679 typedef struct TempSyncCtx
{
684 static void destroy_tmp_semaphores(void *opaque
, uint8_t *data
)
686 FFVulkanContext
*s
= opaque
;
687 FFVulkanFunctions
*vk
= &s
->vkfn
;
688 TempSyncCtx
*ts
= (TempSyncCtx
*)data
;
690 for (int i
= 0; i
< ts
->nb_sem
; i
++)
691 vk
->DestroySemaphore(s
->hwctx
->act_dev
, ts
->sem
[i
], s
->hwctx
->alloc
);
696 int ff_vk_exec_add_dep_wait_sem(FFVulkanContext
*s
, FFVkExecContext
*e
,
697 VkSemaphore sem
, uint64_t val
,
698 VkPipelineStageFlagBits2 stage
)
700 VkSemaphoreSubmitInfo
*sem_wait
;
701 ARR_REALLOC(e
, sem_wait
, &e
->sem_wait_alloc
, e
->sem_wait_cnt
);
703 e
->sem_wait
[e
->sem_wait_cnt
++] = (VkSemaphoreSubmitInfo
) {
704 .sType
= VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO
,
713 int ff_vk_exec_add_dep_bool_sem(FFVulkanContext
*s
, FFVkExecContext
*e
,
714 VkSemaphore
*sem
, int nb
,
715 VkPipelineStageFlagBits2 stage
,
722 FFVulkanFunctions
*vk
= &s
->vkfn
;
724 /* Do not transfer ownership if we're signalling a binary semaphore,
725 * since we're probably exporting it. */
727 for (int i
= 0; i
< nb
; i
++) {
728 VkSemaphoreSubmitInfo
*sem_sig
;
729 ARR_REALLOC(e
, sem_sig
, &e
->sem_sig_alloc
, e
->sem_sig_cnt
);
731 e
->sem_sig
[e
->sem_sig_cnt
++] = (VkSemaphoreSubmitInfo
) {
732 .sType
= VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO
,
741 buf_size
= sizeof(*ts
) + sizeof(VkSemaphore
)*nb
;
742 ts
= av_mallocz(buf_size
);
744 err
= AVERROR(ENOMEM
);
748 memcpy(ts
->sem
, sem
, nb
*sizeof(*sem
));
751 buf
= av_buffer_create((uint8_t *)ts
, buf_size
, destroy_tmp_semaphores
, s
, 0);
754 err
= AVERROR(ENOMEM
);
758 err
= ff_vk_exec_add_dep_buf(s
, e
, &buf
, 1, 0);
760 av_buffer_unref(&buf
);
764 for (int i
= 0; i
< nb
; i
++) {
765 err
= ff_vk_exec_add_dep_wait_sem(s
, e
, sem
[i
], 0, stage
);
773 for (int i
= 0; i
< nb
; i
++)
774 vk
->DestroySemaphore(s
->hwctx
->act_dev
, sem
[i
], s
->hwctx
->alloc
);
779 int ff_vk_exec_add_dep_frame(FFVulkanContext
*s
, FFVkExecContext
*e
, AVFrame
*f
,
780 VkPipelineStageFlagBits2 wait_stage
,
781 VkPipelineStageFlagBits2 signal_stage
)
783 uint8_t *frame_locked
;
784 uint8_t *frame_update
;
785 AVFrame
**frame_deps
;
786 AVBufferRef
**buf_deps
;
787 VkImageLayout
*layout_dst
;
788 uint32_t *queue_family_dst
;
789 VkAccessFlagBits
*access_dst
;
791 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
792 AVVulkanFramesContext
*vkfc
= hwfc
->hwctx
;
793 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
794 int nb_images
= ff_vk_count_images(vkf
);
796 /* Don't add duplicates */
797 for (int i
= 0; i
< e
->nb_frame_deps
; i
++)
798 if (e
->frame_deps
[i
]->data
[0] == f
->data
[0])
801 ARR_REALLOC(e
, layout_dst
, &e
->layout_dst_alloc
, e
->nb_frame_deps
);
802 ARR_REALLOC(e
, queue_family_dst
, &e
->queue_family_dst_alloc
, e
->nb_frame_deps
);
803 ARR_REALLOC(e
, access_dst
, &e
->access_dst_alloc
, e
->nb_frame_deps
);
805 ARR_REALLOC(e
, frame_locked
, &e
->frame_locked_alloc_size
, e
->nb_frame_deps
);
806 ARR_REALLOC(e
, frame_update
, &e
->frame_update_alloc_size
, e
->nb_frame_deps
);
807 ARR_REALLOC(e
, frame_deps
, &e
->frame_deps_alloc_size
, e
->nb_frame_deps
);
809 /* prepare_frame in hwcontext_vulkan.c uses the regular frame management
810 * code but has no frame yet, and it doesn't need to actually store a ref
813 ARR_REALLOC(e
, buf_deps
, &e
->buf_deps_alloc_size
, e
->nb_buf_deps
);
814 e
->buf_deps
[e
->nb_buf_deps
] = av_buffer_ref(f
->buf
[0]);
815 if (!e
->buf_deps
[e
->nb_buf_deps
]) {
816 ff_vk_exec_discard_deps(s
, e
);
817 return AVERROR(ENOMEM
);
822 e
->frame_deps
[e
->nb_frame_deps
] = f
;
824 vkfc
->lock_frame(hwfc
, vkf
);
825 e
->frame_locked
[e
->nb_frame_deps
] = 1;
826 e
->frame_update
[e
->nb_frame_deps
] = 0;
829 for (int i
= 0; i
< nb_images
; i
++) {
830 VkSemaphoreSubmitInfo
*sem_wait
;
831 VkSemaphoreSubmitInfo
*sem_sig
;
832 uint64_t **sem_sig_val_dst
;
834 ARR_REALLOC(e
, sem_wait
, &e
->sem_wait_alloc
, e
->sem_wait_cnt
);
835 ARR_REALLOC(e
, sem_sig
, &e
->sem_sig_alloc
, e
->sem_sig_cnt
);
836 ARR_REALLOC(e
, sem_sig_val_dst
, &e
->sem_sig_val_dst_alloc
, e
->sem_sig_val_dst_cnt
);
838 e
->sem_wait
[e
->sem_wait_cnt
++] = (VkSemaphoreSubmitInfo
) {
839 .sType
= VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO
,
840 .semaphore
= vkf
->sem
[i
],
841 .value
= vkf
->sem_value
[i
],
842 .stageMask
= wait_stage
,
845 e
->sem_sig
[e
->sem_sig_cnt
++] = (VkSemaphoreSubmitInfo
) {
846 .sType
= VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO
,
847 .semaphore
= vkf
->sem
[i
],
848 .value
= vkf
->sem_value
[i
] + 1,
849 .stageMask
= signal_stage
,
852 e
->sem_sig_val_dst
[e
->sem_sig_val_dst_cnt
] = &vkf
->sem_value
[i
];
853 e
->sem_sig_val_dst_cnt
++;
859 void ff_vk_exec_update_frame(FFVulkanContext
*s
, FFVkExecContext
*e
, AVFrame
*f
,
860 VkImageMemoryBarrier2
*bar
, uint32_t *nb_img_bar
)
863 for (i
= 0; i
< e
->nb_frame_deps
; i
++)
864 if (e
->frame_deps
[i
]->data
[0] == f
->data
[0])
866 av_assert0(i
< e
->nb_frame_deps
);
868 /* Don't update duplicates */
869 if (nb_img_bar
&& !e
->frame_update
[i
])
872 e
->queue_family_dst
[i
] = bar
->dstQueueFamilyIndex
;
873 e
->access_dst
[i
] = bar
->dstAccessMask
;
874 e
->layout_dst
[i
] = bar
->newLayout
;
875 e
->frame_update
[i
] = 1;
878 int ff_vk_exec_mirror_sem_value(FFVulkanContext
*s
, FFVkExecContext
*e
,
879 VkSemaphore
*dst
, uint64_t *dst_val
,
882 uint64_t **sem_sig_val_dst
;
883 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
885 /* Reject unknown frames */
887 for (i
= 0; i
< e
->nb_frame_deps
; i
++)
888 if (e
->frame_deps
[i
]->data
[0] == f
->data
[0])
890 if (i
== e
->nb_frame_deps
)
891 return AVERROR(EINVAL
);
893 ARR_REALLOC(e
, sem_sig_val_dst
, &e
->sem_sig_val_dst_alloc
, e
->sem_sig_val_dst_cnt
);
896 *dst_val
= vkf
->sem_value
[0];
898 e
->sem_sig_val_dst
[e
->sem_sig_val_dst_cnt
] = dst_val
;
899 e
->sem_sig_val_dst_cnt
++;
904 int ff_vk_exec_submit(FFVulkanContext
*s
, FFVkExecContext
*e
)
907 FFVulkanFunctions
*vk
= &s
->vkfn
;
908 VkCommandBufferSubmitInfo cmd_buf_info
= (VkCommandBufferSubmitInfo
) {
909 .sType
= VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO
,
910 .commandBuffer
= e
->buf
,
912 VkSubmitInfo2 submit_info
= (VkSubmitInfo2
) {
913 .sType
= VK_STRUCTURE_TYPE_SUBMIT_INFO_2
,
914 .pCommandBufferInfos
= &cmd_buf_info
,
915 .commandBufferInfoCount
= 1,
916 .pWaitSemaphoreInfos
= e
->sem_wait
,
917 .waitSemaphoreInfoCount
= e
->sem_wait_cnt
,
918 .pSignalSemaphoreInfos
= e
->sem_sig
,
919 .signalSemaphoreInfoCount
= e
->sem_sig_cnt
,
922 ret
= vk
->EndCommandBuffer(e
->buf
);
923 if (ret
!= VK_SUCCESS
) {
924 av_log(s
, AV_LOG_ERROR
, "Unable to finish command buffer: %s\n",
926 ff_vk_exec_discard_deps(s
, e
);
927 return AVERROR_EXTERNAL
;
930 s
->hwctx
->lock_queue(s
->device
, e
->qf
, e
->qi
);
931 ret
= vk
->QueueSubmit2(e
->queue
, 1, &submit_info
, e
->fence
);
932 s
->hwctx
->unlock_queue(s
->device
, e
->qf
, e
->qi
);
934 if (ret
!= VK_SUCCESS
) {
935 av_log(s
, AV_LOG_ERROR
, "Unable to submit command buffer: %s\n",
937 ff_vk_exec_discard_deps(s
, e
);
938 return AVERROR_EXTERNAL
;
941 for (int i
= 0; i
< e
->sem_sig_val_dst_cnt
; i
++)
942 *e
->sem_sig_val_dst
[i
] += 1;
944 /* Unlock all frames */
945 for (int j
= 0; j
< e
->nb_frame_deps
; j
++) {
946 if (e
->frame_locked
[j
]) {
947 AVFrame
*f
= e
->frame_deps
[j
];
948 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
949 AVVulkanFramesContext
*vkfc
= hwfc
->hwctx
;
950 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
952 if (e
->frame_update
[j
]) {
953 int nb_images
= ff_vk_count_images(vkf
);
954 for (int i
= 0; i
< nb_images
; i
++) {
955 vkf
->layout
[i
] = e
->layout_dst
[j
];
956 vkf
->access
[i
] = e
->access_dst
[j
];
957 vkf
->queue_family
[i
] = e
->queue_family_dst
[j
];
960 vkfc
->unlock_frame(hwfc
, vkf
);
961 e
->frame_locked
[j
] = 0;
965 e
->had_submission
= 1;
970 int ff_vk_alloc_mem(FFVulkanContext
*s
, VkMemoryRequirements
*req
,
971 VkMemoryPropertyFlagBits req_flags
, void *alloc_extension
,
972 VkMemoryPropertyFlagBits
*mem_flags
, VkDeviceMemory
*mem
)
976 FFVulkanFunctions
*vk
= &s
->vkfn
;
978 VkMemoryAllocateInfo alloc_info
= {
979 .sType
= VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO
,
980 .pNext
= alloc_extension
,
983 alloc_info
.allocationSize
= req
->size
;
985 /* The vulkan spec requires memory types to be sorted in the "optimal"
986 * order, so the first matching type we find will be the best/fastest one */
987 for (int i
= 0; i
< s
->mprops
.memoryTypeCount
; i
++) {
988 /* The memory type must be supported by the requirements (bitfield) */
989 if (!(req
->memoryTypeBits
& (1 << i
)))
992 /* The memory type flags must include our properties */
993 if ((req_flags
!= UINT32_MAX
) &&
994 ((s
->mprops
.memoryTypes
[i
].propertyFlags
& req_flags
) != req_flags
))
997 /* Found a suitable memory type */
1003 av_log(s
, AV_LOG_ERROR
, "No memory type found for flags 0x%x\n",
1005 return AVERROR(EINVAL
);
1008 alloc_info
.memoryTypeIndex
= index
;
1010 ret
= vk
->AllocateMemory(s
->hwctx
->act_dev
, &alloc_info
,
1011 s
->hwctx
->alloc
, mem
);
1012 if (ret
!= VK_SUCCESS
)
1013 return AVERROR(ENOMEM
);
1016 *mem_flags
|= s
->mprops
.memoryTypes
[index
].propertyFlags
;
1021 int ff_vk_create_buf(FFVulkanContext
*s
, FFVkBuffer
*buf
, size_t size
,
1022 void *pNext
, void *alloc_pNext
,
1023 VkBufferUsageFlags usage
, VkMemoryPropertyFlagBits flags
)
1028 FFVulkanFunctions
*vk
= &s
->vkfn
;
1030 /* Buffer usage flags corresponding to buffer descriptor types */
1031 const VkBufferUsageFlags desc_usage
=
1032 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT
|
1033 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
|
1034 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
|
1035 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT
;
1037 if ((s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) && (usage
& desc_usage
))
1038 usage
|= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
;
1040 VkBufferCreateInfo buf_spawn
= {
1041 .sType
= VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO
,
1044 .sharingMode
= VK_SHARING_MODE_EXCLUSIVE
,
1045 .size
= flags
& VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
?
1046 FFALIGN(size
, s
->props
.properties
.limits
.minMemoryMapAlignment
) :
1050 VkMemoryAllocateFlagsInfo alloc_flags
= {
1051 .sType
= VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO
,
1052 .flags
= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT
,
1054 VkBufferMemoryRequirementsInfo2 req_desc
= {
1055 .sType
= VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2
,
1057 VkMemoryDedicatedAllocateInfo ded_alloc
= {
1058 .sType
= VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO
,
1059 .pNext
= alloc_pNext
,
1061 VkMemoryDedicatedRequirements ded_req
= {
1062 .sType
= VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS
,
1064 VkMemoryRequirements2 req
= {
1065 .sType
= VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2
,
1069 av_log(s
, AV_LOG_DEBUG
, "Creating a buffer of %"SIZE_SPECIFIER
" bytes, "
1070 "usage: 0x%x, flags: 0x%x\n",
1071 size
, usage
, flags
);
1073 ret
= vk
->CreateBuffer(s
->hwctx
->act_dev
, &buf_spawn
, s
->hwctx
->alloc
, &buf
->buf
);
1074 if (ret
!= VK_SUCCESS
) {
1075 av_log(s
, AV_LOG_ERROR
, "Failed to create buffer: %s\n",
1076 ff_vk_ret2str(ret
));
1077 return AVERROR_EXTERNAL
;
1080 req_desc
.buffer
= buf
->buf
;
1082 vk
->GetBufferMemoryRequirements2(s
->hwctx
->act_dev
, &req_desc
, &req
);
1084 /* In case the implementation prefers/requires dedicated allocation */
1085 use_ded_mem
= ded_req
.prefersDedicatedAllocation
|
1086 ded_req
.requiresDedicatedAllocation
;
1088 ded_alloc
.buffer
= buf
->buf
;
1089 ded_alloc
.pNext
= alloc_pNext
;
1090 alloc_pNext
= &ded_alloc
;
1093 if (usage
& VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
) {
1094 alloc_flags
.pNext
= alloc_pNext
;
1095 alloc_pNext
= &alloc_flags
;
1098 err
= ff_vk_alloc_mem(s
, &req
.memoryRequirements
, flags
, alloc_pNext
,
1099 &buf
->flags
, &buf
->mem
);
1103 ret
= vk
->BindBufferMemory(s
->hwctx
->act_dev
, buf
->buf
, buf
->mem
, 0);
1104 if (ret
!= VK_SUCCESS
) {
1105 av_log(s
, AV_LOG_ERROR
, "Failed to bind memory to buffer: %s\n",
1106 ff_vk_ret2str(ret
));
1107 return AVERROR_EXTERNAL
;
1110 if (usage
& VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
) {
1111 VkBufferDeviceAddressInfo address_info
= {
1112 .sType
= VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO
,
1115 buf
->address
= vk
->GetBufferDeviceAddress(s
->hwctx
->act_dev
, &address_info
);
1123 int ff_vk_map_buffers(FFVulkanContext
*s
, FFVkBuffer
**buf
, uint8_t *mem
[],
1124 int nb_buffers
, int invalidate
)
1127 FFVulkanFunctions
*vk
= &s
->vkfn
;
1128 VkMappedMemoryRange inval_list
[64];
1129 int inval_count
= 0;
1131 for (int i
= 0; i
< nb_buffers
; i
++) {
1133 ret
= vk
->MapMemory(s
->hwctx
->act_dev
, buf
[i
]->mem
, 0,
1134 VK_WHOLE_SIZE
, 0, &dst
);
1135 if (ret
!= VK_SUCCESS
) {
1136 av_log(s
, AV_LOG_ERROR
, "Failed to map buffer memory: %s\n",
1137 ff_vk_ret2str(ret
));
1138 return AVERROR_EXTERNAL
;
1140 mem
[i
] = buf
[i
]->mapped_mem
= dst
;
1146 for (int i
= 0; i
< nb_buffers
; i
++) {
1147 const VkMappedMemoryRange ival_buf
= {
1148 .sType
= VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE
,
1149 .memory
= buf
[i
]->mem
,
1150 .size
= VK_WHOLE_SIZE
,
1152 if (buf
[i
]->flags
& VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
)
1154 inval_list
[inval_count
++] = ival_buf
;
1158 ret
= vk
->InvalidateMappedMemoryRanges(s
->hwctx
->act_dev
, inval_count
,
1160 if (ret
!= VK_SUCCESS
) {
1161 av_log(s
, AV_LOG_ERROR
, "Failed to invalidate memory: %s\n",
1162 ff_vk_ret2str(ret
));
1163 return AVERROR_EXTERNAL
;
1170 int ff_vk_flush_buffer(FFVulkanContext
*s
, FFVkBuffer
*buf
,
1171 size_t offset
, size_t mem_size
,
1175 FFVulkanFunctions
*vk
= &s
->vkfn
;
1177 if (buf
->host_ref
|| buf
->flags
& VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
)
1180 const VkMappedMemoryRange flush_data
= {
1181 .sType
= VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE
,
1188 ret
= vk
->FlushMappedMemoryRanges(s
->hwctx
->act_dev
, 1, &flush_data
);
1190 ret
= vk
->InvalidateMappedMemoryRanges(s
->hwctx
->act_dev
, 1, &flush_data
);
1192 if (ret
!= VK_SUCCESS
) {
1193 av_log(s
, AV_LOG_ERROR
, "Failed to flush memory: %s\n",
1194 ff_vk_ret2str(ret
));
1195 return AVERROR_EXTERNAL
;
1201 int ff_vk_unmap_buffers(FFVulkanContext
*s
, FFVkBuffer
**buf
, int nb_buffers
,
1206 FFVulkanFunctions
*vk
= &s
->vkfn
;
1207 VkMappedMemoryRange flush_list
[64];
1208 int flush_count
= 0;
1211 for (int i
= 0; i
< nb_buffers
; i
++) {
1212 const VkMappedMemoryRange flush_buf
= {
1213 .sType
= VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE
,
1214 .memory
= buf
[i
]->mem
,
1215 .size
= VK_WHOLE_SIZE
,
1218 av_assert0(!buf
[i
]->host_ref
);
1219 if (buf
[i
]->flags
& VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
)
1221 flush_list
[flush_count
++] = flush_buf
;
1226 ret
= vk
->FlushMappedMemoryRanges(s
->hwctx
->act_dev
, flush_count
,
1228 if (ret
!= VK_SUCCESS
) {
1229 av_log(s
, AV_LOG_ERROR
, "Failed to flush memory: %s\n",
1230 ff_vk_ret2str(ret
));
1231 err
= AVERROR_EXTERNAL
; /* We still want to try to unmap them */
1235 for (int i
= 0; i
< nb_buffers
; i
++) {
1236 vk
->UnmapMemory(s
->hwctx
->act_dev
, buf
[i
]->mem
);
1237 buf
[i
]->mapped_mem
= NULL
;
1243 void ff_vk_free_buf(FFVulkanContext
*s
, FFVkBuffer
*buf
)
1245 FFVulkanFunctions
*vk
= &s
->vkfn
;
1247 if (!buf
|| !s
->hwctx
)
1250 if (buf
->mapped_mem
&& !buf
->host_ref
)
1251 ff_vk_unmap_buffer(s
, buf
, 0);
1252 if (buf
->buf
!= VK_NULL_HANDLE
)
1253 vk
->DestroyBuffer(s
->hwctx
->act_dev
, buf
->buf
, s
->hwctx
->alloc
);
1254 if (buf
->mem
!= VK_NULL_HANDLE
)
1255 vk
->FreeMemory(s
->hwctx
->act_dev
, buf
->mem
, s
->hwctx
->alloc
);
1257 av_buffer_unref(&buf
->host_ref
);
1259 buf
->buf
= VK_NULL_HANDLE
;
1260 buf
->mem
= VK_NULL_HANDLE
;
1261 buf
->mapped_mem
= NULL
;
1264 static void free_data_buf(void *opaque
, uint8_t *data
)
1266 FFVulkanContext
*ctx
= opaque
;
1267 FFVkBuffer
*buf
= (FFVkBuffer
*)data
;
1268 ff_vk_free_buf(ctx
, buf
);
1272 static AVBufferRef
*alloc_data_buf(void *opaque
, size_t size
)
1275 uint8_t *buf
= av_mallocz(size
);
1279 ref
= av_buffer_create(buf
, size
, free_data_buf
, opaque
, 0);
1285 int ff_vk_get_pooled_buffer(FFVulkanContext
*ctx
, AVBufferPool
**buf_pool
,
1286 AVBufferRef
**buf
, VkBufferUsageFlags usage
,
1287 void *create_pNext
, size_t size
,
1288 VkMemoryPropertyFlagBits mem_props
)
1297 *buf_pool
= av_buffer_pool_init2(sizeof(FFVkBuffer
), ctx
,
1298 alloc_data_buf
, NULL
);
1300 return AVERROR(ENOMEM
);
1303 *buf
= ref
= av_buffer_pool_get(*buf_pool
);
1305 return AVERROR(ENOMEM
);
1307 data
= (FFVkBuffer
*)ref
->data
;
1308 data
->stage
= VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT
;
1309 data
->access
= VK_ACCESS_2_NONE
;
1311 if (data
->size
>= size
)
1314 ff_vk_free_buf(ctx
, data
);
1315 memset(data
, 0, sizeof(*data
));
1317 err
= ff_vk_create_buf(ctx
, data
, size
,
1318 create_pNext
, NULL
, usage
,
1321 av_buffer_unref(&ref
);
1326 if (mem_props
& VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
) {
1327 err
= ff_vk_map_buffer(ctx
, data
, &data
->mapped_mem
, 0);
1329 av_buffer_unref(&ref
);
1338 static int create_mapped_buffer(FFVulkanContext
*s
,
1339 FFVkBuffer
*vkb
, VkBufferUsageFlags usage
,
1341 VkExternalMemoryBufferCreateInfo
*create_desc
,
1342 VkImportMemoryHostPointerInfoEXT
*import_desc
,
1343 VkMemoryHostPointerPropertiesEXT props
)
1347 FFVulkanFunctions
*vk
= &s
->vkfn
;
1349 VkBufferCreateInfo buf_spawn
= {
1350 .sType
= VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO
,
1351 .pNext
= create_desc
,
1353 .sharingMode
= VK_SHARING_MODE_EXCLUSIVE
,
1356 VkMemoryRequirements req
= {
1358 .alignment
= s
->hprops
.minImportedHostPointerAlignment
,
1359 .memoryTypeBits
= props
.memoryTypeBits
,
1362 err
= ff_vk_alloc_mem(s
, &req
,
1363 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
,
1364 import_desc
, &vkb
->flags
, &vkb
->mem
);
1368 ret
= vk
->CreateBuffer(s
->hwctx
->act_dev
, &buf_spawn
, s
->hwctx
->alloc
, &vkb
->buf
);
1369 if (ret
!= VK_SUCCESS
) {
1370 vk
->FreeMemory(s
->hwctx
->act_dev
, vkb
->mem
, s
->hwctx
->alloc
);
1371 return AVERROR_EXTERNAL
;
1374 ret
= vk
->BindBufferMemory(s
->hwctx
->act_dev
, vkb
->buf
, vkb
->mem
, 0);
1375 if (ret
!= VK_SUCCESS
) {
1376 vk
->FreeMemory(s
->hwctx
->act_dev
, vkb
->mem
, s
->hwctx
->alloc
);
1377 vk
->DestroyBuffer(s
->hwctx
->act_dev
, vkb
->buf
, s
->hwctx
->alloc
);
1378 return AVERROR_EXTERNAL
;
1384 static void destroy_avvkbuf(void *opaque
, uint8_t *data
)
1386 FFVulkanContext
*s
= opaque
;
1387 FFVkBuffer
*buf
= (FFVkBuffer
*)data
;
1388 ff_vk_free_buf(s
, buf
);
1392 int ff_vk_host_map_buffer(FFVulkanContext
*s
, AVBufferRef
**dst
,
1393 uint8_t *src_data
, const AVBufferRef
*src_buf
,
1394 VkBufferUsageFlags usage
)
1398 FFVulkanFunctions
*vk
= &s
->vkfn
;
1400 VkExternalMemoryBufferCreateInfo create_desc
= {
1401 .sType
= VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO
,
1402 .handleTypes
= VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
,
1404 VkMemoryAllocateFlagsInfo alloc_flags
= {
1405 .sType
= VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO
,
1406 .flags
= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT
,
1408 VkImportMemoryHostPointerInfoEXT import_desc
= {
1409 .sType
= VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT
,
1410 .handleType
= VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
,
1411 .pNext
= usage
& VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
? &alloc_flags
: NULL
,
1413 VkMemoryHostPointerPropertiesEXT props
;
1422 /* Get the previous point at which mapping was possible and use it */
1423 offs
= (uintptr_t)src_data
% s
->hprops
.minImportedHostPointerAlignment
;
1424 import_desc
.pHostPointer
= src_data
- offs
;
1426 props
= (VkMemoryHostPointerPropertiesEXT
) {
1427 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT
,
1429 ret
= vk
->GetMemoryHostPointerPropertiesEXT(s
->hwctx
->act_dev
,
1430 import_desc
.handleType
,
1431 import_desc
.pHostPointer
,
1433 if (!(ret
== VK_SUCCESS
&& props
.memoryTypeBits
))
1434 return AVERROR(EINVAL
);
1436 /* Ref the source buffer */
1437 ref
= av_buffer_ref(src_buf
);
1439 return AVERROR(ENOMEM
);
1441 /* Add the offset at the start, which gets ignored */
1442 const ptrdiff_t src_offset
= src_data
- src_buf
->data
;
1443 buffer_size
= offs
+ (src_buf
->size
- src_offset
);
1444 buffer_size
= FFALIGN(buffer_size
, s
->props
.properties
.limits
.minMemoryMapAlignment
);
1445 buffer_size
= FFALIGN(buffer_size
, s
->hprops
.minImportedHostPointerAlignment
);
1447 /* Create a buffer struct */
1448 vkb
= av_mallocz(sizeof(*vkb
));
1450 av_buffer_unref(&ref
);
1451 return AVERROR(ENOMEM
);
1454 err
= create_mapped_buffer(s
, vkb
, usage
,
1455 buffer_size
, &create_desc
, &import_desc
,
1458 av_buffer_unref(&ref
);
1463 if (usage
& VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
) {
1464 VkBufferDeviceAddressInfo address_info
= {
1465 .sType
= VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO
,
1468 vkb
->address
= vk
->GetBufferDeviceAddress(s
->hwctx
->act_dev
, &address_info
);
1471 vkb
->host_ref
= ref
;
1472 vkb
->virtual_offset
= offs
;
1473 vkb
->address
+= offs
;
1474 vkb
->mapped_mem
= src_data
;
1475 vkb
->size
= buffer_size
- offs
;
1476 vkb
->flags
|= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
;
1479 *dst
= av_buffer_create((uint8_t *)vkb
, sizeof(*vkb
),
1480 destroy_avvkbuf
, s
, 0);
1482 destroy_avvkbuf(s
, (uint8_t *)vkb
);
1484 return AVERROR(ENOMEM
);
1490 int ff_vk_shader_add_push_const(FFVulkanShader
*shd
, int offset
, int size
,
1491 VkShaderStageFlagBits stage
)
1493 VkPushConstantRange
*pc
;
1495 shd
->push_consts
= av_realloc_array(shd
->push_consts
,
1496 sizeof(*shd
->push_consts
),
1497 shd
->push_consts_num
+ 1);
1498 if (!shd
->push_consts
)
1499 return AVERROR(ENOMEM
);
1501 pc
= &shd
->push_consts
[shd
->push_consts_num
++];
1502 memset(pc
, 0, sizeof(*pc
));
1504 pc
->stageFlags
= stage
;
1505 pc
->offset
= offset
;
1511 int ff_vk_init_sampler(FFVulkanContext
*s
, VkSampler
*sampler
,
1512 int unnorm_coords
, VkFilter filt
)
1515 FFVulkanFunctions
*vk
= &s
->vkfn
;
1517 VkSamplerCreateInfo sampler_info
= {
1518 .sType
= VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO
,
1520 .minFilter
= sampler_info
.magFilter
,
1521 .mipmapMode
= unnorm_coords
? VK_SAMPLER_MIPMAP_MODE_NEAREST
:
1522 VK_SAMPLER_MIPMAP_MODE_LINEAR
,
1523 .addressModeU
= VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE
,
1524 .addressModeV
= sampler_info
.addressModeU
,
1525 .addressModeW
= sampler_info
.addressModeU
,
1526 .anisotropyEnable
= VK_FALSE
,
1527 .compareOp
= VK_COMPARE_OP_NEVER
,
1528 .borderColor
= VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK
,
1529 .unnormalizedCoordinates
= unnorm_coords
,
1532 ret
= vk
->CreateSampler(s
->hwctx
->act_dev
, &sampler_info
,
1533 s
->hwctx
->alloc
, sampler
);
1534 if (ret
!= VK_SUCCESS
) {
1535 av_log(s
, AV_LOG_ERROR
, "Unable to init sampler: %s\n",
1536 ff_vk_ret2str(ret
));
1537 return AVERROR_EXTERNAL
;
1543 VkImageAspectFlags
ff_vk_aspect_flag(AVFrame
*f
, int p
)
1545 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
1546 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
1547 int nb_images
= ff_vk_count_images(vkf
);
1548 int nb_planes
= av_pix_fmt_count_planes(hwfc
->sw_format
);
1550 static const VkImageAspectFlags plane_aspect
[] = { VK_IMAGE_ASPECT_PLANE_0_BIT
,
1551 VK_IMAGE_ASPECT_PLANE_1_BIT
,
1552 VK_IMAGE_ASPECT_PLANE_2_BIT
, };
1554 if (ff_vk_mt_is_np_rgb(hwfc
->sw_format
) || (nb_planes
== nb_images
))
1555 return VK_IMAGE_ASPECT_COLOR_BIT
;
1557 return plane_aspect
[p
];
1560 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt
)
1562 if (pix_fmt
== AV_PIX_FMT_ABGR
|| pix_fmt
== AV_PIX_FMT_BGRA
||
1563 pix_fmt
== AV_PIX_FMT_RGBA
|| pix_fmt
== AV_PIX_FMT_RGB24
||
1564 pix_fmt
== AV_PIX_FMT_BGR24
|| pix_fmt
== AV_PIX_FMT_RGB48
||
1565 pix_fmt
== AV_PIX_FMT_RGBA64
|| pix_fmt
== AV_PIX_FMT_RGB565
||
1566 pix_fmt
== AV_PIX_FMT_BGR565
|| pix_fmt
== AV_PIX_FMT_BGR0
||
1567 pix_fmt
== AV_PIX_FMT_0BGR
|| pix_fmt
== AV_PIX_FMT_RGB0
||
1568 pix_fmt
== AV_PIX_FMT_GBRP10
|| pix_fmt
== AV_PIX_FMT_GBRP12
||
1569 pix_fmt
== AV_PIX_FMT_GBRP14
|| pix_fmt
== AV_PIX_FMT_GBRP16
||
1570 pix_fmt
== AV_PIX_FMT_GBRAP
|| pix_fmt
== AV_PIX_FMT_GBRAP10
||
1571 pix_fmt
== AV_PIX_FMT_GBRAP12
|| pix_fmt
== AV_PIX_FMT_GBRAP14
||
1572 pix_fmt
== AV_PIX_FMT_GBRAP16
|| pix_fmt
== AV_PIX_FMT_GBRAP32
||
1573 pix_fmt
== AV_PIX_FMT_GBRPF32
|| pix_fmt
== AV_PIX_FMT_GBRAPF32
||
1574 pix_fmt
== AV_PIX_FMT_X2RGB10
|| pix_fmt
== AV_PIX_FMT_X2BGR10
||
1575 pix_fmt
== AV_PIX_FMT_RGBAF32
|| pix_fmt
== AV_PIX_FMT_RGBF32
||
1576 pix_fmt
== AV_PIX_FMT_RGBA128
|| pix_fmt
== AV_PIX_FMT_RGB96
||
1577 pix_fmt
== AV_PIX_FMT_GBRP
|| pix_fmt
== AV_PIX_FMT_BAYER_RGGB16
)
1582 void ff_vk_set_perm(enum AVPixelFormat pix_fmt
, int lut
[4], int inv
)
1585 case AV_PIX_FMT_GBRP
:
1586 case AV_PIX_FMT_GBRAP
:
1587 case AV_PIX_FMT_GBRAP10
:
1588 case AV_PIX_FMT_GBRAP12
:
1589 case AV_PIX_FMT_GBRAP14
:
1590 case AV_PIX_FMT_GBRAP16
:
1591 case AV_PIX_FMT_GBRP10
:
1592 case AV_PIX_FMT_GBRP12
:
1593 case AV_PIX_FMT_GBRP14
:
1594 case AV_PIX_FMT_GBRP16
:
1595 case AV_PIX_FMT_GBRPF32
:
1596 case AV_PIX_FMT_GBRAP32
:
1597 case AV_PIX_FMT_GBRAPF32
:
1603 case AV_PIX_FMT_X2BGR10
:
1618 int lut_tmp
[4] = { lut
[0], lut
[1], lut
[2], lut
[3] };
1619 for (int i
= 0; i
< 4; i
++)
1620 lut
[lut_tmp
[i
]] = i
;
1626 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt
,
1627 enum FFVkShaderRepFormat rep_fmt
)
1630 case AV_PIX_FMT_RGBA
:
1631 case AV_PIX_FMT_BGRA
:
1632 case AV_PIX_FMT_RGB24
:
1633 case AV_PIX_FMT_BGR24
:
1634 case AV_PIX_FMT_BGR0
:
1635 case AV_PIX_FMT_RGB0
:
1636 case AV_PIX_FMT_RGB565
:
1637 case AV_PIX_FMT_BGR565
:
1638 case AV_PIX_FMT_UYVA
:
1639 case AV_PIX_FMT_YUYV422
:
1640 case AV_PIX_FMT_UYVY422
: {
1641 const char *rep_tab
[] = {
1642 [FF_VK_REP_NATIVE
] = "rgba8ui",
1643 [FF_VK_REP_FLOAT
] = "rgba8",
1644 [FF_VK_REP_INT
] = "rgba8i",
1645 [FF_VK_REP_UINT
] = "rgba8ui",
1647 return rep_tab
[rep_fmt
];
1649 case AV_PIX_FMT_X2RGB10
:
1650 case AV_PIX_FMT_X2BGR10
:
1651 case AV_PIX_FMT_Y210
:
1652 case AV_PIX_FMT_XV30
: {
1653 const char *rep_tab
[] = {
1654 [FF_VK_REP_NATIVE
] = "rgb10_a2ui",
1655 [FF_VK_REP_FLOAT
] = "rgb10_a2",
1656 [FF_VK_REP_INT
] = NULL
,
1657 [FF_VK_REP_UINT
] = "rgb10_a2ui",
1659 return rep_tab
[rep_fmt
];
1661 case AV_PIX_FMT_RGB48
:
1662 case AV_PIX_FMT_RGBA64
:
1663 case AV_PIX_FMT_Y212
:
1664 case AV_PIX_FMT_Y216
:
1665 case AV_PIX_FMT_XV36
:
1666 case AV_PIX_FMT_XV48
: {
1667 const char *rep_tab
[] = {
1668 [FF_VK_REP_NATIVE
] = "rgba16ui",
1669 [FF_VK_REP_FLOAT
] = "rgba16",
1670 [FF_VK_REP_INT
] = "rgba16i",
1671 [FF_VK_REP_UINT
] = "rgba16ui",
1673 return rep_tab
[rep_fmt
];
1675 case AV_PIX_FMT_RGBF32
:
1676 case AV_PIX_FMT_RGBAF32
: {
1677 const char *rep_tab
[] = {
1678 [FF_VK_REP_NATIVE
] = "rgba32f",
1679 [FF_VK_REP_FLOAT
] = "rgba32f",
1680 [FF_VK_REP_INT
] = "rgba32i",
1681 [FF_VK_REP_UINT
] = "rgba32ui",
1683 return rep_tab
[rep_fmt
];
1685 case AV_PIX_FMT_RGB96
:
1686 case AV_PIX_FMT_RGBA128
: {
1687 const char *rep_tab
[] = {
1688 [FF_VK_REP_NATIVE
] = "rgba32ui",
1689 [FF_VK_REP_FLOAT
] = NULL
,
1690 [FF_VK_REP_INT
] = "rgba32i",
1691 [FF_VK_REP_UINT
] = "rgba32ui",
1693 return rep_tab
[rep_fmt
];
1695 case AV_PIX_FMT_GBRP
:
1696 case AV_PIX_FMT_GRAY8
:
1697 case AV_PIX_FMT_GBRAP
:
1698 case AV_PIX_FMT_YUV420P
:
1699 case AV_PIX_FMT_YUV422P
:
1700 case AV_PIX_FMT_YUV444P
:
1701 case AV_PIX_FMT_YUVA420P
:
1702 case AV_PIX_FMT_YUVA422P
:
1703 case AV_PIX_FMT_YUVA444P
: {
1704 const char *rep_tab
[] = {
1705 [FF_VK_REP_NATIVE
] = "r8ui",
1706 [FF_VK_REP_FLOAT
] = "r8",
1707 [FF_VK_REP_INT
] = "r8i",
1708 [FF_VK_REP_UINT
] = "r8ui",
1710 return rep_tab
[rep_fmt
];
1712 case AV_PIX_FMT_GRAY10
:
1713 case AV_PIX_FMT_GRAY12
:
1714 case AV_PIX_FMT_GRAY14
:
1715 case AV_PIX_FMT_GRAY16
:
1716 case AV_PIX_FMT_GBRAP10
:
1717 case AV_PIX_FMT_GBRAP12
:
1718 case AV_PIX_FMT_GBRAP14
:
1719 case AV_PIX_FMT_GBRAP16
:
1720 case AV_PIX_FMT_GBRP10
:
1721 case AV_PIX_FMT_GBRP12
:
1722 case AV_PIX_FMT_GBRP14
:
1723 case AV_PIX_FMT_GBRP16
:
1724 case AV_PIX_FMT_YUV420P10
:
1725 case AV_PIX_FMT_YUV420P12
:
1726 case AV_PIX_FMT_YUV420P16
:
1727 case AV_PIX_FMT_YUV422P10
:
1728 case AV_PIX_FMT_YUV422P12
:
1729 case AV_PIX_FMT_YUV422P16
:
1730 case AV_PIX_FMT_YUV444P10
:
1731 case AV_PIX_FMT_YUV444P12
:
1732 case AV_PIX_FMT_YUV444P16
:
1733 case AV_PIX_FMT_YUVA420P10
:
1734 case AV_PIX_FMT_YUVA420P16
:
1735 case AV_PIX_FMT_YUVA422P10
:
1736 case AV_PIX_FMT_YUVA422P12
:
1737 case AV_PIX_FMT_YUVA422P16
:
1738 case AV_PIX_FMT_YUVA444P10
:
1739 case AV_PIX_FMT_YUVA444P12
:
1740 case AV_PIX_FMT_YUVA444P16
:
1741 case AV_PIX_FMT_BAYER_RGGB16
: {
1742 const char *rep_tab
[] = {
1743 [FF_VK_REP_NATIVE
] = "r16ui",
1744 [FF_VK_REP_FLOAT
] = "r16f",
1745 [FF_VK_REP_INT
] = "r16i",
1746 [FF_VK_REP_UINT
] = "r16ui",
1748 return rep_tab
[rep_fmt
];
1750 case AV_PIX_FMT_GRAY32
:
1751 case AV_PIX_FMT_GRAYF32
:
1752 case AV_PIX_FMT_GBRPF32
:
1753 case AV_PIX_FMT_GBRAPF32
: {
1754 const char *rep_tab
[] = {
1755 [FF_VK_REP_NATIVE
] = "r32f",
1756 [FF_VK_REP_FLOAT
] = "r32f",
1757 [FF_VK_REP_INT
] = "r32i",
1758 [FF_VK_REP_UINT
] = "r32ui",
1760 return rep_tab
[rep_fmt
];
1762 case AV_PIX_FMT_GBRAP32
: {
1763 const char *rep_tab
[] = {
1764 [FF_VK_REP_NATIVE
] = "r32ui",
1765 [FF_VK_REP_FLOAT
] = NULL
,
1766 [FF_VK_REP_INT
] = "r32i",
1767 [FF_VK_REP_UINT
] = "r32ui",
1769 return rep_tab
[rep_fmt
];
1771 case AV_PIX_FMT_NV12
:
1772 case AV_PIX_FMT_NV16
:
1773 case AV_PIX_FMT_NV24
: {
1774 const char *rep_tab
[] = {
1775 [FF_VK_REP_NATIVE
] = "rg8ui",
1776 [FF_VK_REP_FLOAT
] = "rg8",
1777 [FF_VK_REP_INT
] = "rg8i",
1778 [FF_VK_REP_UINT
] = "rg8ui",
1780 return rep_tab
[rep_fmt
];
1782 case AV_PIX_FMT_P010
:
1783 case AV_PIX_FMT_P210
:
1784 case AV_PIX_FMT_P410
: {
1785 const char *rep_tab
[] = {
1786 [FF_VK_REP_NATIVE
] = "rgb10_a2ui",
1787 [FF_VK_REP_FLOAT
] = "rgb10_a2",
1788 [FF_VK_REP_INT
] = NULL
,
1789 [FF_VK_REP_UINT
] = "rgb10_a2ui",
1791 return rep_tab
[rep_fmt
];
1793 case AV_PIX_FMT_P012
:
1794 case AV_PIX_FMT_P016
:
1795 case AV_PIX_FMT_P212
:
1796 case AV_PIX_FMT_P216
:
1797 case AV_PIX_FMT_P412
:
1798 case AV_PIX_FMT_P416
: {
1799 const char *rep_tab
[] = {
1800 [FF_VK_REP_NATIVE
] = "rg16ui",
1801 [FF_VK_REP_FLOAT
] = "rg16",
1802 [FF_VK_REP_INT
] = "rg16i",
1803 [FF_VK_REP_UINT
] = "rg16ui",
1805 return rep_tab
[rep_fmt
];
1812 typedef struct ImageViewCtx
{
1814 VkImageView views
[];
1817 static void destroy_imageviews(void *opaque
, uint8_t *data
)
1819 FFVulkanContext
*s
= opaque
;
1820 FFVulkanFunctions
*vk
= &s
->vkfn
;
1821 ImageViewCtx
*iv
= (ImageViewCtx
*)data
;
1823 for (int i
= 0; i
< iv
->nb_views
; i
++)
1824 vk
->DestroyImageView(s
->hwctx
->act_dev
, iv
->views
[i
], s
->hwctx
->alloc
);
1829 static VkFormat
map_fmt_to_rep(VkFormat fmt
, enum FFVkShaderRepFormat rep_fmt
)
1831 #define REPS_FMT(fmt) \
1832 [FF_VK_REP_NATIVE] = fmt ## _UINT, \
1833 [FF_VK_REP_FLOAT] = fmt ## _UNORM, \
1834 [FF_VK_REP_INT] = fmt ## _SINT, \
1835 [FF_VK_REP_UINT] = fmt ## _UINT,
1837 #define REPS_FMT_PACK(fmt, num) \
1838 [FF_VK_REP_NATIVE] = fmt ## _UINT_PACK ## num, \
1839 [FF_VK_REP_FLOAT] = fmt ## _UNORM_PACK ## num, \
1840 [FF_VK_REP_INT] = fmt ## _SINT_PACK ## num, \
1841 [FF_VK_REP_UINT] = fmt ## _UINT_PACK ## num,
1843 const VkFormat fmts_map
[][4] = {
1844 { REPS_FMT_PACK(VK_FORMAT_A2B10G10R10
, 32) },
1845 { REPS_FMT_PACK(VK_FORMAT_A2R10G10B10
, 32) },
1847 VK_FORMAT_B5G6R5_UNORM_PACK16
,
1848 VK_FORMAT_B5G6R5_UNORM_PACK16
,
1849 VK_FORMAT_UNDEFINED
,
1850 VK_FORMAT_UNDEFINED
,
1853 VK_FORMAT_R5G6B5_UNORM_PACK16
,
1854 VK_FORMAT_R5G6B5_UNORM_PACK16
,
1855 VK_FORMAT_UNDEFINED
,
1856 VK_FORMAT_UNDEFINED
,
1858 { REPS_FMT(VK_FORMAT_B8G8R8
) },
1859 { REPS_FMT(VK_FORMAT_B8G8R8A8
) },
1860 { REPS_FMT(VK_FORMAT_R8
) },
1861 { REPS_FMT(VK_FORMAT_R8G8
) },
1862 { REPS_FMT(VK_FORMAT_R8G8B8
) },
1863 { REPS_FMT(VK_FORMAT_R8G8B8A8
) },
1864 { REPS_FMT(VK_FORMAT_R16
) },
1865 { REPS_FMT(VK_FORMAT_R16G16
) },
1866 { REPS_FMT(VK_FORMAT_R16G16B16
) },
1867 { REPS_FMT(VK_FORMAT_R16G16B16A16
) },
1870 VK_FORMAT_R32_SFLOAT
,
1875 VK_FORMAT_R32G32B32_SFLOAT
,
1876 VK_FORMAT_R32G32B32_SFLOAT
,
1877 VK_FORMAT_UNDEFINED
,
1878 VK_FORMAT_UNDEFINED
,
1881 VK_FORMAT_R32G32B32A32_SFLOAT
,
1882 VK_FORMAT_R32G32B32A32_SFLOAT
,
1883 VK_FORMAT_UNDEFINED
,
1884 VK_FORMAT_UNDEFINED
,
1887 VK_FORMAT_R32G32B32_UINT
,
1888 VK_FORMAT_UNDEFINED
,
1889 VK_FORMAT_R32G32B32_SINT
,
1890 VK_FORMAT_R32G32B32_UINT
,
1893 VK_FORMAT_R32G32B32A32_UINT
,
1894 VK_FORMAT_UNDEFINED
,
1895 VK_FORMAT_R32G32B32A32_SINT
,
1896 VK_FORMAT_R32G32B32A32_UINT
,
1899 #undef REPS_FMT_PACK
1902 if (fmt
== VK_FORMAT_UNDEFINED
)
1903 return VK_FORMAT_UNDEFINED
;
1905 for (int i
= 0; i
< FF_ARRAY_ELEMS(fmts_map
); i
++) {
1906 if (fmts_map
[i
][FF_VK_REP_NATIVE
] == fmt
||
1907 fmts_map
[i
][FF_VK_REP_FLOAT
] == fmt
||
1908 fmts_map
[i
][FF_VK_REP_INT
] == fmt
||
1909 fmts_map
[i
][FF_VK_REP_UINT
] == fmt
)
1910 return fmts_map
[i
][rep_fmt
];
1913 return VK_FORMAT_UNDEFINED
;
1916 int ff_vk_create_imageview(FFVulkanContext
*s
,
1917 VkImageView
*img_view
, VkImageAspectFlags
*aspect
,
1918 AVFrame
*f
, int plane
, enum FFVkShaderRepFormat rep_fmt
)
1921 FFVulkanFunctions
*vk
= &s
->vkfn
;
1922 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
1923 AVVulkanFramesContext
*vkfc
= hwfc
->hwctx
;
1924 const VkFormat
*rep_fmts
= av_vkfmt_from_pixfmt(hwfc
->sw_format
);
1925 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
1926 const int nb_images
= ff_vk_count_images(vkf
);
1928 VkImageViewUsageCreateInfo view_usage_info
= {
1929 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO
,
1930 .usage
= vkfc
->usage
&
1931 (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR
|
1932 VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR
)),
1934 VkImageViewCreateInfo view_create_info
= {
1935 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
1936 .pNext
= &view_usage_info
,
1937 .image
= vkf
->img
[FFMIN(plane
, nb_images
- 1)],
1938 .viewType
= VK_IMAGE_VIEW_TYPE_2D
,
1939 .format
= map_fmt_to_rep(rep_fmts
[plane
], rep_fmt
),
1940 .components
= ff_comp_identity_map
,
1941 .subresourceRange
= {
1942 .aspectMask
= ff_vk_aspect_flag(f
, plane
),
1947 if (view_create_info
.format
== VK_FORMAT_UNDEFINED
) {
1948 av_log(s
, AV_LOG_ERROR
, "Unable to find a compatible representation "
1949 "of format %i and mode %i\n",
1950 rep_fmts
[plane
], rep_fmt
);
1951 return AVERROR(EINVAL
);
1954 ret
= vk
->CreateImageView(s
->hwctx
->act_dev
, &view_create_info
,
1955 s
->hwctx
->alloc
, img_view
);
1956 if (ret
!= VK_SUCCESS
) {
1957 av_log(s
, AV_LOG_ERROR
, "Failed to create imageview: %s\n",
1958 ff_vk_ret2str(ret
));
1959 return AVERROR_EXTERNAL
;
1962 *aspect
= view_create_info
.subresourceRange
.aspectMask
;
1967 int ff_vk_create_imageviews(FFVulkanContext
*s
, FFVkExecContext
*e
,
1968 VkImageView views
[AV_NUM_DATA_POINTERS
],
1969 AVFrame
*f
, enum FFVkShaderRepFormat rep_fmt
)
1974 FFVulkanFunctions
*vk
= &s
->vkfn
;
1975 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
1976 AVVulkanFramesContext
*vkfc
= hwfc
->hwctx
;
1977 const VkFormat
*rep_fmts
= av_vkfmt_from_pixfmt(hwfc
->sw_format
);
1978 AVVkFrame
*vkf
= (AVVkFrame
*)f
->data
[0];
1979 const int nb_images
= ff_vk_count_images(vkf
);
1980 const int nb_planes
= av_pix_fmt_count_planes(hwfc
->sw_format
);
1983 const size_t buf_size
= sizeof(*iv
) + nb_planes
*sizeof(VkImageView
);
1984 iv
= av_mallocz(buf_size
);
1986 return AVERROR(ENOMEM
);
1988 for (int i
= 0; i
< nb_planes
; i
++) {
1989 VkImageViewUsageCreateInfo view_usage_info
= {
1990 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO
,
1991 .usage
= vkfc
->usage
&
1992 (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR
|
1993 VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR
)),
1995 VkImageViewCreateInfo view_create_info
= {
1996 .sType
= VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO
,
1997 .pNext
= &view_usage_info
,
1998 .image
= vkf
->img
[FFMIN(i
, nb_images
- 1)],
1999 .viewType
= VK_IMAGE_VIEW_TYPE_2D
,
2000 .format
= map_fmt_to_rep(rep_fmts
[i
], rep_fmt
),
2001 .components
= ff_comp_identity_map
,
2002 .subresourceRange
= {
2003 .aspectMask
= ff_vk_aspect_flag(f
, i
),
2008 if (view_create_info
.format
== VK_FORMAT_UNDEFINED
) {
2009 av_log(s
, AV_LOG_ERROR
, "Unable to find a compatible representation "
2010 "of format %i and mode %i\n",
2011 rep_fmts
[i
], rep_fmt
);
2012 err
= AVERROR(EINVAL
);
2016 ret
= vk
->CreateImageView(s
->hwctx
->act_dev
, &view_create_info
,
2017 s
->hwctx
->alloc
, &iv
->views
[i
]);
2018 if (ret
!= VK_SUCCESS
) {
2019 av_log(s
, AV_LOG_ERROR
, "Failed to create imageview: %s\n",
2020 ff_vk_ret2str(ret
));
2021 err
= AVERROR_EXTERNAL
;
2028 buf
= av_buffer_create((uint8_t *)iv
, buf_size
, destroy_imageviews
, s
, 0);
2030 err
= AVERROR(ENOMEM
);
2034 /* Add to queue dependencies */
2035 err
= ff_vk_exec_add_dep_buf(s
, e
, &buf
, 1, 0);
2037 av_buffer_unref(&buf
);
2039 memcpy(views
, iv
->views
, nb_planes
*sizeof(*views
));
2044 for (int i
= 0; i
< iv
->nb_views
; i
++)
2045 vk
->DestroyImageView(s
->hwctx
->act_dev
, iv
->views
[i
], s
->hwctx
->alloc
);
2050 void ff_vk_frame_barrier(FFVulkanContext
*s
, FFVkExecContext
*e
,
2051 AVFrame
*pic
, VkImageMemoryBarrier2
*bar
, int *nb_bar
,
2052 VkPipelineStageFlags2 src_stage
,
2053 VkPipelineStageFlags2 dst_stage
,
2054 VkAccessFlagBits2 new_access
,
2055 VkImageLayout new_layout
,
2059 AVVkFrame
*vkf
= (AVVkFrame
*)pic
->data
[0];
2060 const int nb_images
= ff_vk_count_images(vkf
);
2061 for (int i
= 0; i
< e
->nb_frame_deps
; i
++)
2062 if (e
->frame_deps
[i
]->data
[0] == pic
->data
[0]) {
2063 if (e
->frame_update
[i
])
2068 for (int i
= 0; i
< nb_images
; i
++) {
2069 bar
[*nb_bar
] = (VkImageMemoryBarrier2
) {
2070 .sType
= VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2
,
2072 .srcStageMask
= src_stage
,
2073 .dstStageMask
= dst_stage
,
2074 .srcAccessMask
= found
>= 0 ? e
->access_dst
[found
] : vkf
->access
[i
],
2075 .dstAccessMask
= new_access
,
2076 .oldLayout
= found
>= 0 ? e
->layout_dst
[found
] : vkf
->layout
[0],
2077 .newLayout
= new_layout
,
2078 .srcQueueFamilyIndex
= found
>= 0 ? e
->queue_family_dst
[found
] : vkf
->queue_family
[0],
2079 .dstQueueFamilyIndex
= new_qf
,
2080 .image
= vkf
->img
[i
],
2081 .subresourceRange
= (VkImageSubresourceRange
) {
2082 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
2090 ff_vk_exec_update_frame(s
, e
, pic
, &bar
[*nb_bar
- nb_images
], NULL
);
2093 int ff_vk_shader_init(FFVulkanContext
*s
, FFVulkanShader
*shd
, const char *name
,
2094 VkPipelineStageFlags stage
,
2095 const char *extensions
[], int nb_extensions
,
2096 int lg_x
, int lg_y
, int lg_z
,
2097 uint32_t required_subgroup_size
)
2099 av_bprint_init(&shd
->src
, 0, AV_BPRINT_SIZE_UNLIMITED
);
2103 shd
->lg_size
[0] = lg_x
;
2104 shd
->lg_size
[1] = lg_y
;
2105 shd
->lg_size
[2] = lg_z
;
2107 switch (shd
->stage
) {
2108 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR
:
2109 case VK_SHADER_STAGE_CALLABLE_BIT_KHR
:
2110 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
:
2111 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR
:
2112 case VK_SHADER_STAGE_MISS_BIT_KHR
:
2113 case VK_SHADER_STAGE_RAYGEN_BIT_KHR
:
2114 shd
->bind_point
= VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
;
2116 case VK_SHADER_STAGE_COMPUTE_BIT
:
2117 shd
->bind_point
= VK_PIPELINE_BIND_POINT_COMPUTE
;
2120 shd
->bind_point
= VK_PIPELINE_BIND_POINT_GRAPHICS
;
2124 if (required_subgroup_size
) {
2125 shd
->subgroup_info
.sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO
;
2126 shd
->subgroup_info
.requiredSubgroupSize
= required_subgroup_size
;
2129 av_bprintf(&shd
->src
, "/* %s shader: %s */\n",
2130 (stage
== VK_SHADER_STAGE_TASK_BIT_EXT
||
2131 stage
== VK_SHADER_STAGE_MESH_BIT_EXT
) ?
2133 (shd
->bind_point
== VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
) ?
2135 (shd
->bind_point
== VK_PIPELINE_BIND_POINT_COMPUTE
) ?
2136 "Compute" : "Graphics",
2138 GLSLF(0, #version %i ,460);
2141 /* Common utilities */
2142 GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
2144 GLSLC(0, #extension GL_EXT_scalar_block_layout : require );
2145 GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require );
2146 GLSLC(0, #extension GL_EXT_control_flow_attributes : require );
2147 GLSLC(0, #extension GL_EXT_shader_image_load_formatted : require );
2148 if (s
->extensions
& FF_VK_EXT_EXPECT_ASSUME
) {
2149 GLSLC(0, #extension GL_EXT_expect_assume : require );
2151 GLSLC(0, #define assumeEXT(x) (x) );
2152 GLSLC(0, #define expectEXT(x, c) (x) );
2154 if ((s
->extensions
& FF_VK_EXT_DEBUG_UTILS
) &&
2155 (s
->extensions
& FF_VK_EXT_RELAXED_EXTENDED_INSTR
)) {
2156 GLSLC(0, #extension GL_EXT_debug_printf : require );
2157 GLSLC(0, #define DEBUG );
2160 if (stage
== VK_SHADER_STAGE_TASK_BIT_EXT
||
2161 stage
== VK_SHADER_STAGE_MESH_BIT_EXT
)
2162 GLSLC(0, #extension GL_EXT_mesh_shader : require );
2164 for (int i
= 0; i
< nb_extensions
; i
++)
2165 GLSLF(0, #extension %s : %s ,extensions[i], "require");
2168 GLSLF(0, layout (local_size_x
= %i
, local_size_y
= %i
, local_size_z
= %i
) in
;
2169 , shd
->lg_size
[0], shd
->lg_size
[1], shd
->lg_size
[2]);
2175 void ff_vk_shader_print(void *ctx
, FFVulkanShader
*shd
, int prio
)
2178 const char *p
= shd
->src
.str
;
2179 const char *start
= p
;
2180 const size_t len
= strlen(p
);
2183 av_bprint_init(&buf
, 0, AV_BPRINT_SIZE_UNLIMITED
);
2185 for (int i
= 0; i
< len
; i
++) {
2187 av_bprintf(&buf
, "%i\t", ++line
);
2188 av_bprint_append_data(&buf
, start
, &p
[i
] - start
+ 1);
2193 av_log(ctx
, prio
, "Shader %s: \n%s", shd
->name
, buf
.str
);
2194 av_bprint_finalize(&buf
, NULL
);
2197 static int init_pipeline_layout(FFVulkanContext
*s
, FFVulkanShader
*shd
)
2200 FFVulkanFunctions
*vk
= &s
->vkfn
;
2201 VkPipelineLayoutCreateInfo pipeline_layout_info
;
2203 /* Finally create the pipeline layout */
2204 pipeline_layout_info
= (VkPipelineLayoutCreateInfo
) {
2205 .sType
= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
,
2206 .pSetLayouts
= shd
->desc_layout
,
2207 .setLayoutCount
= shd
->nb_descriptor_sets
,
2208 .pushConstantRangeCount
= shd
->push_consts_num
,
2209 .pPushConstantRanges
= shd
->push_consts
,
2212 ret
= vk
->CreatePipelineLayout(s
->hwctx
->act_dev
, &pipeline_layout_info
,
2213 s
->hwctx
->alloc
, &shd
->pipeline_layout
);
2214 if (ret
!= VK_SUCCESS
) {
2215 av_log(s
, AV_LOG_ERROR
, "Unable to init pipeline layout: %s\n",
2216 ff_vk_ret2str(ret
));
2217 return AVERROR_EXTERNAL
;
2223 static int create_shader_module(FFVulkanContext
*s
, FFVulkanShader
*shd
,
2224 VkShaderModule
*mod
,
2225 uint8_t *spirv
, size_t spirv_len
)
2228 FFVulkanFunctions
*vk
= &s
->vkfn
;
2230 VkShaderModuleCreateInfo shader_module_info
= {
2231 .sType
= VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO
,
2234 .pCode
= (void *)spirv
,
2235 .codeSize
= spirv_len
,
2238 ret
= vk
->CreateShaderModule(s
->hwctx
->act_dev
, &shader_module_info
,
2239 s
->hwctx
->alloc
, mod
);
2240 if (ret
!= VK_SUCCESS
) {
2241 av_log(s
, AV_LOG_ERROR
, "Error creating shader module: %s\n",
2242 ff_vk_ret2str(ret
));
2243 return AVERROR_EXTERNAL
;
2249 static int init_compute_pipeline(FFVulkanContext
*s
, FFVulkanShader
*shd
,
2250 VkShaderModule mod
, const char *entrypoint
)
2253 FFVulkanFunctions
*vk
= &s
->vkfn
;
2255 VkComputePipelineCreateInfo pipeline_create_info
= {
2256 .sType
= VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
,
2257 .flags
= (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) ?
2258 VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT
: 0x0,
2259 .layout
= shd
->pipeline_layout
,
2260 .stage
= (VkPipelineShaderStageCreateInfo
) {
2261 .sType
= VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
,
2262 .pNext
= shd
->subgroup_info
.requiredSubgroupSize
?
2263 &shd
->subgroup_info
: NULL
,
2264 .pName
= entrypoint
,
2265 .flags
= shd
->subgroup_info
.requiredSubgroupSize
?
2266 VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT
: 0x0,
2267 .stage
= shd
->stage
,
2272 ret
= vk
->CreateComputePipelines(s
->hwctx
->act_dev
, VK_NULL_HANDLE
, 1,
2273 &pipeline_create_info
,
2274 s
->hwctx
->alloc
, &shd
->pipeline
);
2275 if (ret
!= VK_SUCCESS
) {
2276 av_log(s
, AV_LOG_ERROR
, "Unable to init compute pipeline: %s\n",
2277 ff_vk_ret2str(ret
));
2278 return AVERROR_EXTERNAL
;
2284 static int create_shader_object(FFVulkanContext
*s
, FFVulkanShader
*shd
,
2285 uint8_t *spirv
, size_t spirv_len
,
2286 const char *entrypoint
)
2289 FFVulkanFunctions
*vk
= &s
->vkfn
;
2290 size_t shader_size
= 0;
2292 VkShaderCreateInfoEXT shader_obj_create
= {
2293 .sType
= VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT
,
2294 .flags
= shd
->subgroup_info
.requiredSubgroupSize
?
2295 VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT
: 0x0,
2296 .stage
= shd
->stage
,
2298 .codeType
= VK_SHADER_CODE_TYPE_SPIRV_EXT
,
2300 .codeSize
= spirv_len
,
2301 .pName
= entrypoint
,
2302 .pSetLayouts
= shd
->desc_layout
,
2303 .setLayoutCount
= shd
->nb_descriptor_sets
,
2304 .pushConstantRangeCount
= shd
->push_consts_num
,
2305 .pPushConstantRanges
= shd
->push_consts
,
2306 .pSpecializationInfo
= NULL
,
2309 ret
= vk
->CreateShadersEXT(s
->hwctx
->act_dev
, 1, &shader_obj_create
,
2310 s
->hwctx
->alloc
, &shd
->object
);
2311 if (ret
!= VK_SUCCESS
) {
2312 av_log(s
, AV_LOG_ERROR
, "Unable to create shader object: %s\n",
2313 ff_vk_ret2str(ret
));
2314 return AVERROR_EXTERNAL
;
2317 if (vk
->GetShaderBinaryDataEXT(s
->hwctx
->act_dev
, shd
->object
,
2318 &shader_size
, NULL
) == VK_SUCCESS
)
2319 av_log(s
, AV_LOG_VERBOSE
, "Shader %s size: %zu binary (%zu SPIR-V)\n",
2320 shd
->name
, shader_size
, spirv_len
);
2325 static int init_descriptors(FFVulkanContext
*s
, FFVulkanShader
*shd
)
2328 FFVulkanFunctions
*vk
= &s
->vkfn
;
2330 shd
->desc_layout
= av_malloc_array(shd
->nb_descriptor_sets
,
2331 sizeof(*shd
->desc_layout
));
2332 if (!shd
->desc_layout
)
2333 return AVERROR(ENOMEM
);
2335 if (!(s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
)) {
2336 int has_singular
= 0;
2337 int max_descriptors
= 0;
2338 for (int i
= 0; i
< shd
->nb_descriptor_sets
; i
++) {
2339 max_descriptors
= FFMAX(max_descriptors
, shd
->desc_set
[i
].nb_bindings
);
2340 if (shd
->desc_set
[i
].singular
)
2343 shd
->use_push
= (s
->extensions
& FF_VK_EXT_PUSH_DESCRIPTOR
) &&
2344 (max_descriptors
<= s
->push_desc_props
.maxPushDescriptors
) &&
2345 (shd
->nb_descriptor_sets
== 1) &&
2346 (has_singular
== 0);
2349 for (int i
= 0; i
< shd
->nb_descriptor_sets
; i
++) {
2350 FFVulkanDescriptorSet
*set
= &shd
->desc_set
[i
];
2351 VkDescriptorSetLayoutCreateInfo desc_layout_create
= {
2352 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO
,
2353 .bindingCount
= set
->nb_bindings
,
2354 .pBindings
= set
->binding
,
2355 .flags
= (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) ?
2356 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT
:
2358 VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR
:
2362 ret
= vk
->CreateDescriptorSetLayout(s
->hwctx
->act_dev
,
2363 &desc_layout_create
,
2365 &shd
->desc_layout
[i
]);
2366 if (ret
!= VK_SUCCESS
) {
2367 av_log(s
, AV_LOG_ERROR
, "Unable to create descriptor set layout: %s",
2368 ff_vk_ret2str(ret
));
2369 return AVERROR_EXTERNAL
;
2372 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
2373 vk
->GetDescriptorSetLayoutSizeEXT(s
->hwctx
->act_dev
, shd
->desc_layout
[i
],
2376 set
->aligned_size
= FFALIGN(set
->layout_size
,
2377 s
->desc_buf_props
.descriptorBufferOffsetAlignment
);
2379 for (int j
= 0; j
< set
->nb_bindings
; j
++)
2380 vk
->GetDescriptorSetLayoutBindingOffsetEXT(s
->hwctx
->act_dev
,
2381 shd
->desc_layout
[i
],
2383 &set
->binding_offset
[j
]);
2390 int ff_vk_shader_link(FFVulkanContext
*s
, FFVulkanShader
*shd
,
2391 uint8_t *spirv
, size_t spirv_len
,
2392 const char *entrypoint
)
2395 FFVulkanFunctions
*vk
= &s
->vkfn
;
2397 err
= init_descriptors(s
, shd
);
2401 err
= init_pipeline_layout(s
, shd
);
2405 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
2406 shd
->bound_buffer_indices
= av_calloc(shd
->nb_descriptor_sets
,
2407 sizeof(*shd
->bound_buffer_indices
));
2408 if (!shd
->bound_buffer_indices
)
2409 return AVERROR(ENOMEM
);
2411 for (int i
= 0; i
< shd
->nb_descriptor_sets
; i
++)
2412 shd
->bound_buffer_indices
[i
] = i
;
2415 if (s
->extensions
& FF_VK_EXT_SHADER_OBJECT
) {
2416 err
= create_shader_object(s
, shd
, spirv
, spirv_len
, entrypoint
);
2421 err
= create_shader_module(s
, shd
, &mod
, spirv
, spirv_len
);
2425 switch (shd
->bind_point
) {
2426 case VK_PIPELINE_BIND_POINT_COMPUTE
:
2427 err
= init_compute_pipeline(s
, shd
, mod
, entrypoint
);
2430 av_log(s
, AV_LOG_ERROR
, "Unsupported shader type: %i\n",
2432 err
= AVERROR(EINVAL
);
2436 vk
->DestroyShaderModule(s
->hwctx
->act_dev
, mod
, s
->hwctx
->alloc
);
/* Per-VkDescriptorType properties used when auto-generating the GLSL
 * declaration for each binding (see ff_vk_shader_add_descriptor_set).
 * Each entry: { opaque update-struct size, GLSL type string, is-uniform,
 * may-take-memory-qualifier, needs-dimension-suffix, has-buffer-contents }.
 * NOTE(review): the declarations of the GLSL type-name string and the
 * uniform flag fields are not visible in this chunk (extraction gap) —
 * the initializers below clearly reference them; confirm against the
 * full file. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
/**
 * Register a descriptor set on a shader and emit the matching GLSL
 * declarations into the shader source (via GLSLA).
 *
 * @param s                     context (extension mask decides pool sizing)
 * @param shd                   shader to add the set to
 * @param desc                  array of binding descriptions
 * @param nb                    number of bindings in desc
 * @param singular              set is shared by all execution contexts
 *                              (one copy) rather than per-context
 * @param print_to_shader_only  only write the GLSL declarations; do not
 *                              allocate any layout state
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 *
 * NOTE(review): this chunk has extraction gaps — several closing braces,
 * `break`s, a `goto`-style skip for print_to_shader_only, and parts of the
 * GLSL emission are not visible; comments below only describe what is
 * visible.
 */
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd,
                                    FFVulkanDescriptorSetBinding *desc, int nb,
                                    int singular, int print_to_shader_only)
    int has_sampler = 0;
    FFVulkanDescriptorSet *set;

    /* Shader-source-only mode skips all layout allocation below */
    if (print_to_shader_only)

    /* Actual layout allocated for the pipeline */
    set = av_realloc_array(shd->desc_set,
                           sizeof(*shd->desc_set),
                           shd->nb_descriptor_sets + 1);
        return AVERROR(ENOMEM);
    shd->desc_set = set;

    /* Point at the new (last) slot and zero it */
    set = &set[shd->nb_descriptor_sets];
    memset(set, 0, sizeof(*set));

    set->binding = av_calloc(nb, sizeof(*set->binding));
        return AVERROR(ENOMEM);

    set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset));
    if (!set->binding_offset) {
        av_freep(&set->binding);
        return AVERROR(ENOMEM);

    /* Translate each FFVulkanDescriptorSetBinding into a Vulkan binding */
    for (int i = 0; i < nb; i++) {
        set->binding[i].binding            = i;
        set->binding[i].descriptorType     = desc[i].type;
        set->binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
        set->binding[i].stageFlags         = desc[i].stages;
        set->binding[i].pImmutableSamplers = desc[i].samplers;

        /* Track whether any binding needs sampler descriptors */
        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)

    /* Buffer usage for the backing descriptor buffer */
    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;

    /* Classic descriptor pools: accumulate per-type pool sizes */
    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
        for (int i = 0; i < nb; i++) {
            VkDescriptorPoolSize *desc_pool_size;
            /* Find an existing pool-size entry for this type */
            for (j = 0; j < shd->nb_desc_pool_size; j++)
                if (shd->desc_pool_size[j].type == desc[i].type)
            /* Not found: grow the pool-size array by one entry */
            if (j >= shd->nb_desc_pool_size) {
                desc_pool_size = av_realloc_array(shd->desc_pool_size,
                                                  sizeof(*desc_pool_size),
                                                  shd->nb_desc_pool_size + 1);
                if (!desc_pool_size)
                    return AVERROR(ENOMEM);

                shd->desc_pool_size = desc_pool_size;
                shd->nb_desc_pool_size++;
                memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize));

            shd->desc_pool_size[j].type             = desc[i].type;
            shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1);

    set->singular = singular;
    set->nb_bindings = nb;
    shd->nb_descriptor_sets++;

    /* Write shader info */
    for (int i = 0; i < nb; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", FFMAX(shd->nb_descriptor_sets - 1, 0), i);

        /* Storage images get their format prefix handled separately below */
        if (desc[i].mem_layout &&
            (desc[i].type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE))
            GLSLA(", %s", desc[i].mem_layout);

        if (prop->is_uniform)

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        /* Storage image: derive u/i type prefix from the mem_layout
         * suffix ("...ui" -> unsigned, "...i" -> signed) */
        if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            if (desc[i].mem_layout) {
                int len = strlen(desc[i].mem_layout);
                if (desc[i].mem_layout[len - 1] == 'i' &&
                    desc[i].mem_layout[len - 2] == 'u') {
                } else if (desc[i].mem_layout[len - 1] == 'i') {
        GLSLA("%s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        /* Buffer-backed types print their block contents */
        if (prop->buf_content) {
            if (desc[i].buf_elems) {
                GLSLA("%s", desc[i].buf_content);
                GLSLA("[%i];", desc[i].buf_elems);
                GLSLA("%s", desc[i].buf_content);

        /* Arrayed binding suffix */
        if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);
/**
 * Register a shader with an execution pool: allocates the per-pool
 * descriptor state (descriptor buffers, or a descriptor pool + sets).
 *
 * @param s     context (extension mask selects the strategy)
 * @param pool  execution pool to register with; reg_shd grows by one
 * @param shd   shader being registered
 * @return 0 on success, negative AVERROR on failure
 *
 * NOTE(review): extraction gaps hide some error checks, pool-create
 * flags and call arguments in this chunk; comments describe only the
 * visible code.
 */
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool,
                               FFVulkanShader *shd)
    FFVulkanShaderData *sd;

    /* Shaders without descriptor sets need no per-pool state */
    if (!shd->nb_descriptor_sets)

    sd = av_realloc_array(pool->reg_shd,
                          sizeof(*pool->reg_shd),
                          pool->nb_reg_shd + 1);
        return AVERROR(ENOMEM);

    /* Take the new slot and zero it */
    sd = &sd[pool->nb_reg_shd++];
    memset(sd, 0, sizeof(*sd));

    sd->nb_descriptor_sets = shd->nb_descriptor_sets;

    if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
        /* One binding info + one backing buffer per descriptor set */
        sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind));
            return AVERROR(ENOMEM);

        sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf));
        if (!sd->desc_set_buf)
            return AVERROR(ENOMEM);

        for (int i = 0; i < sd->nb_descriptor_sets; i++) {
            FFVulkanDescriptorSet *set = &shd->desc_set[i];
            FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i];
            /* Singular sets are shared; others have one copy per
             * execution context in the pool */
            int nb = set->singular ? 1 : pool->pool_size;

            err = ff_vk_create_buf(s, &sdb->buf,
                                   set->aligned_size*nb,
                                   NULL, NULL, set->usage,
                                   VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                   VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

            /* Persistently map so descriptors can be written directly */
            err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0);

            sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
                .usage = set->usage,
                .address = sdb->buf.address,
    } else if (!shd->use_push) {
        /* Classic path: a descriptor pool with enough sets for every
         * execution context */
        FFVulkanFunctions *vk = &s->vkfn;
        VkDescriptorSetLayout *tmp_layouts;
        VkDescriptorSetAllocateInfo set_alloc_info;
        VkDescriptorPoolCreateInfo pool_create_info;

        /* Scale the per-shader pool sizes by the number of contexts */
        for (int i = 0; i < shd->nb_desc_pool_size; i++)
            shd->desc_pool_size[i].descriptorCount *= pool->pool_size;

        pool_create_info = (VkDescriptorPoolCreateInfo) {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .pPoolSizes    = shd->desc_pool_size,
            .poolSizeCount = shd->nb_desc_pool_size,
            .maxSets       = sd->nb_descriptor_sets*pool->pool_size,

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &sd->desc_pool);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;

        tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts));
            return AVERROR(ENOMEM);

        /* Colate each execution context's descriptor set layouts */
        for (int i = 0; i < pool->pool_size; i++)
            for (int j = 0; j < sd->nb_descriptor_sets; j++)
                tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j];

        set_alloc_info = (VkDescriptorSetAllocateInfo) {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = sd->desc_pool,
            .pSetLayouts        = tmp_layouts,
            .descriptorSetCount = pool_create_info.maxSets,

        sd->desc_sets = av_malloc_array(pool_create_info.maxSets,
                                        sizeof(*tmp_layouts));
        if (!sd->desc_sets) {
            av_free(tmp_layouts);
            return AVERROR(ENOMEM);
        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info,
        /* Layout scratch array is no longer needed either way */
        av_free(tmp_layouts);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            av_freep(&sd->desc_sets);
            return AVERROR_EXTERNAL;
2718 static inline FFVulkanShaderData
*get_shd_data(FFVkExecContext
*e
,
2719 FFVulkanShader
*shd
)
2721 for (int i
= 0; i
< e
->parent
->nb_reg_shd
; i
++)
2722 if (e
->parent
->reg_shd
[i
].shd
== shd
)
2723 return &e
->parent
->reg_shd
[i
];
2727 static inline void update_set_descriptor(FFVulkanContext
*s
, FFVkExecContext
*e
,
2728 FFVulkanShader
*shd
, int set
,
2729 int bind_idx
, int array_idx
,
2730 VkDescriptorGetInfoEXT
*desc_get_info
,
2733 FFVulkanFunctions
*vk
= &s
->vkfn
;
2734 FFVulkanDescriptorSet
*desc_set
= &shd
->desc_set
[set
];
2735 FFVulkanShaderData
*sd
= get_shd_data(e
, shd
);
2736 const size_t exec_offset
= desc_set
->singular
? 0 : desc_set
->aligned_size
*e
->idx
;
2738 void *desc
= sd
->desc_set_buf
[set
].desc_mem
+ /* Base */
2739 exec_offset
+ /* Execution context */
2740 desc_set
->binding_offset
[bind_idx
] + /* Descriptor binding */
2741 array_idx
*desc_size
; /* Array position */
2743 vk
->GetDescriptorEXT(s
->hwctx
->act_dev
, desc_get_info
, desc_size
, desc
);
2746 static inline void update_set_pool_write(FFVulkanContext
*s
, FFVkExecContext
*e
,
2747 FFVulkanShader
*shd
, int set
,
2748 VkWriteDescriptorSet
*write_info
)
2750 FFVulkanFunctions
*vk
= &s
->vkfn
;
2751 FFVulkanDescriptorSet
*desc_set
= &shd
->desc_set
[set
];
2752 FFVulkanShaderData
*sd
= get_shd_data(e
, shd
);
2754 if (desc_set
->singular
) {
2755 for (int i
= 0; i
< e
->parent
->pool_size
; i
++) {
2756 write_info
->dstSet
= sd
->desc_sets
[i
*sd
->nb_descriptor_sets
+ set
];
2757 vk
->UpdateDescriptorSets(s
->hwctx
->act_dev
, 1, write_info
, 0, NULL
);
2760 if (shd
->use_push
) {
2761 vk
->CmdPushDescriptorSetKHR(e
->buf
,
2763 shd
->pipeline_layout
,
2767 write_info
->dstSet
= sd
->desc_sets
[e
->idx
*sd
->nb_descriptor_sets
+ set
];
2768 vk
->UpdateDescriptorSets(s
->hwctx
->act_dev
, 1, write_info
, 0, NULL
);
2773 int ff_vk_shader_update_img(FFVulkanContext
*s
, FFVkExecContext
*e
,
2774 FFVulkanShader
*shd
, int set
, int bind
, int offs
,
2775 VkImageView view
, VkImageLayout layout
,
2778 FFVulkanDescriptorSet
*desc_set
= &shd
->desc_set
[set
];
2780 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
2781 VkDescriptorGetInfoEXT desc_get_info
= {
2782 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT
,
2783 .type
= desc_set
->binding
[bind
].descriptorType
,
2785 VkDescriptorImageInfo desc_img_info
= {
2788 .imageLayout
= layout
,
2792 switch (desc_get_info
.type
) {
2793 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE
:
2794 desc_get_info
.data
.pSampledImage
= &desc_img_info
;
2795 desc_size
= s
->desc_buf_props
.sampledImageDescriptorSize
;
2797 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
:
2798 desc_get_info
.data
.pStorageImage
= &desc_img_info
;
2799 desc_size
= s
->desc_buf_props
.storageImageDescriptorSize
;
2801 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT
:
2802 desc_get_info
.data
.pInputAttachmentImage
= &desc_img_info
;
2803 desc_size
= s
->desc_buf_props
.inputAttachmentDescriptorSize
;
2805 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
:
2806 desc_get_info
.data
.pCombinedImageSampler
= &desc_img_info
;
2807 desc_size
= s
->desc_buf_props
.combinedImageSamplerDescriptorSize
;
2810 av_log(s
, AV_LOG_ERROR
, "Invalid descriptor type at set %i binding %i: %i!\n",
2811 set
, bind
, desc_get_info
.type
);
2812 return AVERROR(EINVAL
);
2816 update_set_descriptor(s
, e
, shd
, set
, bind
, offs
,
2817 &desc_get_info
, desc_size
);
2819 VkDescriptorImageInfo desc_pool_write_info_img
= {
2822 .imageLayout
= layout
,
2824 VkWriteDescriptorSet desc_pool_write_info
= {
2825 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
2827 .descriptorCount
= 1,
2828 .dstArrayElement
= offs
,
2829 .descriptorType
= desc_set
->binding
[bind
].descriptorType
,
2830 .pImageInfo
= &desc_pool_write_info_img
,
2832 update_set_pool_write(s
, e
, shd
, set
, &desc_pool_write_info
);
2838 void ff_vk_shader_update_img_array(FFVulkanContext
*s
, FFVkExecContext
*e
,
2839 FFVulkanShader
*shd
, AVFrame
*f
,
2840 VkImageView
*views
, int set
, int binding
,
2841 VkImageLayout layout
, VkSampler sampler
)
2843 AVHWFramesContext
*hwfc
= (AVHWFramesContext
*)f
->hw_frames_ctx
->data
;
2844 const int nb_planes
= av_pix_fmt_count_planes(hwfc
->sw_format
);
2846 for (int i
= 0; i
< nb_planes
; i
++)
2847 ff_vk_shader_update_img(s
, e
, shd
, set
, binding
, i
,
2848 views
[i
], layout
, sampler
);
2851 int ff_vk_shader_update_desc_buffer(FFVulkanContext
*s
, FFVkExecContext
*e
,
2852 FFVulkanShader
*shd
,
2853 int set
, int bind
, int elem
,
2854 FFVkBuffer
*buf
, VkDeviceSize offset
, VkDeviceSize len
,
2857 FFVulkanDescriptorSet
*desc_set
= &shd
->desc_set
[set
];
2859 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
2860 VkDescriptorGetInfoEXT desc_get_info
= {
2861 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT
,
2862 .type
= desc_set
->binding
[bind
].descriptorType
,
2864 VkDescriptorAddressInfoEXT desc_buf_info
= {
2865 .sType
= VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT
,
2866 .address
= buf
->address
+ offset
,
2872 switch (desc_get_info
.type
) {
2873 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
:
2874 desc_get_info
.data
.pUniformBuffer
= &desc_buf_info
;
2875 desc_size
= s
->desc_buf_props
.uniformBufferDescriptorSize
;
2877 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
:
2878 desc_get_info
.data
.pStorageBuffer
= &desc_buf_info
;
2879 desc_size
= s
->desc_buf_props
.storageBufferDescriptorSize
;
2881 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
:
2882 desc_get_info
.data
.pUniformTexelBuffer
= &desc_buf_info
;
2883 desc_size
= s
->desc_buf_props
.uniformTexelBufferDescriptorSize
;
2885 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
:
2886 desc_get_info
.data
.pStorageTexelBuffer
= &desc_buf_info
;
2887 desc_size
= s
->desc_buf_props
.storageTexelBufferDescriptorSize
;
2890 av_log(s
, AV_LOG_ERROR
, "Invalid descriptor type at set %i binding %i: %i!\n",
2891 set
, bind
, desc_get_info
.type
);
2892 return AVERROR(EINVAL
);
2896 update_set_descriptor(s
, e
, shd
, set
, bind
, elem
, &desc_get_info
, desc_size
);
2898 VkDescriptorBufferInfo desc_pool_write_info_buf
= {
2900 .offset
= buf
->virtual_offset
+ offset
,
2903 VkWriteDescriptorSet desc_pool_write_info
= {
2904 .sType
= VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET
,
2906 .descriptorCount
= 1,
2907 .dstArrayElement
= elem
,
2908 .descriptorType
= desc_set
->binding
[bind
].descriptorType
,
2909 .pBufferInfo
= &desc_pool_write_info_buf
,
2911 update_set_pool_write(s
, e
, shd
, set
, &desc_pool_write_info
);
2917 void ff_vk_shader_update_push_const(FFVulkanContext
*s
, FFVkExecContext
*e
,
2918 FFVulkanShader
*shd
,
2919 VkShaderStageFlagBits stage
,
2920 int offset
, size_t size
, void *src
)
2922 FFVulkanFunctions
*vk
= &s
->vkfn
;
2923 vk
->CmdPushConstants(e
->buf
, shd
->pipeline_layout
,
2924 stage
, offset
, size
, src
);
2927 void ff_vk_exec_bind_shader(FFVulkanContext
*s
, FFVkExecContext
*e
,
2928 FFVulkanShader
*shd
)
2930 FFVulkanFunctions
*vk
= &s
->vkfn
;
2931 VkDeviceSize offsets
[1024];
2932 FFVulkanShaderData
*sd
= get_shd_data(e
, shd
);
2934 if (s
->extensions
& FF_VK_EXT_SHADER_OBJECT
) {
2935 VkShaderStageFlagBits stages
= shd
->stage
;
2936 vk
->CmdBindShadersEXT(e
->buf
, 1, &stages
, &shd
->object
);
2938 vk
->CmdBindPipeline(e
->buf
, shd
->bind_point
, shd
->pipeline
);
2941 if (sd
&& sd
->nb_descriptor_sets
) {
2942 if (s
->extensions
& FF_VK_EXT_DESCRIPTOR_BUFFER
) {
2943 for (int i
= 0; i
< sd
->nb_descriptor_sets
; i
++)
2944 offsets
[i
] = shd
->desc_set
[i
].singular
? 0 : shd
->desc_set
[i
].aligned_size
*e
->idx
;
2946 /* Bind descriptor buffers */
2947 vk
->CmdBindDescriptorBuffersEXT(e
->buf
, sd
->nb_descriptor_sets
, sd
->desc_bind
);
2948 /* Binding offsets */
2949 vk
->CmdSetDescriptorBufferOffsetsEXT(e
->buf
, shd
->bind_point
, shd
->pipeline_layout
,
2950 0, sd
->nb_descriptor_sets
,
2951 shd
->bound_buffer_indices
, offsets
);
2952 } else if (!shd
->use_push
) {
2953 vk
->CmdBindDescriptorSets(e
->buf
, shd
->bind_point
, shd
->pipeline_layout
,
2954 0, sd
->nb_descriptor_sets
,
2955 &sd
->desc_sets
[e
->idx
*sd
->nb_descriptor_sets
],
/**
 * Free all state owned by a shader: source buffer, module/object,
 * pipeline, pipeline layout, per-set binding arrays, set layouts, pool
 * sizes and push-constant ranges.
 *
 * @param s    context with device, allocator and function pointers
 * @param shd  shader to tear down; its fields are freed/zeroed
 *
 * NOTE(review): extraction gaps hide some destruction guards and
 * allocator-callback arguments in this chunk; comments describe only
 * the visible code.
 */
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
    FFVulkanFunctions *vk = &s->vkfn;

    /* Release the accumulated GLSL source text */
    av_bprint_finalize(&shd->src, NULL);

    if (shd->shader.module)
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,

    vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc);

    vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc);
    if (shd->pipeline_layout)
        vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout,

    /* Per-set binding descriptions and descriptor-buffer offsets */
    for (int i = 0; i < shd->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &shd->desc_set[i];
        av_free(set->binding);
        av_free(set->binding_offset);

    if (shd->desc_layout) {
        for (int i = 0; i < shd->nb_descriptor_sets; i++)
            if (shd->desc_layout[i])
                vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i],

    av_freep(&shd->desc_pool_size);
    av_freep(&shd->desc_layout);
    av_freep(&shd->desc_set);
    av_freep(&shd->bound_buffer_indices);
    av_freep(&shd->push_consts);
    shd->push_consts_num = 0;
3002 void ff_vk_uninit(FFVulkanContext
*s
)
3004 av_freep(&s
->query_props
);
3005 av_freep(&s
->qf_props
);
3006 av_freep(&s
->video_props
);
3007 av_freep(&s
->coop_mat_props
);
3008 av_freep(&s
->host_image_copy_layouts
);
3010 av_buffer_unref(&s
->device_ref
);
3011 av_buffer_unref(&s
->frames_ref
);
3014 int ff_vk_init(FFVulkanContext
*s
, void *log_parent
,
3015 AVBufferRef
*device_ref
, AVBufferRef
*frames_ref
)
3019 static const AVClass vulkan_context_class
= {
3021 .version
= LIBAVUTIL_VERSION_INT
,
3022 .parent_log_context_offset
= offsetof(FFVulkanContext
, log_parent
),
3025 memset(s
, 0, sizeof(*s
));
3026 s
->log_parent
= log_parent
;
3027 s
->class = &vulkan_context_class
;
3030 s
->frames_ref
= av_buffer_ref(frames_ref
);
3032 return AVERROR(ENOMEM
);
3034 s
->frames
= (AVHWFramesContext
*)s
->frames_ref
->data
;
3035 s
->hwfc
= s
->frames
->hwctx
;
3037 device_ref
= s
->frames
->device_ref
;
3040 s
->device_ref
= av_buffer_ref(device_ref
);
3041 if (!s
->device_ref
) {
3043 return AVERROR(ENOMEM
);
3046 s
->device
= (AVHWDeviceContext
*)s
->device_ref
->data
;
3047 s
->hwctx
= s
->device
->hwctx
;
3049 s
->extensions
= ff_vk_extensions_to_mask(s
->hwctx
->enabled_dev_extensions
,
3050 s
->hwctx
->nb_enabled_dev_extensions
);
3051 s
->extensions
|= ff_vk_extensions_to_mask(s
->hwctx
->enabled_inst_extensions
,
3052 s
->hwctx
->nb_enabled_inst_extensions
);
3054 err
= ff_vk_load_functions(s
->device
, &s
->vkfn
, s
->extensions
, 1, 1);
3060 err
= ff_vk_load_props(s
);