/* libavutil/hwcontext_vulkan.c — Vulkan hwdevice/hwframes implementation */
1 /*
2 * Copyright (c) Lynne
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #define VK_NO_PROTOTYPES
22 #define VK_ENABLE_BETA_EXTENSIONS
23
24 #ifdef _WIN32
25 #include <windows.h> /* Included to prevent conflicts with CreateSemaphore */
26 #include <versionhelpers.h>
27 #include "compat/w32dlfcn.h"
28 #else
29 #include <dlfcn.h>
30 #include <unistd.h>
31 #endif
32
33 #include "thread.h"
34
35 #include "config.h"
36 #include "pixdesc.h"
37 #include "avstring.h"
38 #include "imgutils.h"
39 #include "hwcontext.h"
40 #include "hwcontext_internal.h"
41 #include "hwcontext_vulkan.h"
42 #include "mem.h"
43
44 #include "vulkan.h"
45 #include "vulkan_loader.h"
46
47 #if CONFIG_VAAPI
48 #include "hwcontext_vaapi.h"
49 #endif
50
51 #if CONFIG_LIBDRM
52 #if CONFIG_VAAPI
53 #include <va/va_drmcommon.h>
54 #endif
55 #ifdef __linux__
56 #include <sys/sysmacros.h>
57 #endif
58 #include <sys/stat.h>
59 #include <xf86drm.h>
60 #include <drm_fourcc.h>
61 #include "hwcontext_drm.h"
62 #endif
63
64 #if HAVE_LINUX_DMA_BUF_H
65 #include <sys/ioctl.h>
66 #include <linux/dma-buf.h>
67 #endif
68
69 #if CONFIG_CUDA
70 #include "hwcontext_cuda_internal.h"
71 #include "cuda_check.h"
72 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
73 #endif
74
/*
 * Aggregate of all device feature structs FFmpeg queries/enables.
 * `device` is the head of a pNext chain built at runtime in
 * device_features_init(); the remaining members are the chained
 * extension/core-version feature structs. Members under #ifdef are
 * only present when built against Vulkan headers that define the
 * corresponding extension.
 */
typedef struct VulkanDeviceFeatures {
    /* Head of the pNext feature chain (core 1.0 features) */
    VkPhysicalDeviceFeatures2 device;

    /* Core feature structs per Vulkan minor version */
    VkPhysicalDeviceVulkan11Features vulkan_1_1;
    VkPhysicalDeviceVulkan12Features vulkan_1_2;
    VkPhysicalDeviceVulkan13Features vulkan_1_3;
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore;
    VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR subgroup_rotate;
    VkPhysicalDeviceHostImageCopyFeaturesEXT host_image_copy;

#ifdef VK_EXT_zero_initialize_device_memory
    VkPhysicalDeviceZeroInitializeDeviceMemoryFeaturesEXT zero_initialize;
#endif

#ifdef VK_KHR_shader_expect_assume
    VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume;
#endif

    /* Video coding feature structs */
    VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1;
#ifdef VK_KHR_video_maintenance2
    VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2;
#endif
#ifdef VK_KHR_video_decode_vp9
    VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode;
#endif
#ifdef VK_KHR_video_encode_av1
    VkPhysicalDeviceVideoEncodeAV1FeaturesKHR av1_encode;
#endif

    /* Compute/shader extension feature structs */
    VkPhysicalDeviceShaderObjectFeaturesEXT shader_object;
    VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix;
    VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer;
    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float;

#ifdef VK_KHR_shader_relaxed_extended_instruction
    VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction;
#endif
} VulkanDeviceFeatures;
113
/*
 * Private device context. The public AVVulkanDeviceContext is embedded
 * first so the private struct can be recovered from ctx->hwctx.
 */
typedef struct VulkanDevicePriv {
    /**
     * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanDeviceContext p;

    /* Vulkan library and loader functions */
    void *libvulkan;

    FFVulkanContext vkctx;
    AVVulkanDeviceQueueFamily *compute_qf;
    AVVulkanDeviceQueueFamily *transfer_qf;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
    VkPhysicalDeviceDriverProperties dprops;

    /* Opaque FD external semaphore properties */
    VkExternalSemaphoreProperties ext_sem_props_opaque;

    /* Enabled features */
    VulkanDeviceFeatures feats;

    /* Queues: one mutex per queue of each queue family */
    pthread_mutex_t **qf_mutex;
    uint32_t nb_tot_qfs;
    /* Queue family indices used for images (bounded at 64 families) */
    uint32_t img_qfs[64];
    uint32_t nb_img_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Settings */
    int use_linear_images;

    /* Option to allocate all image planes in a single allocation */
    int contiguous_planes;

    /* Disable multiplane images */
    int disable_multiplane;

    /* Prefer memcpy over dynamic host pointer imports */
    int avoid_host_import;

    /* Maximum queues */
    int limit_queues;
} VulkanDevicePriv;
163
/*
 * Private frames context; embeds the public AVVulkanFramesContext first
 * so it can be recovered from hwfc->hwctx.
 */
typedef struct VulkanFramesPriv {
    /**
     * The public AVVulkanFramesContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanFramesContext p;

    /* Image conversions */
    FFVkExecPool compute_exec;

    /* Image transfers */
    FFVkExecPool upload_exec;
    FFVkExecPool download_exec;

    /* Temporary buffer pools */
    AVBufferPool *tmp;

    /* Modifier info list to free at uninit */
    VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;

    /* Properties for DRM modifier for each plane in the image */
    VkDrmFormatModifierPropertiesEXT drm_format_modifier_properties[5];
} VulkanFramesPriv;
186
/*
 * Per-frame internal state, guarded by update_mutex.
 * The CUDA members cache external-memory/semaphore imports per data plane.
 */
typedef struct AVVkFrameInternal {
    pthread_mutex_t update_mutex;

#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#ifdef _WIN32
    /* Win32 handles backing the CUDA imports; closed at uninit */
    HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS];
    HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS];
#endif
#endif
} AVVkFrameInternal;
204
/* Initialize all structs in VulkanDeviceFeatures.
 * Sets the sType of the chain head, then registers every feature struct
 * via FF_VK_STRUCT_EXT, which presumably links it into the pNext chain
 * keyed on the given extension flag (FF_VK_EXT_NO_FLAG = unconditional)
 * — see vulkan.h for the macro's exact semantics. */
static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *feats)
{
    VulkanDevicePriv *p = ctx->hwctx;
    FFVulkanContext *s = &p->vkctx;

    feats->device = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
    };

    /* Core version feature structs, always chained */
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_1, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_2, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_3, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

    FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->subgroup_rotate, FF_VK_EXT_SUBGROUP_ROTATE,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES_KHR);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->host_image_copy, FF_VK_EXT_HOST_IMAGE_COPY,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT);

#ifdef VK_EXT_zero_initialize_device_memory
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->zero_initialize, FF_VK_EXT_ZERO_INITIALIZE,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_DEVICE_MEMORY_FEATURES_EXT);
#endif

#ifdef VK_KHR_shader_expect_assume
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR);
#endif

    /* Video coding features */
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);
#ifdef VK_KHR_video_maintenance2
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR);
#endif
#ifdef VK_KHR_video_decode_vp9
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vp9_decode, FF_VK_EXT_VIDEO_DECODE_VP9,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR);
#endif
#ifdef VK_KHR_video_encode_av1
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->av1_encode, FF_VK_EXT_VIDEO_ENCODE_AV1,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR);
#endif

    /* Compute/shader extension features */
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->cooperative_matrix, FF_VK_EXT_COOP_MATRIX,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->descriptor_buffer, FF_VK_EXT_DESCRIPTOR_BUFFER,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->atomic_float, FF_VK_EXT_ATOMIC_FLOAT,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);

#ifdef VK_KHR_shader_relaxed_extended_instruction
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->relaxed_extended_instruction, FF_VK_EXT_RELAXED_EXTENDED_INSTR,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR);
#endif
}
268
269 /* Copy all needed device features */
270 static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceFeatures *src)
271 {
272 #define COPY_VAL(VAL) \
273 do { \
274 dst->VAL = src->VAL; \
275 } while (0) \
276
277 COPY_VAL(device.features.shaderImageGatherExtended);
278 COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
279 COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
280 COPY_VAL(device.features.fragmentStoresAndAtomics);
281 COPY_VAL(device.features.vertexPipelineStoresAndAtomics);
282 COPY_VAL(device.features.shaderInt64);
283 COPY_VAL(device.features.shaderInt16);
284 COPY_VAL(device.features.shaderFloat64);
285 COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
286 COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
287
288 COPY_VAL(vulkan_1_1.samplerYcbcrConversion);
289 COPY_VAL(vulkan_1_1.storagePushConstant16);
290 COPY_VAL(vulkan_1_1.storageBuffer16BitAccess);
291 COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess);
292
293 COPY_VAL(vulkan_1_2.timelineSemaphore);
294 COPY_VAL(vulkan_1_2.scalarBlockLayout);
295 COPY_VAL(vulkan_1_2.bufferDeviceAddress);
296 COPY_VAL(vulkan_1_2.hostQueryReset);
297 COPY_VAL(vulkan_1_2.storagePushConstant8);
298 COPY_VAL(vulkan_1_2.shaderInt8);
299 COPY_VAL(vulkan_1_2.storageBuffer8BitAccess);
300 COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess);
301 COPY_VAL(vulkan_1_2.shaderFloat16);
302 COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics);
303 COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
304 COPY_VAL(vulkan_1_2.vulkanMemoryModel);
305 COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
306 COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
307 COPY_VAL(vulkan_1_2.runtimeDescriptorArray);
308
309 COPY_VAL(vulkan_1_3.dynamicRendering);
310 COPY_VAL(vulkan_1_3.maintenance4);
311 COPY_VAL(vulkan_1_3.synchronization2);
312 COPY_VAL(vulkan_1_3.computeFullSubgroups);
313 COPY_VAL(vulkan_1_3.subgroupSizeControl);
314 COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory);
315 COPY_VAL(vulkan_1_3.dynamicRendering);
316
317 COPY_VAL(timeline_semaphore.timelineSemaphore);
318 COPY_VAL(subgroup_rotate.shaderSubgroupRotate);
319 COPY_VAL(host_image_copy.hostImageCopy);
320
321 #ifdef VK_EXT_zero_initialize_device_memory
322 COPY_VAL(zero_initialize.zeroInitializeDeviceMemory);
323 #endif
324
325 COPY_VAL(video_maintenance_1.videoMaintenance1);
326 #ifdef VK_KHR_video_maintenance2
327 COPY_VAL(video_maintenance_2.videoMaintenance2);
328 #endif
329
330 #ifdef VK_KHR_video_decode_vp9
331 COPY_VAL(vp9_decode.videoDecodeVP9);
332 #endif
333
334 #ifdef VK_KHR_video_encode_av1
335 COPY_VAL(av1_encode.videoEncodeAV1);
336 #endif
337
338 COPY_VAL(shader_object.shaderObject);
339
340 COPY_VAL(cooperative_matrix.cooperativeMatrix);
341
342 COPY_VAL(descriptor_buffer.descriptorBuffer);
343 COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors);
344
345 COPY_VAL(atomic_float.shaderBufferFloat32Atomics);
346 COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd);
347
348 #ifdef VK_KHR_shader_relaxed_extended_instruction
349 COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction);
350 #endif
351
352 #ifdef VK_KHR_shader_expect_assume
353 COPY_VAL(expect_assume.shaderExpectAssume);
354 #endif
355
356 #undef COPY_VAL
357 }
358
/* Aspect masks covering all planes of 2- and 3-plane multiplane formats */
#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)

/* Mapping table between AVPixelFormat and Vulkan formats.
 * vkf:                the preferred (possibly multiplane) VkFormat
 * pixfmt:             the corresponding FFmpeg pixel format
 * aspect:             image aspect mask for the preferred format
 * vk_planes:          number of memory planes in the preferred format
 * nb_images:          number of VkImages used in the preferred representation
 * nb_images_fallback: number of VkImages when using the fallback formats
 * fallback:           per-plane single-plane formats used as a fallback
 *                     (identical to vkf for single-plane formats) */
static const struct FFVkFormatEntry {
    VkFormat vkf;
    enum AVPixelFormat pixfmt;
    VkImageAspectFlags aspect;
    int vk_planes;
    int nb_images;
    int nb_images_fallback;
    const VkFormat fallback[5];
} vk_formats_list[] = {
    /* Gray formats */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY12, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY14, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_UINT, AV_PIX_FMT_GRAY32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },

    /* RGB formats */
    { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
    { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
    { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
    { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } },
    { VK_FORMAT_R32G32B32_SFLOAT, AV_PIX_FMT_RGBF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT } },
    { VK_FORMAT_R32G32B32A32_SFLOAT, AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } },
    { VK_FORMAT_R32G32B32_UINT, AV_PIX_FMT_RGB96, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } },
    { VK_FORMAT_R32G32B32A32_UINT, AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } },

    /* Planar RGB */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRP, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP10, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP12, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP14, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP16, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Planar RGB + Alpha */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP14, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_UINT, AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Bayer */
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },

    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 422 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 444 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Single plane 422 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },

    /* Planar YUVA 420 at 8, 10 and 16 bits */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA420P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Planar YUVA 422 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA422P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Planar YUVA 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA444P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Single plane 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
481
482 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
483 {
484 for (int i = 0; i < nb_vk_formats_list; i++)
485 if (vk_formats_list[i].pixfmt == p)
486 return vk_formats_list[i].fallback;
487 return NULL;
488 }
489
490 static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
491 {
492 for (int i = 0; i < nb_vk_formats_list; i++)
493 if (vk_formats_list[i].pixfmt == p)
494 return &vk_formats_list[i];
495 return NULL;
496 }
497
/*
 * Resolve a pixel format to usable Vulkan format(s) on the given device.
 *
 * Queries the device's format features for the preferred (possibly
 * multiplane) format and, when it differs, the single-plane fallback,
 * then picks whichever satisfies the basic sampled/transfer feature set
 * (and storage support when need_storage is set).
 *
 * Outputs (each optional, may be NULL):
 *   fmts            - chosen format(s), one per output image
 *   nb_images       - number of images in the chosen representation
 *   aspect          - aspect mask of the preferred format
 *   supported_usage - usage flags derived from the format features
 *
 * Returns 0 on success, AVERROR(ENOTSUP) if the format exists in the
 * table but lacks the required features, AVERROR(EINVAL) if the pixel
 * format has no Vulkan mapping at all.
 */
static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                              VkImageTiling tiling,
                              VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */
                              int *nb_images,                      /* Output number of images */
                              VkImageAspectFlags *aspect,          /* Output aspect */
                              VkImageUsageFlags *supported_usage,  /* Output supported usage */
                              int disable_multiplane, int need_storage)
{
    VulkanDevicePriv *priv = dev_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &priv->p;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    /* Minimum feature set any usable format must support */
    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;

    for (int i = 0; i < nb_vk_formats_list; i++) {
        if (vk_formats_list[i].pixfmt == p) {
            /* VkFormatProperties3 is chained to get 64-bit feature flags */
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
            int basics_primary = 0, basics_secondary = 0;
            int storage_primary = 0, storage_secondary = 0;

            /* Query features of the preferred format */
            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                   vk_formats_list[i].vkf,
                                                   &prop);

            feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
                            fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
            basics_primary = (feats_primary & basic_flags) == basic_flags;
            storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);

            /* Query the fallback format only if it differs from the primary
             * (reuses the same prop/fprops structs for the second query) */
            if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                       vk_formats_list[i].fallback[0],
                                                       &prop);
                feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
                                  fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
                basics_secondary = (feats_secondary & basic_flags) == basic_flags;
                storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                basics_secondary = basics_primary;
                storage_secondary = storage_primary;
            }

            /* Prefer the primary (multiplane) representation, unless
             * multiplane is disabled or required storage is missing */
            if (basics_primary &&
                !(disable_multiplane && vk_formats_list[i].vk_planes > 1) &&
                (!need_storage || (need_storage && (storage_primary | storage_secondary)))) {
                if (fmts) {
                    if (vk_formats_list[i].nb_images > 1) {
                        for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                            fmts[j] = vk_formats_list[i].fallback[j];
                    } else {
                        fmts[0] = vk_formats_list[i].vkf;
                    }
                }
                if (nb_images)
                    *nb_images = 1;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = ff_vk_map_feats_to_usage(feats_primary) |
                                       ((need_storage && (storage_primary | storage_secondary)) ?
                                        VK_IMAGE_USAGE_STORAGE_BIT : 0);
                return 0;
            } else if (basics_secondary &&
                       (!need_storage || (need_storage && storage_secondary))) {
                /* Fall back to one single-plane image per plane */
                if (fmts) {
                    for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                        fmts[j] = vk_formats_list[i].fallback[j];
                }
                if (nb_images)
                    *nb_images = vk_formats_list[i].nb_images_fallback;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = ff_vk_map_feats_to_usage(feats_secondary);
                return 0;
            } else {
                return AVERROR(ENOTSUP);
            }
        }
    }

    return AVERROR(EINVAL);
}
590
591 #if CONFIG_VULKAN_STATIC
592 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
593 const char *pName);
594 #endif
595
596 static int load_libvulkan(AVHWDeviceContext *ctx)
597 {
598 VulkanDevicePriv *p = ctx->hwctx;
599 AVVulkanDeviceContext *hwctx = &p->p;
600
601 #if CONFIG_VULKAN_STATIC
602 hwctx->get_proc_addr = vkGetInstanceProcAddr;
603 #else
604 static const char *lib_names[] = {
605 #if defined(_WIN32)
606 "vulkan-1.dll",
607 #elif defined(__APPLE__)
608 "libvulkan.dylib",
609 "libvulkan.1.dylib",
610 "libMoltenVK.dylib",
611 #else
612 "libvulkan.so.1",
613 "libvulkan.so",
614 #endif
615 };
616
617 for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
618 p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
619 if (p->libvulkan)
620 break;
621 }
622
623 if (!p->libvulkan) {
624 av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
625 return AVERROR_UNKNOWN;
626 }
627
628 hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");
629 #endif /* CONFIG_VULKAN_STATIC */
630
631 return 0;
632 }
633
/* An optional extension name paired with the internal flag set when the
 * extension is enabled. */
typedef struct VulkanOptExtension {
    const char *name;        /* Vulkan extension name string */
    FFVulkanExtensions flag; /* Corresponding FF_VK_EXT_* flag */
} VulkanOptExtension;
638
/* Optional instance-level extensions, enabled when available */
static const VulkanOptExtension optional_instance_exts[] = {
    { VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
#ifdef __APPLE__
    /* Needed to enumerate MoltenVK (a portability implementation) */
    { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
#endif
};
645
/* Optional device-level extensions, enabled when available.
 * Entries under #ifdef require matching Vulkan header support at build time. */
static const VulkanOptExtension optional_device_exts[] = {
    /* Misc or required by other extensions */
    { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_PORTABILITY_SUBSET },
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
    { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER },
    { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
    { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
    { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
    { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
    { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE },
    { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME, FF_VK_EXT_HOST_IMAGE_COPY },
#ifdef VK_EXT_zero_initialize_device_memory
    { VK_EXT_ZERO_INITIALIZE_DEVICE_MEMORY_EXTENSION_NAME, FF_VK_EXT_ZERO_INITIALIZE },
#endif
#ifdef VK_KHR_shader_expect_assume
    { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME },
#endif
    { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 },
#ifdef VK_KHR_video_maintenance2
    { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 },
#endif

    /* Imports/exports */
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY },
#ifdef _WIN32
    { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
    { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif

    /* Video encoding/decoding */
    { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
    { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE },
    { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
    { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 },
    { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
    { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
    { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
#ifdef VK_KHR_video_decode_vp9
    { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 },
#endif
#ifdef VK_KHR_video_encode_av1
    { VK_KHR_VIDEO_ENCODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_AV1 },
#endif
    { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
};
695
696 static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
697 VkDebugUtilsMessageTypeFlagsEXT messageType,
698 const VkDebugUtilsMessengerCallbackDataEXT *data,
699 void *priv)
700 {
701 int l;
702 AVHWDeviceContext *ctx = priv;
703
704 /* Ignore false positives */
705 switch (data->messageIdNumber) {
706 case 0x086974c1: /* BestPractices-vkCreateCommandPool-command-buffer-reset */
707 case 0xfd92477a: /* BestPractices-vkAllocateMemory-small-allocation */
708 case 0x618ab1e7: /* VUID-VkImageViewCreateInfo-usage-02275 */
709 case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */
710 return VK_FALSE;
711 default:
712 break;
713 }
714
715 switch (severity) {
716 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
717 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break;
718 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
719 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break;
720 default: l = AV_LOG_DEBUG; break;
721 }
722
723 av_log(ctx, l, "%s\n", data->pMessage);
724 for (int i = 0; i < data->cmdBufLabelCount; i++)
725 av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);
726
727 return VK_FALSE;
728 }
729
/* Appends a copy of the string val to the dynamically grown array list and
 * increments count on success. On allocation failure, sets err and jumps to
 * the caller's "fail" label.
 * The realloc result goes through a temporary so that the previously built
 * list is NOT clobbered (and leaked) if av_realloc_array() fails; the caller
 * can still release it with RELEASE_PROPS(list, count) in its fail path.
 * count is only incremented once the new entry is fully valid. */
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        void *grown_list = av_realloc_array(list, count + 1, sizeof(*list));   \
        if (!grown_list) {                                                     \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list = grown_list;                                                     \
        list[count] = av_strdup(val);                                          \
        if (!list[count]) {                                                    \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        count++;                                                               \
    } while(0)
743
/* Frees count strings previously added with ADD_VAL_TO_LIST, then the array
 * itself. No-op when props is NULL.
 * Wrapped in do { } while (0) so the macro behaves as a single statement and
 * cannot break an unbraced if/else at a call site. */
#define RELEASE_PROPS(props, count)                                            \
    do {                                                                       \
        if (props) {                                                           \
            for (int i = 0; i < count; i++)                                    \
                av_free((void *)((props)[i]));                                 \
            av_free((void *)props);                                            \
        }                                                                      \
    } while (0)
750
751 static int vulkan_device_has_rebar(AVHWDeviceContext *ctx)
752 {
753 VulkanDevicePriv *p = ctx->hwctx;
754 VkDeviceSize max_vram = 0, max_visible_vram = 0;
755
756 /* Get device memory properties */
757 av_assert0(p->mprops.memoryTypeCount);
758 for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
759 const VkMemoryType type = p->mprops.memoryTypes[i];
760 const VkMemoryHeap heap = p->mprops.memoryHeaps[type.heapIndex];
761 if (!(type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
762 continue;
763 max_vram = FFMAX(max_vram, heap.size);
764 if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
765 max_visible_vram = FFMAX(max_visible_vram, heap.size);
766 }
767
768 return max_vram - max_visible_vram < 1024; /* 1 kB tolerance */
769 }
770
/* Debugging modes selectable through the "debug" device-creation option,
 * either by name ("validate", "printf", "practices", "profile") or by the
 * numeric value of the corresponding constant. */
enum FFVulkanDebugMode {
    FF_VULKAN_DEBUG_NONE = 0,
    /* Standard GPU-assisted validation */
    FF_VULKAN_DEBUG_VALIDATE = 1,
    /* Passes printfs in shaders to the debug callback */
    FF_VULKAN_DEBUG_PRINTF = 2,
    /* Enables extra printouts */
    FF_VULKAN_DEBUG_PRACTICES = 3,
    /* Disables validation but keeps shader debug info and optimizations */
    FF_VULKAN_DEBUG_PROFILE = 4,

    /* Number of valid modes; used for range-checking numeric input */
    FF_VULKAN_DEBUG_NB,
};
784
785 static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
786 const char * const **dst, uint32_t *num,
787 enum FFVulkanDebugMode debug_mode)
788 {
789 const char *tstr;
790 const char **extension_names = NULL;
791 VulkanDevicePriv *p = ctx->hwctx;
792 AVVulkanDeviceContext *hwctx = &p->p;
793 FFVulkanFunctions *vk = &p->vkctx.vkfn;
794 int err = 0, found, extensions_found = 0;
795
796 const char *mod;
797 int optional_exts_num;
798 uint32_t sup_ext_count;
799 char *user_exts_str = NULL;
800 AVDictionaryEntry *user_exts;
801 VkExtensionProperties *sup_ext;
802 const VulkanOptExtension *optional_exts;
803
804 if (!dev) {
805 mod = "instance";
806 optional_exts = optional_instance_exts;
807 optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
808 user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
809 if (user_exts) {
810 user_exts_str = av_strdup(user_exts->value);
811 if (!user_exts_str) {
812 err = AVERROR(ENOMEM);
813 goto fail;
814 }
815 }
816 vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
817 sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
818 if (!sup_ext)
819 return AVERROR(ENOMEM);
820 vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
821 } else {
822 mod = "device";
823 optional_exts = optional_device_exts;
824 optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
825 user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
826 if (user_exts) {
827 user_exts_str = av_strdup(user_exts->value);
828 if (!user_exts_str) {
829 err = AVERROR(ENOMEM);
830 goto fail;
831 }
832 }
833 vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
834 &sup_ext_count, NULL);
835 sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
836 if (!sup_ext)
837 return AVERROR(ENOMEM);
838 vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
839 &sup_ext_count, sup_ext);
840 }
841
842 for (int i = 0; i < optional_exts_num; i++) {
843 tstr = optional_exts[i].name;
844 found = 0;
845
846 /* Intel has had a bad descriptor buffer implementation for a while */
847 if (p->dprops.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA &&
848 !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME))
849 continue;
850
851 /* Check if the device has ReBAR for host image copies */
852 if (!strcmp(tstr, VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME) &&
853 !vulkan_device_has_rebar(ctx))
854 continue;
855
856 if (dev &&
857 ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
858 (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
859 (debug_mode == FF_VULKAN_DEBUG_PRACTICES)) &&
860 !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) {
861 continue;
862 }
863
864 for (int j = 0; j < sup_ext_count; j++) {
865 if (!strcmp(tstr, sup_ext[j].extensionName)) {
866 found = 1;
867 break;
868 }
869 }
870 if (!found)
871 continue;
872
873 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
874 p->vkctx.extensions |= optional_exts[i].flag;
875 ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
876 }
877
878 if (!dev &&
879 ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
880 (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
881 (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) {
882 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
883 found = 0;
884 for (int j = 0; j < sup_ext_count; j++) {
885 if (!strcmp(tstr, sup_ext[j].extensionName)) {
886 found = 1;
887 break;
888 }
889 }
890 if (found) {
891 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
892 ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
893 } else {
894 av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
895 tstr);
896 err = AVERROR(EINVAL);
897 goto fail;
898 }
899 }
900
901 #ifdef VK_KHR_shader_relaxed_extended_instruction
902 if (((debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
903 (debug_mode == FF_VULKAN_DEBUG_PROFILE)) && dev) {
904 tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME;
905 found = 0;
906 for (int j = 0; j < sup_ext_count; j++) {
907 if (!strcmp(tstr, sup_ext[j].extensionName)) {
908 found = 1;
909 break;
910 }
911 }
912 if (found) {
913 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
914 ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
915 } else {
916 av_log(ctx, AV_LOG_ERROR, "Debug_printf/profile enabled, but extension \"%s\" not found!\n",
917 tstr);
918 err = AVERROR(EINVAL);
919 goto fail;
920 }
921 }
922 #endif
923
924 if (user_exts_str) {
925 char *save, *token = av_strtok(user_exts_str, "+", &save);
926 while (token) {
927 found = 0;
928 for (int j = 0; j < sup_ext_count; j++) {
929 if (!strcmp(token, sup_ext[j].extensionName)) {
930 found = 1;
931 break;
932 }
933 }
934 if (found) {
935 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
936 ADD_VAL_TO_LIST(extension_names, extensions_found, token);
937 } else {
938 av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
939 mod, token);
940 }
941 token = av_strtok(NULL, "+", &save);
942 }
943 }
944
945 *dst = extension_names;
946 *num = extensions_found;
947
948 av_free(user_exts_str);
949 av_free(sup_ext);
950 return 0;
951
952 fail:
953 RELEASE_PROPS(extension_names, extensions_found);
954 av_free(user_exts_str);
955 av_free(sup_ext);
956 return err;
957 }
958
959 static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
960 const char * const **dst, uint32_t *num,
961 enum FFVulkanDebugMode *debug_mode)
962 {
963 int err = 0;
964 VulkanDevicePriv *priv = ctx->hwctx;
965 FFVulkanFunctions *vk = &priv->vkctx.vkfn;
966
967 static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" };
968 int layer_standard_validation_found = 0;
969
970 uint32_t sup_layer_count;
971 VkLayerProperties *sup_layers;
972
973 AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0);
974 char *user_layers_str = NULL;
975 char *save, *token;
976
977 const char **enabled_layers = NULL;
978 uint32_t enabled_layers_count = 0;
979
980 AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
981 enum FFVulkanDebugMode mode;
982
983 *debug_mode = mode = FF_VULKAN_DEBUG_NONE;
984
985 /* Get a list of all layers */
986 vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL);
987 sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties));
988 if (!sup_layers)
989 return AVERROR(ENOMEM);
990 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);
991
992 av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n");
993 for (int i = 0; i < sup_layer_count; i++)
994 av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName);
995
996 /* If no user layers or debug layers are given, return */
997 if (!debug_opt && !user_layers)
998 goto end;
999
1000 /* Check for any properly supported validation layer */
1001 if (debug_opt) {
1002 if (!strcmp(debug_opt->value, "profile")) {
1003 mode = FF_VULKAN_DEBUG_PROFILE;
1004 } else if (!strcmp(debug_opt->value, "printf")) {
1005 mode = FF_VULKAN_DEBUG_PRINTF;
1006 } else if (!strcmp(debug_opt->value, "validate")) {
1007 mode = FF_VULKAN_DEBUG_VALIDATE;
1008 } else if (!strcmp(debug_opt->value, "practices")) {
1009 mode = FF_VULKAN_DEBUG_PRACTICES;
1010 } else {
1011 char *end_ptr = NULL;
1012 int idx = strtol(debug_opt->value, &end_ptr, 10);
1013 if (end_ptr == debug_opt->value || end_ptr[0] != '\0' ||
1014 idx < 0 || idx >= FF_VULKAN_DEBUG_NB) {
1015 av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n",
1016 debug_opt->value);
1017 err = AVERROR(EINVAL);
1018 goto end;
1019 }
1020 mode = idx;
1021 }
1022 }
1023
1024 /* If mode is VALIDATE or PRINTF, try to find the standard validation layer extension */
1025 if ((mode == FF_VULKAN_DEBUG_VALIDATE) ||
1026 (mode == FF_VULKAN_DEBUG_PRINTF) ||
1027 (mode == FF_VULKAN_DEBUG_PRACTICES)) {
1028 for (int i = 0; i < sup_layer_count; i++) {
1029 if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) {
1030 av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n",
1031 layer_standard_validation);
1032 ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation);
1033 *debug_mode = mode;
1034 layer_standard_validation_found = 1;
1035 break;
1036 }
1037 }
1038 if (!layer_standard_validation_found) {
1039 av_log(ctx, AV_LOG_ERROR,
1040 "Validation Layer \"%s\" not supported\n", layer_standard_validation);
1041 err = AVERROR(ENOTSUP);
1042 goto end;
1043 }
1044 } else if (mode == FF_VULKAN_DEBUG_PROFILE) {
1045 *debug_mode = mode;
1046 }
1047
1048 /* Process any custom layers enabled */
1049 if (user_layers) {
1050 int found;
1051
1052 user_layers_str = av_strdup(user_layers->value);
1053 if (!user_layers_str) {
1054 err = AVERROR(ENOMEM);
1055 goto fail;
1056 }
1057
1058 token = av_strtok(user_layers_str, "+", &save);
1059 while (token) {
1060 found = 0;
1061
1062 /* If debug=1/2 was specified as an option, skip this layer */
1063 if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) {
1064 token = av_strtok(NULL, "+", &save);
1065 break;
1066 }
1067
1068 /* Try to find the layer in the list of supported layers */
1069 for (int j = 0; j < sup_layer_count; j++) {
1070 if (!strcmp(token, sup_layers[j].layerName)) {
1071 found = 1;
1072 break;
1073 }
1074 }
1075
1076 if (found) {
1077 av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token);
1078 ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token);
1079
1080 /* If debug was not set as an option, force it */
1081 if (!strcmp(layer_standard_validation, token))
1082 *debug_mode = FF_VULKAN_DEBUG_VALIDATE;
1083 } else {
1084 av_log(ctx, AV_LOG_ERROR,
1085 "Layer \"%s\" not supported\n", token);
1086 err = AVERROR(EINVAL);
1087 goto end;
1088 }
1089
1090 token = av_strtok(NULL, "+", &save);
1091 }
1092 }
1093
1094 fail:
1095 end:
1096 av_free(sup_layers);
1097 av_free(user_layers_str);
1098
1099 if (err < 0) {
1100 RELEASE_PROPS(enabled_layers, enabled_layers_count);
1101 } else {
1102 *dst = enabled_layers;
1103 *num = enabled_layers_count;
1104 }
1105
1106 return err;
1107 }
1108
/**
 * Creates a VkInstance.
 *
 * Loads libvulkan if the caller supplied no get_proc_addr, resolves the
 * loader entrypoints, negotiates layers and instance extensions, optionally
 * enables validation features and a debug-utils messenger, then creates the
 * instance and loads the instance-level function pointers.
 *
 * @param ctx        the device context being initialized
 * @param opts       user options ("debug", "layers", "instance_extensions")
 * @param debug_mode output: the debugging mode selected by check_layers()
 * @return 0 on success, negative AVERROR code on failure
 */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts,
                           enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkApplicationInfo application_info = {
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName = "ffmpeg",
        .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
        .pEngineName = "libavutil",
        .apiVersion = VK_API_VERSION_1_3,
        .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                         LIBAVUTIL_VERSION_MINOR,
                                         LIBAVUTIL_VERSION_MICRO),
    };
    VkValidationFeaturesEXT validation_features = {
        .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
    };
    VkInstanceCreateInfo inst_props = {
        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Load the Vulkan loader dynamically unless the user gave us one */
    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    /* Only the pre-instance (enumeration) entrypoints are available yet */
    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames,
                       &inst_props.enabledLayerCount, debug_mode);
    if (err)
        goto fail;

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, *debug_mode);
    /* Stored in hwctx even on failure so they are released with the context */
    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
    if (err < 0)
        goto fail;

    /* Enable debug features if needed */
    if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) {
        static const VkValidationFeatureEnableEXT feat_list_validate[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_validate;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRINTF) {
        static const VkValidationFeatureEnableEXT feat_list_debug[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_debug;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) {
        static const VkValidationFeatureEnableEXT feat_list_practices[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_practices;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices);
        inst_props.pNext = &validation_features;
    }

#ifdef __APPLE__
    /* MoltenVK is non-conformant and must be enumerated as a portability
     * implementation when the portability-enumeration extension is on */
    for (int i = 0; i < inst_props.enabledExtensionCount; i++) {
        if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
                    inst_props.ppEnabledExtensionNames[i])) {
            inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
            break;
        }
    }
#endif

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Now that the instance exists, load the instance-level entrypoints */
    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        goto fail;
    }

    /* Setup debugging callback if needed */
    if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    err = 0;

fail:
    RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
    return err;
}
1246
/* Criteria used by find_device() to choose a physical device; the fields
 * are tried in the order documented on each member. */
typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    uint32_t drm_major; /* Will use this second unless !has_drm */
    uint32_t drm_minor; /* Will use this second unless !has_drm */
    uint32_t has_drm; /* has drm node info */
    const char *name; /* Will use this third unless NULL */
    uint32_t pci_device; /* Will use this fourth unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;
1258
1259 static const char *vk_dev_type(enum VkPhysicalDeviceType type)
1260 {
1261 switch (type) {
1262 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
1263 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete";
1264 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual";
1265 case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software";
1266 default: return "unknown";
1267 }
1268 }
1269
1270 /* Finds a device */
1271 static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
1272 {
1273 int err = 0, choice = -1;
1274 uint32_t num;
1275 VkResult ret;
1276 VulkanDevicePriv *p = ctx->hwctx;
1277 AVVulkanDeviceContext *hwctx = &p->p;
1278 FFVulkanFunctions *vk = &p->vkctx.vkfn;
1279 VkPhysicalDevice *devices = NULL;
1280 VkPhysicalDeviceIDProperties *idp = NULL;
1281 VkPhysicalDeviceProperties2 *prop = NULL;
1282 VkPhysicalDeviceDriverProperties *driver_prop = NULL;
1283 VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL;
1284
1285 ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
1286 if (ret != VK_SUCCESS || !num) {
1287 av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret));
1288 return AVERROR(ENODEV);
1289 }
1290
1291 devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
1292 if (!devices)
1293 return AVERROR(ENOMEM);
1294
1295 ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
1296 if (ret != VK_SUCCESS) {
1297 av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
1298 ff_vk_ret2str(ret));
1299 err = AVERROR(ENODEV);
1300 goto end;
1301 }
1302
1303 prop = av_calloc(num, sizeof(*prop));
1304 if (!prop) {
1305 err = AVERROR(ENOMEM);
1306 goto end;
1307 }
1308
1309 idp = av_calloc(num, sizeof(*idp));
1310 if (!idp) {
1311 err = AVERROR(ENOMEM);
1312 goto end;
1313 }
1314
1315 driver_prop = av_calloc(num, sizeof(*driver_prop));
1316 if (!driver_prop) {
1317 err = AVERROR(ENOMEM);
1318 goto end;
1319 }
1320
1321 if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
1322 drm_prop = av_calloc(num, sizeof(*drm_prop));
1323 if (!drm_prop) {
1324 err = AVERROR(ENOMEM);
1325 goto end;
1326 }
1327 }
1328
1329 av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
1330 for (int i = 0; i < num; i++) {
1331 if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
1332 drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
1333 driver_prop[i].pNext = &drm_prop[i];
1334 }
1335 driver_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
1336 idp[i].pNext = &driver_prop[i];
1337 idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
1338 prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1339 prop[i].pNext = &idp[i];
1340
1341 vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
1342 av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
1343 prop[i].properties.deviceName,
1344 vk_dev_type(prop[i].properties.deviceType),
1345 prop[i].properties.deviceID);
1346 }
1347
1348 if (select->has_uuid) {
1349 for (int i = 0; i < num; i++) {
1350 if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
1351 choice = i;
1352 goto end;
1353 }
1354 }
1355 av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
1356 err = AVERROR(ENODEV);
1357 goto end;
1358 } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) {
1359 for (int i = 0; i < num; i++) {
1360 if ((select->drm_major == drm_prop[i].primaryMajor &&
1361 select->drm_minor == drm_prop[i].primaryMinor) ||
1362 (select->drm_major == drm_prop[i].renderMajor &&
1363 select->drm_minor == drm_prop[i].renderMinor)) {
1364 choice = i;
1365 goto end;
1366 }
1367 }
1368 av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n",
1369 select->drm_major, select->drm_minor);
1370 err = AVERROR(ENODEV);
1371 goto end;
1372 } else if (select->name) {
1373 av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
1374 for (int i = 0; i < num; i++) {
1375 if (strstr(prop[i].properties.deviceName, select->name)) {
1376 choice = i;
1377 goto end;
1378 }
1379 }
1380 av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
1381 select->name);
1382 err = AVERROR(ENODEV);
1383 goto end;
1384 } else if (select->pci_device) {
1385 av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
1386 for (int i = 0; i < num; i++) {
1387 if (select->pci_device == prop[i].properties.deviceID) {
1388 choice = i;
1389 goto end;
1390 }
1391 }
1392 av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
1393 select->pci_device);
1394 err = AVERROR(EINVAL);
1395 goto end;
1396 } else if (select->vendor_id) {
1397 av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
1398 for (int i = 0; i < num; i++) {
1399 if (select->vendor_id == prop[i].properties.vendorID) {
1400 choice = i;
1401 goto end;
1402 }
1403 }
1404 av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
1405 select->vendor_id);
1406 err = AVERROR(ENODEV);
1407 goto end;
1408 } else {
1409 if (select->index < num) {
1410 choice = select->index;
1411 goto end;
1412 }
1413 av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
1414 select->index);
1415 err = AVERROR(ENODEV);
1416 goto end;
1417 }
1418
1419 end:
1420 if (choice > -1) {
1421 av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n",
1422 choice, prop[choice].properties.deviceName,
1423 vk_dev_type(prop[choice].properties.deviceType),
1424 prop[choice].properties.deviceID);
1425 hwctx->phys_dev = devices[choice];
1426 p->props = prop[choice];
1427 p->props.pNext = NULL;
1428 p->dprops = driver_prop[choice];
1429 p->dprops.pNext = NULL;
1430 }
1431
1432 av_free(devices);
1433 av_free(prop);
1434 av_free(idp);
1435 av_free(drm_prop);
1436 av_free(driver_prop);
1437
1438 return err;
1439 }
1440
1441 /* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
1442 static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
1443 VkQueueFlagBits flags)
1444 {
1445 int index = -1;
1446 uint32_t min_score = UINT32_MAX;
1447
1448 for (int i = 0; i < num_qf; i++) {
1449 VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
1450
1451 /* Per the spec, reporting transfer caps is optional for these 2 types */
1452 if ((flags & VK_QUEUE_TRANSFER_BIT) &&
1453 (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
1454 qflags |= VK_QUEUE_TRANSFER_BIT;
1455
1456 if (qflags & flags) {
1457 uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
1458 if (score < min_score) {
1459 index = i;
1460 min_score = score;
1461 }
1462 }
1463 }
1464
1465 if (index > -1)
1466 qf[index].queueFamilyProperties.timestampValidBits++;
1467
1468 return index;
1469 }
1470
1471 static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
1472 VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
1473 VkVideoCodecOperationFlagsKHR flags)
1474 {
1475 int index = -1;
1476 uint32_t min_score = UINT32_MAX;
1477
1478 for (int i = 0; i < num_qf; i++) {
1479 const VkQueueFlags qflags = qf[i].queueFamilyProperties.queueFlags;
1480 const VkVideoCodecOperationFlagsKHR vflags = qf_vid[i].videoCodecOperations;
1481
1482 if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
1483 continue;
1484
1485 if (vflags & flags) {
1486 uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
1487 if (score < min_score) {
1488 index = i;
1489 min_score = score;
1490 }
1491 }
1492 }
1493
1494 if (index > -1)
1495 qf[index].queueFamilyProperties.timestampValidBits++;
1496
1497 return index;
1498 }
1499
1500 static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
1501 {
1502 uint32_t num;
1503 VulkanDevicePriv *p = ctx->hwctx;
1504 AVVulkanDeviceContext *hwctx = &p->p;
1505 FFVulkanFunctions *vk = &p->vkctx.vkfn;
1506
1507 VkQueueFamilyProperties2 *qf = NULL;
1508 VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;
1509
1510 /* First get the number of queue families */
1511 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
1512 if (!num) {
1513 av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
1514 return AVERROR_EXTERNAL;
1515 }
1516
1517 /* Then allocate memory */
1518 qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
1519 if (!qf)
1520 return AVERROR(ENOMEM);
1521
1522 qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
1523 if (!qf_vid)
1524 return AVERROR(ENOMEM);
1525
1526 for (uint32_t i = 0; i < num; i++) {
1527 qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
1528 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
1529 };
1530 qf[i] = (VkQueueFamilyProperties2) {
1531 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
1532 .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
1533 };
1534 }
1535
1536 /* Finally retrieve the queue families */
1537 vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);
1538
1539 av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
1540 for (int i = 0; i < num; i++) {
1541 av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i,
1542 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
1543 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
1544 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
1545 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
1546 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
1547 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
1548 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "",
1549 ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
1550 qf[i].queueFamilyProperties.queueCount);
1551
1552 /* We use this field to keep a score of how many times we've used that
1553 * queue family in order to make better choices. */
1554 qf[i].queueFamilyProperties.timestampValidBits = 0;
1555 }
1556
1557 hwctx->nb_qf = 0;
1558
1559 /* Pick each queue family to use. */
1560 #define PICK_QF(type, vid_op) \
1561 do { \
1562 uint32_t i; \
1563 uint32_t idx; \
1564 \
1565 if (vid_op) \
1566 idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \
1567 else \
1568 idx = pick_queue_family(qf, num, type); \
1569 \
1570 if (idx == -1) \
1571 continue; \
1572 \
1573 for (i = 0; i < hwctx->nb_qf; i++) { \
1574 if (hwctx->qf[i].idx == idx) { \
1575 hwctx->qf[i].flags |= type; \
1576 hwctx->qf[i].video_caps |= vid_op; \
1577 break; \
1578 } \
1579 } \
1580 if (i == hwctx->nb_qf) { \
1581 hwctx->qf[i].idx = idx; \
1582 hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
1583 if (p->limit_queues || \
1584 p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) { \
1585 int max = p->limit_queues; \
1586 if (type == VK_QUEUE_GRAPHICS_BIT) \
1587 hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, \
1588 max ? max : 1); \
1589 else if (max) \
1590 hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, max); \
1591 } \
1592 hwctx->qf[i].flags = type; \
1593 hwctx->qf[i].video_caps = vid_op; \
1594 hwctx->nb_qf++; \
1595 } \
1596 } while (0)
1597
1598 PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1599 PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1600 PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1601
1602 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
1603 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
1604
1605 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
1606 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
1607
1608 #ifdef VK_KHR_video_decode_vp9
1609 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR);
1610 #endif
1611
1612 #ifdef VK_KHR_video_encode_av1
1613 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
1614 #endif
1615 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
1616
1617 av_free(qf);
1618 av_free(qf_vid);
1619
1620 #undef PICK_QF
1621
1622 cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
1623 sizeof(VkDeviceQueueCreateInfo));
1624 if (!cd->pQueueCreateInfos)
1625 return AVERROR(ENOMEM);
1626
1627 for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
1628 int dup = 0;
1629 float *weights = NULL;
1630 VkDeviceQueueCreateInfo *pc;
1631 for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
1632 if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
1633 dup = 1;
1634 break;
1635 }
1636 }
1637 if (dup)
1638 continue;
1639
1640 weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
1641 if (!weights) {
1642 for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
1643 av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities);
1644 av_free((void *)cd->pQueueCreateInfos);
1645 return AVERROR(ENOMEM);
1646 }
1647
1648 for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
1649 weights[j] = 1.0;
1650
1651 pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
1652 pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
1653 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
1654 .queueFamilyIndex = hwctx->qf[i].idx,
1655 .queueCount = hwctx->qf[i].num,
1656 .pQueuePriorities = weights,
1657 };
1658 }
1659
1660 #if FF_API_VULKAN_FIXED_QUEUES
1661 FF_DISABLE_DEPRECATION_WARNINGS
1662 /* Setup deprecated fields */
1663 hwctx->queue_family_index = -1;
1664 hwctx->queue_family_comp_index = -1;
1665 hwctx->queue_family_tx_index = -1;
1666 hwctx->queue_family_encode_index = -1;
1667 hwctx->queue_family_decode_index = -1;
1668
1669 #define SET_OLD_QF(field, nb_field, type) \
1670 do { \
1671 if (field < 0 && hwctx->qf[i].flags & type) { \
1672 field = hwctx->qf[i].idx; \
1673 nb_field = hwctx->qf[i].num; \
1674 } \
1675 } while (0)
1676
1677 for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
1678 SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
1679 SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
1680 SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
1681 SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
1682 SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
1683 }
1684
1685 #undef SET_OLD_QF
1686 FF_ENABLE_DEPRECATION_WARNINGS
1687 #endif
1688
1689 return 0;
1690 }
1691
1692 /* Only resources created by vulkan_device_create should be released here,
1693 * resources created by vulkan_device_init should be released by
1694 * vulkan_device_uninit, to make sure we don't free user provided resources,
1695 * and there is no leak.
1696 */
static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    /* Destroy in reverse order of creation: the logical device first... */
    if (hwctx->act_dev)
        vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);

    /* ...then the debug messenger (owned by the instance)... */
    if (p->debug_ctx)
        vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                          hwctx->alloc);

    /* ...then the instance itself, and finally the loader library */
    if (hwctx->inst)
        vk->DestroyInstance(hwctx->inst, hwctx->alloc);

    if (p->libvulkan)
        dlclose(p->libvulkan);

    /* Release the duplicated extension-name lists owned by hwctx */
    RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
    RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
1719
static void vulkan_device_uninit(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;

    /* Tear down the per-queue-family mutex arrays allocated during
     * vulkan_device_init().
     * NOTE(review): only the first mutex of each family's array is passed to
     * pthread_mutex_destroy() even though one mutex per queue was
     * initialized; the per-family queue counts are not retained here, so the
     * remaining mutexes are freed without being destroyed — harmless on
     * common pthread implementations, but worth confirming. */
    for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
        pthread_mutex_destroy(p->qf_mutex[i]);
        av_freep(&p->qf_mutex[i]);
    }
    av_freep(&p->qf_mutex);

    ff_vk_uninit(&p->vkctx);
}
1732
1733 static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
1734 VulkanDeviceSelection *dev_select,
1735 int disable_multiplane,
1736 AVDictionary *opts, int flags)
1737 {
1738 int err = 0;
1739 VkResult ret;
1740 AVDictionaryEntry *opt_d;
1741 VulkanDevicePriv *p = ctx->hwctx;
1742 AVVulkanDeviceContext *hwctx = &p->p;
1743 FFVulkanFunctions *vk = &p->vkctx.vkfn;
1744 enum FFVulkanDebugMode debug_mode = FF_VULKAN_DEBUG_NONE;
1745 VulkanDeviceFeatures supported_feats = { 0 };
1746 VkDeviceCreateInfo dev_info = {
1747 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1748 };
1749
1750 /* Create an instance if not given one */
1751 if ((err = create_instance(ctx, opts, &debug_mode)))
1752 goto end;
1753
1754 /* Find a physical device (if not given one) */
1755 if ((err = find_device(ctx, dev_select)))
1756 goto end;
1757
1758 /* Get supported memory types */
1759 vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
1760
1761 /* Find and enable extensions for the physical device */
1762 if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
1763 &dev_info.enabledExtensionCount, debug_mode))) {
1764 for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
1765 av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
1766 av_free((void *)dev_info.pQueueCreateInfos);
1767 goto end;
1768 }
1769
1770 /* Get all supported features for the physical device */
1771 device_features_init(ctx, &supported_feats);
1772 vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &supported_feats.device);
1773
1774 /* Copy all needed features from those supported and activate them */
1775 device_features_init(ctx, &p->feats);
1776 device_features_copy_needed(&p->feats, &supported_feats);
1777 dev_info.pNext = p->feats.device.pNext;
1778 dev_info.pEnabledFeatures = &p->feats.device.features;
1779
1780 /* Limit queues to a given number if needed */
1781 opt_d = av_dict_get(opts, "limit_queues", NULL, 0);
1782 if (opt_d)
1783 p->limit_queues = strtol(opt_d->value, NULL, 10);
1784
1785 /* Setup enabled queue families */
1786 if ((err = setup_queue_families(ctx, &dev_info)))
1787 goto end;
1788
1789 /* Finally create the device */
1790 ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
1791 &hwctx->act_dev);
1792
1793 for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
1794 av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
1795 av_free((void *)dev_info.pQueueCreateInfos);
1796
1797 if (ret != VK_SUCCESS) {
1798 av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
1799 ff_vk_ret2str(ret));
1800 for (int i = 0; i < dev_info.enabledExtensionCount; i++)
1801 av_free((void *)dev_info.ppEnabledExtensionNames[i]);
1802 av_free((void *)dev_info.ppEnabledExtensionNames);
1803 err = AVERROR_EXTERNAL;
1804 goto end;
1805 }
1806
1807 /* Tiled images setting, use them by default */
1808 opt_d = av_dict_get(opts, "linear_images", NULL, 0);
1809 if (opt_d)
1810 p->use_linear_images = strtol(opt_d->value, NULL, 10);
1811
1812 /* The disable_multiplane argument takes precedent over the option */
1813 p->disable_multiplane = disable_multiplane;
1814 if (!p->disable_multiplane) {
1815 opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0);
1816 if (opt_d)
1817 p->disable_multiplane = strtol(opt_d->value, NULL, 10);
1818 }
1819
1820 /* Disable host pointer imports (by default on nvidia) */
1821 p->avoid_host_import = p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
1822 opt_d = av_dict_get(opts, "avoid_host_import", NULL, 0);
1823 if (opt_d)
1824 p->avoid_host_import = strtol(opt_d->value, NULL, 10);
1825
1826 /* Set the public device feature struct and its pNext chain */
1827 hwctx->device_features = p->feats.device;
1828
1829 /* Set the list of all active extensions */
1830 hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
1831 hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
1832
1833 /* The extension lists need to be freed */
1834 ctx->free = vulkan_device_free;
1835
1836 end:
1837 return err;
1838 }
1839
1840 static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
1841 {
1842 VulkanDevicePriv *p = ctx->hwctx;
1843 pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
1844 }
1845
1846 static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
1847 {
1848 VulkanDevicePriv *p = ctx->hwctx;
1849 pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
1850 }
1851
/* Second-stage initialization: runs after the logical device exists (whether
 * created by us or provided by the API user). Loads function pointers,
 * queries device/queue properties, creates per-queue mutexes, and fills in
 * both the deprecated fixed queue fields and the new qf[] array. */
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err = 0;
    uint32_t qf_num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf;
    VkQueueFamilyVideoPropertiesKHR *qf_vid;
    VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->vkctx.extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
        return err;
    }

    /* Chain the property structs we care about and query them in one call */
    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
    p->hprops.pNext = &p->dprops;
    p->dprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;

    vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %"SIZE_SPECIFIER"\n",
           p->props.properties.limits.minMemoryMapAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    nonCoherentAtomSize:                %"PRIu64"\n",
           p->props.properties.limits.nonCoherentAtomSize);
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %"PRIu64"\n",
               p->hprops.minImportedHostPointerAlignment);

    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
    if (!qf_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    ext_sem_props_info = (VkPhysicalDeviceExternalSemaphoreInfo) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
    };

    /* Opaque FD semaphore properties */
    ext_sem_props_info.handleType =
#ifdef _WIN32
        IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
#else
        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
    p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES;
    vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->phys_dev,
                                                     &ext_sem_props_info,
                                                     &p->ext_sem_props_opaque);

    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    /* Chain video properties onto each queue family query when the video
     * queue extension is available */
    for (uint32_t i = 0; i < qf_num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);

    /* One mutex per queue of every family, used by lock_queue/unlock_queue */
    p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
    if (!p->qf_mutex) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    p->nb_tot_qfs = qf_num;

    for (uint32_t i = 0; i < qf_num; i++) {
        p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
                                   sizeof(**p->qf_mutex));
        if (!p->qf_mutex[i]) {
            err = AVERROR(ENOMEM);
            goto end;
        }
        for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
            err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
            if (err != 0) {
                av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
                       av_err2str(err));
                err = AVERROR(err);
                goto end;
            }
        }
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    /* Validate the deprecated fixed queue-family fields; -1 marks "unset" */
    graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
    comp_index  = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
    tx_index    = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
    dec_index   = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1;
    enc_index   = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1;

#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
    do {                                                                                        \
        if (ctx_qf < 0 && required) {                                                           \
            av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing"      \
                   " in the context!\n", type);                                                 \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        } else if (fidx < 0 || ctx_qf < 0) {                                                    \
            break;                                                                              \
        } else if (ctx_qf >= qf_num) {                                                          \
            av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
                   type, ctx_qf, qf_num);                                                       \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        }                                                                                       \
                                                                                                \
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)"                        \
               " for%s%s%s%s%s\n",                                                              \
               ctx_qf, qc,                                                                      \
               ctx_qf == graph_index ? " graphics" : "",                                        \
               ctx_qf == comp_index  ? " compute" : "",                                         \
               ctx_qf == tx_index    ? " transfers" : "",                                       \
               ctx_qf == enc_index   ? " encode" : "",                                          \
               ctx_qf == dec_index   ? " decode" : "");                                         \
        graph_index = (ctx_qf == graph_index) ? -1 : graph_index;                               \
        comp_index  = (ctx_qf == comp_index)  ? -1 : comp_index;                                \
        tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
        enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
        dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
    } while (0)

    CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
    CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index,   hwctx->nb_comp_queues);
    CHECK_QUEUE("upload",   1, tx_index,    hwctx->queue_family_tx_index,     hwctx->nb_tx_queues);
    CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, hwctx->nb_decode_queues);
    CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, hwctx->nb_encode_queues);

#undef CHECK_QUEUE

    /* Update the new queue family fields. If non-zero already,
     * it means API users have set it. */
    if (!hwctx->nb_qf) {
#define ADD_QUEUE(ctx_qf, qc, flag)                                    \
    do {                                                               \
        if (ctx_qf != -1) {                                            \
            hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) {  \
                .idx = ctx_qf,                                         \
                .num = qc,                                             \
                .flags = flag,                                         \
            };                                                         \
        }                                                              \
    } while (0)

        ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
        ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
#undef ADD_QUEUE
    }
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    /* Fill in video capabilities for video queues the user did not annotate */
    for (int i = 0; i < hwctx->nb_qf; i++) {
        if (!hwctx->qf[i].video_caps &&
            hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
                                  VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
            hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
        }
    }

    /* Setup array for pQueueFamilyIndices with used queue families */
    p->nb_img_qfs = 0;
    for (int i = 0; i < hwctx->nb_qf; i++) {
        int seen = 0;
        /* Make sure each entry is unique
         * (VUID-VkBufferCreateInfo-sharingMode-01419) */
        for (int j = (i - 1); j >= 0; j--) {
            if (hwctx->qf[i].idx == hwctx->qf[j].idx) {
                seen = 1;
                break;
            }
        }
        if (!seen)
            p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx;
    }

    /* Install default queue locking callbacks unless the user supplied any */
    if (!hwctx->lock_queue)
        hwctx->lock_queue = lock_queue;
    if (!hwctx->unlock_queue)
        hwctx->unlock_queue = unlock_queue;

    /* Re-query device capabilities, in case the device was created externally */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    p->vkctx.device = ctx;
    p->vkctx.hwctx = hwctx;

    ff_vk_load_props(&p->vkctx);
    p->compute_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_COMPUTE_BIT, 0);
    p->transfer_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_TRANSFER_BIT, 0);

    /* Re-query device capabilities, in case the device was created externally */
    /* NOTE(review): this repeats the identical query made a few lines above;
     * the call is idempotent so it is harmless, but one of the two looks
     * redundant — confirm before removing. */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

end:
    av_free(qf_vid);
    av_free(qf);
    return err;
}
2090
2091 static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
2092 AVDictionary *opts, int flags)
2093 {
2094 VulkanDeviceSelection dev_select = { 0 };
2095 if (device && device[0]) {
2096 char *end = NULL;
2097 dev_select.index = strtol(device, &end, 10);
2098 if (end == device) {
2099 dev_select.index = 0;
2100 dev_select.name = device;
2101 }
2102 }
2103
2104 return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
2105 }
2106
/* Derives a Vulkan device from another hwdevice context by matching the
 * underlying physical device: PCI ID / vendor string for VAAPI, DRM node
 * major:minor plus PCI ID for DRM, and the device UUID for CUDA. */
static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if its not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch(src_ctx->type) {
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
        VADisplay dpy = src_hwctx->display;
#if VA_CHECK_VERSION(1, 15, 0)
        VAStatus vas;
        VADisplayAttribute attr = {
            .type = VADisplayPCIID,
        };
#endif
        const char *vendor;

#if VA_CHECK_VERSION(1, 15, 0)
        /* Prefer an exact PCI device ID when libva can report one */
        vas = vaGetDisplayAttributes(dpy, &attr, 1);
        if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED)
            dev_select.pci_device = (attr.value & 0xFFFF);
#endif

        if (!dev_select.pci_device) {
            /* Fall back to a vendor-string heuristic */
            vendor = vaQueryVendorString(dpy);
            if (!vendor) {
                av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
                return AVERROR_EXTERNAL;
            }

            if (strstr(vendor, "AMD"))
                dev_select.vendor_id = 0x1002;
        }

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_LIBDRM
    case AV_HWDEVICE_TYPE_DRM: {
        int err;
        struct stat drm_node_info;
        drmDevice *drm_dev_info;
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        /* Match by the DRM node's device major/minor numbers */
        err = fstat(src_hwctx->fd, &drm_node_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        dev_select.drm_major = major(drm_node_info.st_dev);
        dev_select.drm_minor = minor(drm_node_info.st_dev);
        dev_select.has_drm = 1;

        err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        /* Additionally match the PCI device ID when the node is a PCI GPU */
        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        /* Match by device UUID, which both APIs expose */
        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        /*
         * CUDA is not able to import multiplane images, so always derive a
         * Vulkan device with multiplane disabled.
         */
        return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
2209
2210 static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
2211 const void *hwconfig,
2212 AVHWFramesConstraints *constraints)
2213 {
2214 int count = 0;
2215 VulkanDevicePriv *p = ctx->hwctx;
2216
2217 for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
2218 count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
2219 p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
2220 VK_IMAGE_TILING_OPTIMAL,
2221 NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0;
2222 }
2223
2224 constraints->valid_sw_formats = av_malloc_array(count + 1,
2225 sizeof(enum AVPixelFormat));
2226 if (!constraints->valid_sw_formats)
2227 return AVERROR(ENOMEM);
2228
2229 count = 0;
2230 for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
2231 if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
2232 p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
2233 VK_IMAGE_TILING_OPTIMAL,
2234 NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0) {
2235 constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
2236 }
2237 }
2238
2239 constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
2240
2241 constraints->min_width = 1;
2242 constraints->min_height = 1;
2243 constraints->max_width = p->props.properties.limits.maxImageDimension2D;
2244 constraints->max_height = p->props.properties.limits.maxImageDimension2D;
2245
2246 constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
2247 if (!constraints->valid_hw_formats)
2248 return AVERROR(ENOMEM);
2249
2250 constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
2251 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
2252
2253 return 0;
2254 }
2255
2256 static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
2257 VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
2258 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
2259 {
2260 VkResult ret;
2261 int index = -1;
2262 VulkanDevicePriv *p = ctx->hwctx;
2263 FFVulkanFunctions *vk = &p->vkctx.vkfn;
2264 AVVulkanDeviceContext *dev_hwctx = &p->p;
2265 VkMemoryAllocateInfo alloc_info = {
2266 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2267 .pNext = alloc_extension,
2268 .allocationSize = req->size,
2269 };
2270
2271 /* The vulkan spec requires memory types to be sorted in the "optimal"
2272 * order, so the first matching type we find will be the best/fastest one */
2273 for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
2274 const VkMemoryType *type = &p->mprops.memoryTypes[i];
2275
2276 /* The memory type must be supported by the requirements (bitfield) */
2277 if (!(req->memoryTypeBits & (1 << i)))
2278 continue;
2279
2280 /* The memory type flags must include our properties */
2281 if ((type->propertyFlags & req_flags) != req_flags)
2282 continue;
2283
2284 /* The memory type must be large enough */
2285 if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
2286 continue;
2287
2288 /* Found a suitable memory type */
2289 index = i;
2290 break;
2291 }
2292
2293 if (index < 0) {
2294 av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
2295 req_flags);
2296 return AVERROR(EINVAL);
2297 }
2298
2299 alloc_info.memoryTypeIndex = index;
2300
2301 ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
2302 dev_hwctx->alloc, mem);
2303 if (ret != VK_SUCCESS) {
2304 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
2305 ff_vk_ret2str(ret));
2306 return AVERROR(ENOMEM);
2307 }
2308
2309 *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
2310
2311 return 0;
2312 }
2313
/* Frees a frame's internal bookkeeping: CUDA interop objects (external
 * semaphores, mipmapped arrays, external memory, Win32 handles) when CUDA
 * export was used, then the per-frame update mutex and the struct itself. */
static void vulkan_free_internal(AVVkFrame *f)
{
    av_unused AVVkFrameInternal *internal = f->internal;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        /* Destroy every per-plane interop object that was created */
        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
#ifdef _WIN32
            if (internal->ext_sem_handle[i])
                CloseHandle(internal->ext_sem_handle[i]);
            if (internal->ext_mem_handle[i])
                CloseHandle(internal->ext_mem_handle[i]);
#endif
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    pthread_mutex_destroy(&internal->update_mutex);
    av_freep(&f->internal);
}
2349
/* Destroys an AVVkFrame: first waits for the frame's semaphores to reach
 * their last signalled values (so no GPU work is still using it), then
 * releases the internal/interop state and the per-image Vulkan objects. */
static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images = ff_vk_count_images(f);
    int nb_sems = 0;

    /* Count semaphores up to the first NULL handle */
    while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems])
        nb_sems++;

    if (nb_sems) {
        VkSemaphoreWaitInfo sem_wait = {
            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .flags          = 0x0,
            .pSemaphores    = f->sem,
            .pValues        = f->sem_value,
            .semaphoreCount = nb_sems,
        };

        /* Block indefinitely until all pending work has completed */
        vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
    }

    vulkan_free_internal(f);

    for (int i = 0; i < nb_images; i++) {
        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}
2383
2384 static void vulkan_frame_free_cb(void *opaque, uint8_t *data)
2385 {
2386 vulkan_frame_free(opaque, (AVVkFrame*)data);
2387 }
2388
/* Allocates device memory for each of the frame's images (one allocation
 * per image) and binds them in a single vkBindImageMemory2 call.
 * alloc_pnext is an array of per-image pNext chains for the allocation
 * info, with consecutive entries alloc_pnext_stride bytes apart. */
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int img_cnt = 0, err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    /* Iterate images up to the first NULL handle */
    while (f->img[img_cnt]) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[img_cnt],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            /* Chain in this image's caller-provided allocation extension */
            .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        /* Linear images get mapped, so round the size up to the device's
         * map alignment */
        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[img_cnt];

        /* Allocate memory: host-visible for linear (mappable) images,
         * device-local otherwise */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[img_cnt])))
            return err;

        f->size[img_cnt] = req.memoryRequirements.size;
        bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[img_cnt].image = f->img[img_cnt];
        bind_info[img_cnt].memory = f->mem[img_cnt];

        img_cnt++;
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
2457
/* Usage a frame is being prepared for; consumed by switch_new_props() /
 * switch_layout() below to pick the target image layout and access mask. */
enum PrepMode {
    PREP_MODE_GENERAL,          /* general layout, transfer writes */
    PREP_MODE_WRITE,            /* optimal transfer destination */
    PREP_MODE_EXTERNAL_EXPORT,  /* hand the frame off to an external API */
    PREP_MODE_EXTERNAL_IMPORT,  /* take the frame over from an external API */
    PREP_MODE_DECODING_DST,     /* video decode output image */
    PREP_MODE_DECODING_DPB,     /* video decode reference (DPB) image */
    PREP_MODE_ENCODING_DPB,     /* video encode reference (DPB) image */
};
2467
2468 static void switch_new_props(enum PrepMode pmode, VkImageLayout *new_layout,
2469 VkAccessFlags2 *new_access)
2470 {
2471 switch (pmode) {
2472 case PREP_MODE_GENERAL:
2473 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2474 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
2475 break;
2476 case PREP_MODE_WRITE:
2477 *new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2478 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
2479 break;
2480 case PREP_MODE_EXTERNAL_IMPORT:
2481 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2482 *new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
2483 break;
2484 case PREP_MODE_EXTERNAL_EXPORT:
2485 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2486 *new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
2487 break;
2488 case PREP_MODE_DECODING_DST:
2489 *new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
2490 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
2491 break;
2492 case PREP_MODE_DECODING_DPB:
2493 *new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
2494 *new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
2495 break;
2496 case PREP_MODE_ENCODING_DPB:
2497 *new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR;
2498 *new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
2499 break;
2500 }
2501 }
2502
/* Transitions all images of a frame into the layout/access state required by
 * pmode by recording an image memory barrier on an execution context from
 * ectx and submitting it. For PREP_MODE_EXTERNAL_EXPORT the barrier also
 * releases queue-family ownership to VK_QUEUE_FAMILY_EXTERNAL_KHR.
 * Returns 0 on success, a negative AVERROR on failure. */
static int switch_layout(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    VkImageLayout new_layout;
    VkAccessFlags2 new_access;
    switch_new_props(pmode, &new_layout, &new_access);

    /* With multiple image queue families the images were created with
     * VK_SHARING_MODE_CONCURRENT, so no ownership transfer is required
     * (VK_QUEUE_FAMILY_IGNORED); otherwise target the single family. */
    uint32_t dst_qf = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
    VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE;
    if (pmode == PREP_MODE_EXTERNAL_EXPORT) {
        dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    }

    /* This is dirty - but it works. The vulkan.c dependency system doesn't
     * free non-refcounted frames, and non-refcounted hardware frames cannot
     * happen anywhere outside of here. */
    AVBufferRef tmp_ref = {
        .data = (uint8_t *)hwfc,
    };
    AVFrame tmp_frame = {
        .data[0] = (uint8_t *)frame,
        .hw_frames_ctx = &tmp_ref,
    };

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec = ff_vk_exec_get(&p->vkctx, ectx);
    cmd_buf = exec->buf;
    ff_vk_exec_start(&p->vkctx, exec);

    /* Register the frame as a dependency of this submission so its
     * semaphores are waited on and signalled appropriately. */
    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame,
                                   VK_PIPELINE_STAGE_2_NONE,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
    if (err < 0)
        return err;

    /* Fills img_bar and updates the frame's recorded layout/access/qf. */
    ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar,
                        src_stage,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        new_access, new_layout, dst_qf);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0)
        return err;

    /* We can do this because there are no real dependencies */
    ff_vk_exec_discard_deps(&p->vkctx, exec);

    return 0;
}
2565
2566 static int switch_layout_host(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
2567 AVVkFrame *frame, enum PrepMode pmode)
2568 {
2569 VkResult ret;
2570 VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
2571 FFVulkanFunctions *vk = &p->vkctx.vkfn;
2572 VkHostImageLayoutTransitionInfo layout_change[AV_NUM_DATA_POINTERS];
2573 int nb_images = ff_vk_count_images(frame);
2574
2575 VkImageLayout new_layout;
2576 VkAccessFlags2 new_access;
2577 switch_new_props(pmode, &new_layout, &new_access);
2578
2579 int i;
2580 for (i = 0; i < p->vkctx.host_image_props.copyDstLayoutCount; i++) {
2581 if (p->vkctx.host_image_props.pCopyDstLayouts[i] == new_layout)
2582 break;
2583 }
2584 if (i == p->vkctx.host_image_props.copyDstLayoutCount)
2585 return AVERROR(ENOTSUP);
2586
2587 for (i = 0; i < nb_images; i++) {
2588 layout_change[i] = (VkHostImageLayoutTransitionInfo) {
2589 .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO,
2590 .image = frame->img[i],
2591 .oldLayout = frame->layout[i],
2592 .newLayout = new_layout,
2593 .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2594 .subresourceRange.layerCount = 1,
2595 .subresourceRange.levelCount = 1,
2596 };
2597 frame->layout[i] = new_layout;
2598 }
2599
2600 ret = vk->TransitionImageLayoutEXT(p->vkctx.hwctx->act_dev,
2601 nb_images, layout_change);
2602 if (ret != VK_SUCCESS) {
2603 av_log(hwfc, AV_LOG_ERROR, "Unable to prepare frame: %s\n",
2604 ff_vk_ret2str(ret));
2605 return AVERROR_EXTERNAL;
2606 }
2607
2608 return 0;
2609 }
2610
2611 static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
2612 AVVkFrame *frame, enum PrepMode pmode)
2613 {
2614 int err = 0;
2615 AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
2616 if (hwfc_vk->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT &&
2617 (pmode != PREP_MODE_EXTERNAL_EXPORT) &&
2618 (pmode != PREP_MODE_EXTERNAL_IMPORT))
2619 err = switch_layout_host(hwfc, ectx, frame, pmode);
2620
2621 if (err != AVERROR(ENOTSUP))
2622 return err;
2623
2624 return switch_layout(hwfc, ectx, frame, pmode);
2625 }
2626
2627 static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
2628 int frame_w, int frame_h, int plane)
2629 {
2630 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
2631
2632 /* Currently always true unless gray + alpha support is added */
2633 if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
2634 !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
2635 *w = frame_w;
2636 *h = frame_h;
2637 return;
2638 }
2639
2640 *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
2641 *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
2642 }
2643
2644 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
2645 VkImageTiling tiling, VkImageUsageFlagBits usage,
2646 VkImageCreateFlags flags, int nb_layers,
2647 void *create_pnext)
2648 {
2649 int err;
2650 VkResult ret;
2651 AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
2652 AVHWDeviceContext *ctx = hwfc->device_ctx;
2653 VulkanDevicePriv *p = ctx->hwctx;
2654 AVVulkanDeviceContext *hwctx = &p->p;
2655 FFVulkanFunctions *vk = &p->vkctx.vkfn;
2656 AVVkFrame *f;
2657
2658 VkSemaphoreTypeCreateInfo sem_type_info = {
2659 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2660 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2661 .initialValue = 0,
2662 };
2663 VkSemaphoreCreateInfo sem_spawn = {
2664 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2665 .pNext = &sem_type_info,
2666 };
2667
2668 VkExportSemaphoreCreateInfo ext_sem_info_opaque = {
2669 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
2670 #ifdef _WIN32
2671 .handleTypes = IsWindows8OrGreater()
2672 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
2673 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
2674 #else
2675 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2676 #endif
2677 };
2678
2679 /* Check if exporting is supported before chaining any structs */
2680 if (p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) {
2681 if (p->vkctx.extensions & (FF_VK_EXT_EXTERNAL_WIN32_SEM | FF_VK_EXT_EXTERNAL_FD_SEM))
2682 ff_vk_link_struct(&sem_type_info, &ext_sem_info_opaque);
2683 }
2684
2685 f = av_vk_frame_alloc();
2686 if (!f) {
2687 av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
2688 return AVERROR(ENOMEM);
2689 }
2690
2691 // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
2692
2693 /* Create the images */
2694 for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) {
2695 VkImageCreateInfo create_info = {
2696 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2697 .pNext = create_pnext,
2698 .imageType = VK_IMAGE_TYPE_2D,
2699 .format = hwfc_vk->format[i],
2700 .extent.depth = 1,
2701 .mipLevels = 1,
2702 .arrayLayers = nb_layers,
2703 .flags = flags,
2704 .tiling = tiling,
2705 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2706 .usage = usage,
2707 .samples = VK_SAMPLE_COUNT_1_BIT,
2708 .pQueueFamilyIndices = p->img_qfs,
2709 .queueFamilyIndexCount = p->nb_img_qfs,
2710 .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2711 VK_SHARING_MODE_EXCLUSIVE,
2712 };
2713
2714 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2715 hwfc->sw_format, hwfc->width, hwfc->height, i);
2716
2717 ret = vk->CreateImage(hwctx->act_dev, &create_info,
2718 hwctx->alloc, &f->img[i]);
2719 if (ret != VK_SUCCESS) {
2720 av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
2721 ff_vk_ret2str(ret));
2722 err = AVERROR(EINVAL);
2723 goto fail;
2724 }
2725
2726 /* Create semaphore */
2727 ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
2728 hwctx->alloc, &f->sem[i]);
2729 if (ret != VK_SUCCESS) {
2730 av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
2731 ff_vk_ret2str(ret));
2732 err = AVERROR_EXTERNAL;
2733 goto fail;
2734 }
2735
2736 f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
2737 f->layout[i] = create_info.initialLayout;
2738 f->access[i] = 0x0;
2739 f->sem_value[i] = 0;
2740 }
2741
2742 f->flags = 0x0;
2743 f->tiling = tiling;
2744
2745 *frame = f;
2746 return 0;
2747
2748 fail:
2749 vulkan_frame_free(hwfc, f);
2750 return err;
2751 }
2752
/* Probes whether images for this frames context can be created with the
 * external memory handle type `exp`. If so, ORs `exp` into *iexp and merges
 * the driver-reported compatible handle types into *comp_handle_types.
 * With DRM-modifier tiling, every modifier in the context's modifier list
 * is probed, so any supported modifier enables the handle type. */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlags *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    /* Modifier list the user may have chained into create_pnext; only
     * relevant when the tiling is DRM_FORMAT_MODIFIER. */
    const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
        ff_vk_find_struct(hwctx->create_pnext,
                          VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
    int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
    int nb_mods;

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .pNext = NULL,
        .pQueueFamilyIndices   = p->img_qfs,
        .queueFamilyIndexCount = p->nb_img_qfs,
        .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                     VK_SHARING_MODE_EXCLUSIVE,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
        .pNext = has_mods ? &phy_dev_mod_info : NULL,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = vk_find_format_entry(hwfc->sw_format)->vkf,
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    /* Without modifiers a single query suffices; with modifiers, query
     * once per candidate modifier. */
    nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
    for (int i = 0; i < nb_mods; i++) {
        if (has_mods)
            phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i];

        ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                          &pinfo, &props);

        if (ret == VK_SUCCESS) {
            *iexp |= exp;
            *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
        }
    }
}
2815
/* AVBufferPool allocator callback: creates a fully initialized AVVkFrame
 * (images, bound memory, semaphores) and transitions it into the layout
 * matching the frames context's usage flags.
 * Returns a new AVBufferRef owning the frame, or NULL on failure. */
static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VkExternalMemoryHandleTypeFlags e = 0x0;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext       = hwctx->create_pnext,
    };

    /* Probe which external memory handle types this frame's images can be
     * exported as; only enabled types end up in eiinfo/eminfo. */
#ifdef _WIN32
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                             ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                             : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);

    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY &&
        hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
#endif

    /* Per-plane export info, chained onto each allocation's pNext. */
    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers,
                       eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    /* Pick the initial layout transition matching the image's usage:
     * DPB-only decode refs, decode outputs, encode refs, transfer
     * destinations, and a GENERAL fallback, in that priority order. */
    if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
        !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE);
    else
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free_cb, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, f);
    return NULL;
}
2890
/* Default AVVulkanFramesContext.lock_frame implementation: serializes
 * updates to an AVVkFrame's internal state via its per-frame mutex. */
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_lock(&vkf->internal->update_mutex);
}
2895
/* Default AVVulkanFramesContext.unlock_frame implementation; counterpart
 * of lock_frame(). */
static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_unlock(&vkf->internal->update_mutex);
}
2900
2901 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
2902 {
2903 VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
2904 VulkanFramesPriv *fp = hwfc->hwctx;
2905
2906 if (fp->modifier_info) {
2907 if (fp->modifier_info->pDrmFormatModifiers)
2908 av_freep(&fp->modifier_info->pDrmFormatModifiers);
2909 av_freep(&fp->modifier_info);
2910 }
2911
2912 ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
2913 ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
2914 ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);
2915
2916 av_buffer_pool_uninit(&fp->tmp);
2917 }
2918
2919 static int vulkan_frames_init(AVHWFramesContext *hwfc)
2920 {
2921 int err;
2922 AVVkFrame *f;
2923 VulkanFramesPriv *fp = hwfc->hwctx;
2924 AVVulkanFramesContext *hwctx = &fp->p;
2925 VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
2926 AVVulkanDeviceContext *dev_hwctx = &p->p;
2927 VkImageUsageFlags supported_usage;
2928 FFVulkanFunctions *vk = &p->vkctx.vkfn;
2929 const struct FFVkFormatEntry *fmt;
2930 int disable_multiplane = p->disable_multiplane ||
2931 (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE);
2932
2933 /* Defaults */
2934 if (!hwctx->nb_layers)
2935 hwctx->nb_layers = 1;
2936
2937 /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */
2938 if (p->use_linear_images &&
2939 (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
2940 hwctx->tiling = VK_IMAGE_TILING_LINEAR;
2941
2942
2943 fmt = vk_find_format_entry(hwfc->sw_format);
2944 if (!fmt) {
2945 av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n",
2946 av_get_pix_fmt_name(hwfc->sw_format));
2947 return AVERROR(EINVAL);
2948 }
2949
2950 if (hwctx->format[0] != VK_FORMAT_UNDEFINED) {
2951 if (hwctx->format[0] != fmt->vkf) {
2952 for (int i = 0; i < fmt->nb_images_fallback; i++) {
2953 if (hwctx->format[i] != fmt->fallback[i]) {
2954 av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given "
2955 "for the current sw_format %s!\n",
2956 av_get_pix_fmt_name(hwfc->sw_format));
2957 return AVERROR(EINVAL);
2958 }
2959 }
2960 }
2961
2962 /* Check if the sw_format itself is supported */
2963 err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
2964 hwctx->tiling, NULL,
2965 NULL, NULL, &supported_usage, 0,
2966 !hwctx->usage ||
2967 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
2968 if (err < 0) {
2969 av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n",
2970 av_get_pix_fmt_name(hwfc->sw_format));
2971 return AVERROR(EINVAL);
2972 }
2973 } else {
2974 err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
2975 hwctx->tiling, hwctx->format, NULL,
2976 NULL, &supported_usage,
2977 disable_multiplane,
2978 !hwctx->usage ||
2979 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
2980 if (err < 0)
2981 return err;
2982 }
2983
2984 /* Image usage flags */
2985 hwctx->usage |= supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
2986 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2987 VK_IMAGE_USAGE_STORAGE_BIT |
2988 VK_IMAGE_USAGE_SAMPLED_BIT);
2989
2990 if (p->vkctx.extensions & FF_VK_EXT_HOST_IMAGE_COPY &&
2991 !(p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY))
2992 hwctx->usage |= supported_usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
2993
2994 /* Enables encoding of images, if supported by format and extensions */
2995 if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
2996 (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
2997 FF_VK_EXT_VIDEO_MAINTENANCE_1)))
2998 hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
2999
3000 /* Image creation flags.
3001 * Only fill them in automatically if the image is not going to be used as
3002 * a DPB-only image, and we have SAMPLED/STORAGE bits set. */
3003 if (!hwctx->img_flags) {
3004 int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ||
3005 ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
3006 !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)));
3007 int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3008 VK_IMAGE_USAGE_STORAGE_BIT);
3009 hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
3010 if (sampleable && !is_lone_dpb) {
3011 hwctx->img_flags |= VK_IMAGE_CREATE_ALIAS_BIT;
3012 if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf))
3013 hwctx->img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
3014 }
3015 }
3016
3017 /* If the image has an ENCODE_SRC usage, and the maintenance1
3018 * extension is supported, check if it has a profile list.
3019 * If there's no profile list, or it has no encode operations,
3020 * then allow creating the image with no specific profile. */
3021 if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
3022 (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
3023 FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
3024 const VkVideoProfileListInfoKHR *pl;
3025 pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
3026 if (!pl) {
3027 hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
3028 } else {
3029 uint32_t i;
3030 for (i = 0; i < pl->profileCount; i++) {
3031 /* Video ops start at exactly 0x00010000 */
3032 if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000)
3033 break;
3034 }
3035 if (i == pl->profileCount)
3036 hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
3037 }
3038 }
3039
3040 if (!hwctx->lock_frame)
3041 hwctx->lock_frame = lock_frame;
3042
3043 if (!hwctx->unlock_frame)
3044 hwctx->unlock_frame = unlock_frame;
3045
3046 err = ff_vk_exec_pool_init(&p->vkctx, p->compute_qf, &fp->compute_exec,
3047 p->compute_qf->num, 0, 0, 0, NULL);
3048 if (err)
3049 return err;
3050
3051 err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->upload_exec,
3052 p->transfer_qf->num*2, 0, 0, 0, NULL);
3053 if (err)
3054 return err;
3055
3056 err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->download_exec,
3057 p->transfer_qf->num, 0, 0, 0, NULL);
3058 if (err)
3059 return err;
3060
3061 /* Test to see if allocation will fail */
3062 err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
3063 hwctx->nb_layers, hwctx->create_pnext);
3064 if (err)
3065 return err;
3066
3067 /* Collect `VkDrmFormatModifierPropertiesEXT` for each plane. Required for DRM export. */
3068 if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS && hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
3069 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
3070 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
3071 };
3072 err = vk->GetImageDrmFormatModifierPropertiesEXT(dev_hwctx->act_dev, f->img[0],
3073 &drm_mod);
3074 if (err != VK_SUCCESS) {
3075 av_log(hwfc, AV_LOG_ERROR, "Failed to get image DRM format modifier properties");
3076 vulkan_frame_free(hwfc, f);
3077 return AVERROR_EXTERNAL;
3078 }
3079 for (int i = 0; i < fmt->vk_planes; ++i) {
3080 VkDrmFormatModifierPropertiesListEXT modp;
3081 VkFormatProperties2 fmtp;
3082 VkDrmFormatModifierPropertiesEXT *mod_props = NULL;
3083
3084 modp = (VkDrmFormatModifierPropertiesListEXT) {
3085 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
3086 };
3087 fmtp = (VkFormatProperties2) {
3088 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
3089 .pNext = &modp,
3090 };
3091
3092 /* query drmFormatModifierCount by keeping pDrmFormatModifierProperties NULL */
3093 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp);
3094
3095 modp.pDrmFormatModifierProperties =
3096 av_calloc(modp.drmFormatModifierCount, sizeof(*modp.pDrmFormatModifierProperties));
3097 if (!modp.pDrmFormatModifierProperties) {
3098 vulkan_frame_free(hwfc, f);
3099 return AVERROR(ENOMEM);
3100 }
3101 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp);
3102
3103 for (uint32_t i = 0; i < modp.drmFormatModifierCount; ++i) {
3104 VkDrmFormatModifierPropertiesEXT *m = &modp.pDrmFormatModifierProperties[i];
3105 if (m->drmFormatModifier == drm_mod.drmFormatModifier) {
3106 mod_props = m;
3107 break;
3108 }
3109 }
3110
3111 if (mod_props == NULL) {
3112 av_log(hwfc, AV_LOG_ERROR, "No DRM format modifier properties found for modifier 0x%016"PRIx64"\n",
3113 drm_mod.drmFormatModifier);
3114 av_free(modp.pDrmFormatModifierProperties);
3115 vulkan_frame_free(hwfc, f);
3116 return AVERROR_EXTERNAL;
3117 }
3118
3119 fp->drm_format_modifier_properties[i] = *mod_props;
3120 av_free(modp.pDrmFormatModifierProperties);
3121 }
3122 }
3123
3124 vulkan_frame_free(hwfc, f);
3125
3126 /* If user did not specify a pool, hwfc->pool will be set to the internal one
3127 * in hwcontext.c just after this gets called */
3128 if (!hwfc->pool) {
3129 ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
3130 hwfc, vulkan_pool_alloc,
3131 NULL);
3132 if (!ffhwframesctx(hwfc)->pool_internal)
3133 return AVERROR(ENOMEM);
3134 }
3135
3136 return 0;
3137 }
3138
3139 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
3140 {
3141 frame->buf[0] = av_buffer_pool_get(hwfc->pool);
3142 if (!frame->buf[0])
3143 return AVERROR(ENOMEM);
3144
3145 frame->data[0] = frame->buf[0]->data;
3146 frame->format = AV_PIX_FMT_VULKAN;
3147 frame->width = hwfc->width;
3148 frame->height = hwfc->height;
3149
3150 return 0;
3151 }
3152
3153 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
3154 enum AVHWFrameTransferDirection dir,
3155 enum AVPixelFormat **formats)
3156 {
3157 enum AVPixelFormat *fmts;
3158 int n = 2;
3159
3160 #if CONFIG_CUDA
3161 n++;
3162 #endif
3163 fmts = av_malloc_array(n, sizeof(*fmts));
3164 if (!fmts)
3165 return AVERROR(ENOMEM);
3166
3167 n = 0;
3168 fmts[n++] = hwfc->sw_format;
3169 #if CONFIG_CUDA
3170 fmts[n++] = AV_PIX_FMT_CUDA;
3171 #endif
3172 fmts[n++] = AV_PIX_FMT_NONE;
3173
3174 *formats = fmts;
3175 return 0;
3176 }
3177
#if CONFIG_LIBDRM
/* HWMapDescriptor unmap callback for DRM->Vulkan mappings: frees the
 * AVVkFrame created during the map (images, semaphores, memory). */
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    vulkan_frame_free(hwfc, hwmap->priv);
}
3183
/* Per-plane mapping between DRM fourccs and the Vulkan formats used when
 * importing DMABUFs; multi-planar DRM frames are described layer by layer,
 * so only single-plane formats appear here. */
static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
    { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },

    // All these DRM_FORMATs were added in the same libdrm commit.
#ifdef DRM_FORMAT_XYUV8888
    { DRM_FORMAT_XYUV8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XVYU2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 } ,
    { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 } ,
    { DRM_FORMAT_XVYU16161616, VK_FORMAT_R16G16B16A16_UNORM } ,
#endif
};
3211
3212 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
3213 {
3214 for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
3215 if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
3216 return vulkan_drm_format_map[i].vk_format;
3217 return VK_FORMAT_UNDEFINED;
3218 }
3219
3220 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
3221 const AVFrame *src, int flags)
3222 {
3223 int err = 0;
3224 VkResult ret;
3225 AVVkFrame *f;
3226 int bind_counts = 0;
3227 AVHWDeviceContext *ctx = hwfc->device_ctx;
3228 VulkanDevicePriv *p = ctx->hwctx;
3229 AVVulkanDeviceContext *hwctx = &p->p;
3230 FFVulkanFunctions *vk = &p->vkctx.vkfn;
3231 const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
3232 VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
3233 VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES];
3234
3235 for (int i = 0; i < desc->nb_layers; i++) {
3236 if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
3237 av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
3238 desc->layers[i].format);
3239 return AVERROR(EINVAL);
3240 }
3241 }
3242
3243 if (!(f = av_vk_frame_alloc())) {
3244 av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
3245 err = AVERROR(ENOMEM);
3246 goto fail;
3247 }
3248
3249 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
3250
3251 for (int i = 0; i < desc->nb_layers; i++) {
3252 const int planes = desc->layers[i].nb_planes;
3253
3254 /* Semaphore */
3255 VkSemaphoreTypeCreateInfo sem_type_info = {
3256 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
3257 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
3258 .initialValue = 0,
3259 };
3260 VkSemaphoreCreateInfo sem_spawn = {
3261 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
3262 .pNext = &sem_type_info,
3263 };
3264
3265 /* Image creation */
3266 VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES];
3267 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
3268 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
3269 .drmFormatModifier = desc->objects[0].format_modifier,
3270 .drmFormatModifierPlaneCount = planes,
3271 .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts,
3272 };
3273 VkExternalMemoryImageCreateInfo ext_img_spec = {
3274 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
3275 .pNext = &ext_img_mod_spec,
3276 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3277 };
3278 VkImageCreateInfo create_info = {
3279 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
3280 .pNext = &ext_img_spec,
3281 .imageType = VK_IMAGE_TYPE_2D,
3282 .format = drm_to_vulkan_fmt(desc->layers[i].format),
3283 .extent.depth = 1,
3284 .mipLevels = 1,
3285 .arrayLayers = 1,
3286 .flags = 0x0,
3287 .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
3288 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
3289 .usage = 0x0, /* filled in below */
3290 .samples = VK_SAMPLE_COUNT_1_BIT,
3291 .pQueueFamilyIndices = p->img_qfs,
3292 .queueFamilyIndexCount = p->nb_img_qfs,
3293 .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
3294 VK_SHARING_MODE_EXCLUSIVE,
3295 };
3296
3297 /* Image format verification */
3298 VkExternalImageFormatProperties ext_props = {
3299 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
3300 };
3301 VkImageFormatProperties2 props_ret = {
3302 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
3303 .pNext = &ext_props,
3304 };
3305 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
3306 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
3307 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
3308 .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
3309 .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
3310 .sharingMode = create_info.sharingMode,
3311 };
3312 VkPhysicalDeviceExternalImageFormatInfo props_ext = {
3313 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
3314 .pNext = &props_drm_mod,
3315 .handleType = ext_img_spec.handleTypes,
3316 };
3317 VkPhysicalDeviceImageFormatInfo2 fmt_props;
3318
3319 if (flags & AV_HWFRAME_MAP_READ)
3320 create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT |
3321 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
3322 if (flags & AV_HWFRAME_MAP_WRITE)
3323 create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT |
3324 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
3325
3326 fmt_props = (VkPhysicalDeviceImageFormatInfo2) {
3327 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
3328 .pNext = &props_ext,
3329 .format = create_info.format,
3330 .type = create_info.imageType,
3331 .tiling = create_info.tiling,
3332 .usage = create_info.usage,
3333 .flags = create_info.flags,
3334 };
3335
3336 /* Check if importing is possible for this combination of parameters */
3337 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev,
3338 &fmt_props, &props_ret);
3339 if (ret != VK_SUCCESS) {
3340 av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
3341 ff_vk_ret2str(ret));
3342 err = AVERROR_EXTERNAL;
3343 goto fail;
3344 }
3345
3346 /* Set the image width/height */
3347 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
3348 hwfc->sw_format, src->width, src->height, i);
3349
3350 /* Set the subresource layout based on the layer properties */
3351 for (int j = 0; j < planes; j++) {
3352 ext_img_layouts[j].offset = desc->layers[i].planes[j].offset;
3353 ext_img_layouts[j].rowPitch = desc->layers[i].planes[j].pitch;
3354 ext_img_layouts[j].size = 0; /* The specs say so for all 3 */
3355 ext_img_layouts[j].arrayPitch = 0;
3356 ext_img_layouts[j].depthPitch = 0;
3357 }
3358
3359 /* Create image */
3360 ret = vk->CreateImage(hwctx->act_dev, &create_info,
3361 hwctx->alloc, &f->img[i]);
3362 if (ret != VK_SUCCESS) {
3363 av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
3364 ff_vk_ret2str(ret));
3365 err = AVERROR(EINVAL);
3366 goto fail;
3367 }
3368
3369 ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
3370 hwctx->alloc, &f->sem[i]);
3371 if (ret != VK_SUCCESS) {
3372 av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
3373 ff_vk_ret2str(ret));
3374 err = AVERROR_EXTERNAL;
3375 goto fail;
3376 }
3377
3378 f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL;
3379 f->layout[i] = create_info.initialLayout;
3380 f->access[i] = 0x0;
3381 f->sem_value[i] = 0;
3382 }
3383
3384 for (int i = 0; i < desc->nb_layers; i++) {
3385 /* Memory requirements */
3386 VkImageMemoryRequirementsInfo2 req_desc = {
3387 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
3388 .image = f->img[i],
3389 };
3390 VkMemoryDedicatedRequirements ded_req = {
3391 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3392 };
3393 VkMemoryRequirements2 req2 = {
3394 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3395 .pNext = &ded_req,
3396 };
3397
3398 /* Allocation/importing */
3399 VkMemoryFdPropertiesKHR fdmp = {
3400 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
3401 };
3402 /* This assumes that a layer will never be constructed from multiple
3403 * objects. If that was to happen in the real world, this code would
3404 * need to import each plane separately.
3405 */
3406 VkImportMemoryFdInfoKHR idesc = {
3407 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
3408 .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd),
3409 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3410 };
3411 VkMemoryDedicatedAllocateInfo ded_alloc = {
3412 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3413 .pNext = &idesc,
3414 .image = req_desc.image,
3415 };
3416
3417 /* Get object properties */
3418 ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev,
3419 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3420 idesc.fd, &fdmp);
3421 if (ret != VK_SUCCESS) {
3422 av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
3423 ff_vk_ret2str(ret));
3424 err = AVERROR_EXTERNAL;
3425 close(idesc.fd);
3426 goto fail;
3427 }
3428
3429 vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
3430
3431 /* Only a single bit must be set, not a range, and it must match */
3432 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;
3433
3434 err = alloc_mem(ctx, &req2.memoryRequirements,
3435 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
3436 (ded_req.prefersDedicatedAllocation ||
3437 ded_req.requiresDedicatedAllocation) ?
3438 &ded_alloc : ded_alloc.pNext,
3439 &f->flags, &f->mem[i]);
3440 if (err) {
3441 close(idesc.fd);
3442 return err;
3443 }
3444
3445 f->size[i] = req2.memoryRequirements.size;
3446 }
3447
3448 for (int i = 0; i < desc->nb_layers; i++) {
3449 const int planes = desc->layers[i].nb_planes;
3450 for (int j = 0; j < planes; j++) {
3451 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
3452 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
3453 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
3454
3455 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
3456 plane_info[bind_counts].pNext = NULL;
3457 plane_info[bind_counts].planeAspect = aspect;
3458
3459 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
3460 bind_info[bind_counts].pNext = planes > 1 ? &plane_info[bind_counts] : NULL;
3461 bind_info[bind_counts].image = f->img[i];
3462 bind_info[bind_counts].memory = f->mem[i];
3463
3464 /* Offset is already signalled via pPlaneLayouts above */
3465 bind_info[bind_counts].memoryOffset = 0;
3466
3467 bind_counts++;
3468 }
3469 }
3470
3471 /* Bind the allocated memory to the images */
3472 ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
3473 if (ret != VK_SUCCESS) {
3474 av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
3475 ff_vk_ret2str(ret));
3476 err = AVERROR_EXTERNAL;
3477 goto fail;
3478 }
3479
3480 *frame = f;
3481
3482 return 0;
3483
3484 fail:
3485 vulkan_frame_free(hwfc, f);
3486
3487 return err;
3488 }
3489
3490 static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst,
3491 const AVFrame *src, int flags)
3492 {
3493 int err;
3494 VkResult ret;
3495 AVHWDeviceContext *ctx = hwfc->device_ctx;
3496 VulkanDevicePriv *p = ctx->hwctx;
3497 VulkanFramesPriv *fp = hwfc->hwctx;
3498 AVVulkanDeviceContext *hwctx = &p->p;
3499 FFVulkanFunctions *vk = &p->vkctx.vkfn;
3500
3501 const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
3502
3503 #ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
3504 if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) {
3505 VkCommandBuffer cmd_buf;
3506 FFVkExecContext *exec;
3507 VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
3508 VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 };
3509 int nb_img_bar = 0;
3510
3511 for (int i = 0; i < desc->nb_objects; i++) {
3512 VkSemaphoreTypeCreateInfo sem_type_info = {
3513 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
3514 .semaphoreType = VK_SEMAPHORE_TYPE_BINARY,
3515 };
3516 VkSemaphoreCreateInfo sem_spawn = {
3517 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
3518 .pNext = &sem_type_info,
3519 };
3520 VkImportSemaphoreFdInfoKHR import_info;
3521 struct dma_buf_export_sync_file implicit_fd_info = {
3522 .flags = DMA_BUF_SYNC_READ,
3523 .fd = -1,
3524 };
3525
3526 if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
3527 &implicit_fd_info)) {
3528 err = AVERROR(errno);
3529 av_log(hwctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n",
3530 av_err2str(err));
3531 for (; i >= 0; i--)
3532 vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
3533 return err;
3534 }
3535
3536 ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
3537 hwctx->alloc, &drm_sync_sem[i]);
3538 if (ret != VK_SUCCESS) {
3539 av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
3540 ff_vk_ret2str(ret));
3541 err = AVERROR_EXTERNAL;
3542 for (; i >= 0; i--)
3543 vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
3544 return err;
3545 }
3546
3547 import_info = (VkImportSemaphoreFdInfoKHR) {
3548 .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
3549 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
3550 .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
3551 .semaphore = drm_sync_sem[i],
3552 .fd = implicit_fd_info.fd,
3553 };
3554
3555 ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info);
3556 if (ret != VK_SUCCESS) {
3557 av_log(hwctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n",
3558 ff_vk_ret2str(ret));
3559 err = AVERROR_EXTERNAL;
3560 for (; i >= 0; i--)
3561 vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
3562 return err;
3563 }
3564 }
3565
3566 exec = ff_vk_exec_get(&p->vkctx, &fp->compute_exec);
3567 cmd_buf = exec->buf;
3568
3569 ff_vk_exec_start(&p->vkctx, exec);
3570
3571 /* Ownership of semaphores is passed */
3572 err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec,
3573 drm_sync_sem, desc->nb_objects,
3574 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1);
3575 if (err < 0)
3576 return err;
3577
3578 err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst,
3579 VK_PIPELINE_STAGE_2_NONE,
3580 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
3581 if (err < 0)
3582 return err;
3583
3584 ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar,
3585 VK_PIPELINE_STAGE_2_NONE,
3586 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
3587 ((flags & AV_HWFRAME_MAP_READ) ?
3588 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) |
3589 ((flags & AV_HWFRAME_MAP_WRITE) ?
3590 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0),
3591 VK_IMAGE_LAYOUT_GENERAL,
3592 p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]);
3593
3594 vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
3595 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
3596 .pImageMemoryBarriers = img_bar,
3597 .imageMemoryBarrierCount = nb_img_bar,
3598 });
3599
3600 err = ff_vk_exec_submit(&p->vkctx, exec);
3601 if (err < 0)
3602 return err;
3603 } else
3604 #endif
3605 {
3606 AVVkFrame *f = (AVVkFrame *)dst->data[0];
3607 av_log(hwctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, "
3608 "image may be corrupted.\n");
3609 err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT);
3610 if (err)
3611 return err;
3612 }
3613
3614 return 0;
3615 }
3616
3617 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
3618 const AVFrame *src, int flags)
3619 {
3620 int err = 0;
3621 AVVkFrame *f;
3622
3623 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags)))
3624 return err;
3625
3626 /* The unmapping function will free this */
3627 dst->data[0] = (uint8_t *)f;
3628 dst->width = src->width;
3629 dst->height = src->height;
3630
3631 err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
3632 &vulkan_unmap_from_drm, f);
3633 if (err < 0)
3634 goto fail;
3635
3636 err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags);
3637 if (err < 0)
3638 return err;
3639
3640 av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
3641
3642 return 0;
3643
3644 fail:
3645 vulkan_frame_free(hwfc->device_ctx->hwctx, f);
3646 dst->data[0] = NULL;
3647 return err;
3648 }
3649
3650 #if CONFIG_VAAPI
3651 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
3652 AVFrame *dst, const AVFrame *src,
3653 int flags)
3654 {
3655 int err;
3656 AVFrame *tmp = av_frame_alloc();
3657 AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
3658 AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
3659 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
3660
3661 if (!tmp)
3662 return AVERROR(ENOMEM);
3663
3664 /* We have to sync since like the previous comment said, no semaphores */
3665 vaSyncSurface(vaapi_ctx->display, surface_id);
3666
3667 tmp->format = AV_PIX_FMT_DRM_PRIME;
3668
3669 err = av_hwframe_map(tmp, src, flags);
3670 if (err < 0)
3671 goto fail;
3672
3673 err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
3674 if (err < 0)
3675 goto fail;
3676
3677 err = ff_hwframe_map_replace(dst, src);
3678
3679 fail:
3680 av_frame_free(&tmp);
3681 return err;
3682 }
3683 #endif
3684 #endif
3685
3686 #if CONFIG_CUDA
/**
 * Export one Vulkan memory allocation backing a frame to CUDA external memory.
 *
 * On Windows the memory is exported as an opaque Win32 (or, pre-Windows 8,
 * KMT) handle; elsewhere as an opaque POSIX FD. The handle is then imported
 * into CUDA and stored in dst_int->ext_mem[idx] (the Win32 handle is also
 * kept in dst_int->ext_mem_handle[idx], presumably so it can be closed on
 * teardown — confirm against vulkan_free_internal()).
 *
 * @param ctx     Vulkan device context owning the memory
 * @param cuda_cu CUDA device context (not referenced directly here)
 * @param cu      loaded CUDA function table
 * @param dst_int per-frame internals receiving the exported handles
 * @param idx     index of the memory allocation within the frame
 * @param mem     Vulkan memory object to export
 * @param size    size of the allocation in bytes
 * @return 0 on success, AVERROR_EXTERNAL on failure
 */
static int export_mem_to_cuda(AVHWDeviceContext *ctx,
                              AVHWDeviceContext *cuda_cu, CudaFunctions *cu,
                              AVVkFrameInternal *dst_int, int idx,
                              VkDeviceMemory mem, size_t size)
{
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

#ifdef _WIN32
    CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
        .type = IsWindows8OrGreater()
            ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
            : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
        .size = size,
    };
    VkMemoryGetWin32HandleInfoKHR export_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
        .memory = mem,
        .handleType = IsWindows8OrGreater()
            ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
    };

    ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info,
                                      &ext_desc.handle.win32.handle);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    /* Keep the raw handle alongside the CUDA import */
    dst_int->ext_mem_handle[idx] = ext_desc.handle.win32.handle;
#else
    CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
        .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
        .size = size,
    };
    VkMemoryGetFdInfoKHR export_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .memory = mem,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
    };

    ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                             &ext_desc.handle.fd);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
#endif

    ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[idx], &ext_desc));
    if (ret < 0) {
#ifndef _WIN32
        /* Import failed, so CUDA did not take ownership of the FD */
        close(ext_desc.handle.fd);
#endif
        return AVERROR_EXTERNAL;
    }

    return 0;
}
3750
/**
 * Export one of a frame's Vulkan timeline semaphores to CUDA.
 *
 * The semaphore is exported as a Win32 handle (Windows) or POSIX FD
 * (elsewhere) and imported into CUDA as an external semaphore, stored in
 * dst_int->cu_sem[idx]. On Windows the raw handle is also kept in
 * dst_int->ext_sem_handle[idx].
 *
 * @return 0 on success, AVERROR_EXTERNAL on failure
 */
static int export_sem_to_cuda(AVHWDeviceContext *ctx,
                              AVHWDeviceContext *cuda_cu, CudaFunctions *cu,
                              AVVkFrameInternal *dst_int, int idx,
                              VkSemaphore sem)
{
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

#ifdef _WIN32
    VkSemaphoreGetWin32HandleInfoKHR sem_export = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
        .semaphore = sem,
        .handleType = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
    };
    /* Numeric literal used because the enum value may be missing from the
     * minimum supported CUDA headers */
    CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
        .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */,
    };
#else
    VkSemaphoreGetFdInfoKHR sem_export = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
        .semaphore = sem,
        .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };
    /* Same reasoning as above for the hardcoded value */
    CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
        .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */,
    };
#endif

#ifdef _WIN32
    ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export,
                                         &ext_sem_desc.handle.win32.handle);
#else
    ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                &ext_sem_desc.handle.fd);
#endif
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
#ifdef _WIN32
    dst_int->ext_sem_handle[idx] = ext_sem_desc.handle.win32.handle;
#endif

    ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[idx],
                                                 &ext_sem_desc));
    if (ret < 0) {
#ifndef _WIN32
        /* Import failed, so CUDA did not take ownership of the FD */
        close(ext_sem_desc.handle.fd);
#endif
        return AVERROR_EXTERNAL;
    }

    return 0;
}
3810
/**
 * Export a Vulkan frame's memory and semaphores to CUDA, caching the CUDA
 * objects in the frame's internals.
 *
 * Performed once per frame: when dst_int->cuda_fc_ref is already set, the
 * cached export is reused. Otherwise each Vulkan image's memory becomes CUDA
 * external memory, each semaphore a CUDA external semaphore, and a mipmapped
 * array plus its level-0 CUarray is mapped for every pixel plane.
 *
 * @return 0 on success, negative AVERROR code on failure (internals freed)
 */
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                 AVBufferRef *cuda_hwfc,
                                 const AVFrame *frame)
{
    int err;
    VkResult ret;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    /* 16-bit array elements for >8-bit content, 8-bit otherwise */
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    dst_f = (AVVkFrame *)frame->data[0];
    dst_int = dst_f->internal;

    if (!dst_int->cuda_fc_ref) {
        size_t offsets[3] = { 0 };

        /* Keep the CUDA frames context alive as long as the export exists */
        dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
        if (!dst_int->cuda_fc_ref)
            return AVERROR(ENOMEM);

        nb_images = ff_vk_count_images(dst_f);
        for (int i = 0; i < nb_images; i++) {
            err = export_mem_to_cuda(ctx, cuda_cu, cu, dst_int, i,
                                     dst_f->mem[i], dst_f->size[i]);
            if (err < 0)
                goto fail;

            err = export_sem_to_cuda(ctx, cuda_cu, cu, dst_int, i,
                                     dst_f->sem[i]);
            if (err < 0)
                goto fail;
        }

        /* Fewer images than planes: planes share an image, so query each
         * plane's byte offset within the image memory */
        if (nb_images != planes) {
            for (int i = 0; i < planes; i++) {
                VkImageSubresource subres = {
                    .aspectMask = i == 2 ? VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT :
                                  i == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT
                };
                VkSubresourceLayout layout = { 0 };
                vk->GetImageSubresourceLayout(hwctx->act_dev, dst_f->img[FFMIN(i, nb_images - 1)],
                                              &subres, &layout);
                offsets[i] = layout.offset;
            }
        }

        for (int i = 0; i < planes; i++) {
            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .offset = offsets[i],
                .arrayDesc = {
                    .Depth = 0,
                    .Format = cufmt,
                    /* Chroma plane of a 2-plane (semiplanar) format carries
                     * two interleaved channels */
                    .NumChannels = 1 + ((planes == 2) && i),
                    .Flags = 0,
                },
                .numLevels = 1,
            };
            int p_w, p_h;

            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
            tex_desc.arrayDesc.Width = p_w;
            tex_desc.arrayDesc.Height = p_h;

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[FFMIN(i, nb_images - 1)],
                                                                       &tex_desc));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            /* The arrays have a single level; grab level 0 for the copies */
            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

        }
    }

    return 0;

fail:
    vulkan_free_internal(dst_f);
    return err;
}
3913
/**
 * Upload a CUDA frame (src) into a Vulkan frame (dst).
 *
 * The Vulkan frame is prepared for external export, its memory/semaphores
 * are exported to CUDA (cached per frame), then each plane is copied on the
 * CUDA stream bracketed by external-semaphore wait/signal at consecutive
 * timeline values. On failure the cached export and dst's buffer are torn
 * down.
 *
 * @return 0 on success, negative AVERROR code on failure
 */
static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
                                          AVFrame *dst, const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)dst->data[0];

    /* Hand the frame over to external (CUDA) use */
    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    /* Wait on the current timeline value, signal the next one */
    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],
            .srcY          = 0,

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };

        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = p_w * desc->comp[i].step;
        cpy.Height = p_h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    /* Mirror the values CUDA signalled on the timeline semaphores */
    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");

    /* Reclaim the frame for Vulkan use; its result is the return value */
    return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
4003 #endif
4004
4005 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
4006 const AVFrame *src, int flags)
4007 {
4008 av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4009
4010 switch (src->format) {
4011 #if CONFIG_LIBDRM
4012 #if CONFIG_VAAPI
4013 case AV_PIX_FMT_VAAPI:
4014 if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
4015 return vulkan_map_from_vaapi(hwfc, dst, src, flags);
4016 else
4017 return AVERROR(ENOSYS);
4018 #endif
4019 case AV_PIX_FMT_DRM_PRIME:
4020 if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
4021 return vulkan_map_from_drm(hwfc, dst, src, flags);
4022 else
4023 return AVERROR(ENOSYS);
4024 #endif
4025 default:
4026 return AVERROR(ENOSYS);
4027 }
4028 }
4029
4030 #if CONFIG_LIBDRM
/* State for a Vulkan -> DRM mapping. */
typedef struct VulkanDRMMapping {
    AVDRMFrameDescriptor drm_desc; /* exported DRM frame description */
    AVVkFrame *source;             /* Vulkan frame the mapping was made from */
} VulkanDRMMapping;
4035
4036 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
4037 {
4038 AVDRMFrameDescriptor *drm_desc = hwmap->priv;
4039
4040 for (int i = 0; i < drm_desc->nb_objects; i++)
4041 close(drm_desc->objects[i].fd);
4042
4043 av_free(drm_desc);
4044 }
4045
4046 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
4047 {
4048 for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
4049 if (vulkan_drm_format_map[i].vk_format == vkfmt)
4050 return vulkan_drm_format_map[i].drm_fourcc;
4051 return DRM_FORMAT_INVALID;
4052 }
4053
4054 #define MAX_MEMORY_PLANES 4
4055 static VkImageAspectFlags plane_index_to_aspect(int plane) {
4056 if (plane == 0) return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
4057 if (plane == 1) return VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT;
4058 if (plane == 2) return VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
4059 if (plane == 3) return VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT;
4060
4061 av_assert2 (0 && "Invalid plane index");
4062 return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
4063 }
4064
/**
 * Map a Vulkan frame (src) to a DRM PRIME frame (dst).
 *
 * Prepares the frame for external export, waits on its timeline semaphores
 * so the content is complete, exports each memory allocation as a DMA-BUF FD
 * and fills in the layer/plane layout from the image's DRM format modifier.
 *
 * NOTE(review): after ff_hwframe_map_create() succeeds, drm_desc is owned by
 * the mapping (freed by vulkan_unmap_to_drm when dst is unreffed), yet later
 * error paths still `goto end` and av_free(drm_desc) — looks like a possible
 * double free / use-after-free on those paths; confirm caller behavior.
 * NOTE(review): FDs already exported into drm_desc are not closed on the
 * error path either — presumably a leak; verify.
 *
 * @return 0 on success, negative AVERROR code on failure
 */
static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };
    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .semaphoreCount = planes,
    };

    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
    if (!drm_desc)
        return AVERROR(ENOMEM);

    err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;

    /* Wait for the operation to finish so we can cleanly export it. */
    wait_info.pSemaphores = f->sem;
    wait_info.pValues     = f->sem_value;

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    /* From here on drm_desc is also reachable through the mapping's unmap
     * callback (see NOTE above about the error paths) */
    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
                                                     &drm_mod);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* One exported object per backing memory allocation */
    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                 &drm_desc->objects[i].fd);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        drm_desc->nb_objects++;
        drm_desc->objects[i].size = f->size[i];
        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
    }

    drm_desc->nb_layers = planes;
    for (int i = 0; i < drm_desc->nb_layers; i++) {
        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];

        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
        drm_desc->layers[i].nb_planes = fp->drm_format_modifier_properties[i].drmFormatModifierPlaneCount;

        if (drm_desc->layers[i].nb_planes > MAX_MEMORY_PLANES) {
            av_log(hwfc, AV_LOG_ERROR, "Too many memory planes for DRM format!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        /* Fill in each memory plane's offset/pitch from the image layout */
        for (int j = 0; j < drm_desc->layers[i].nb_planes; j++) {
            VkSubresourceLayout layout;
            VkImageSubresource sub = {
                .aspectMask = plane_index_to_aspect(j),
            };

            drm_desc->layers[i].planes[j].object_index = FFMIN(i, drm_desc->nb_objects - 1);

            vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
            drm_desc->layers[i].planes[j].offset = layout.offset;
            drm_desc->layers[i].planes[j].pitch  = layout.rowPitch;
        }

        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
            err = AVERROR_PATCHWELCOME;
            goto end;
        }


        /* NOTE(review): `continue` as the final statement of the loop body is
         * a no-op — presumably leftover from removed code; confirm intent */
        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

    }

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;

    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");

    return 0;

end:
    av_free(drm_desc);
    return err;
}
4181
4182 #if CONFIG_VAAPI
4183 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
4184 const AVFrame *src, int flags)
4185 {
4186 int err;
4187 AVFrame *tmp = av_frame_alloc();
4188 if (!tmp)
4189 return AVERROR(ENOMEM);
4190
4191 tmp->format = AV_PIX_FMT_DRM_PRIME;
4192
4193 err = vulkan_map_to_drm(hwfc, tmp, src, flags);
4194 if (err < 0)
4195 goto fail;
4196
4197 err = av_hwframe_map(dst, tmp, flags);
4198 if (err < 0)
4199 goto fail;
4200
4201 err = ff_hwframe_map_replace(dst, src);
4202
4203 fail:
4204 av_frame_free(&tmp);
4205 return err;
4206 }
4207 #endif
4208 #endif
4209
4210 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
4211 const AVFrame *src, int flags)
4212 {
4213 av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4214
4215 switch (dst->format) {
4216 #if CONFIG_LIBDRM
4217 case AV_PIX_FMT_DRM_PRIME:
4218 if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
4219 return vulkan_map_to_drm(hwfc, dst, src, flags);
4220 else
4221 return AVERROR(ENOSYS);
4222 #if CONFIG_VAAPI
4223 case AV_PIX_FMT_VAAPI:
4224 if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
4225 return vulkan_map_to_vaapi(hwfc, dst, src, flags);
4226 else
4227 return AVERROR(ENOSYS);
4228 #endif
4229 #endif
4230 default:
4231 break;
4232 }
4233 return AVERROR(ENOSYS);
4234 }
4235
4236 static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
4237 AVFrame *swf, VkBufferImageCopy *region,
4238 int planes, int upload)
4239 {
4240 int err;
4241 VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4242 FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;
4243
4244 if (upload) {
4245 for (int i = 0; i < planes; i++)
4246 av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
4247 region[i].bufferRowLength,
4248 swf->data[i],
4249 swf->linesize[i],
4250 swf->linesize[i],
4251 region[i].imageExtent.height);
4252
4253 err = ff_vk_flush_buffer(&p->vkctx, vkbuf, 0, VK_WHOLE_SIZE, 1);
4254 if (err != VK_SUCCESS) {
4255 av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
4256 av_err2str(err));
4257 return AVERROR_EXTERNAL;
4258 }
4259 } else {
4260 err = ff_vk_flush_buffer(&p->vkctx, vkbuf, 0, VK_WHOLE_SIZE, 0);
4261 if (err != VK_SUCCESS) {
4262 av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
4263 av_err2str(err));
4264 return AVERROR_EXTERNAL;
4265 }
4266
4267 for (int i = 0; i < planes; i++)
4268 av_image_copy_plane(swf->data[i],
4269 swf->linesize[i],
4270 vkbuf->mapped_mem + region[i].bufferOffset,
4271 region[i].bufferRowLength,
4272 swf->linesize[i],
4273 region[i].imageExtent.height);
4274 }
4275
4276 return 0;
4277 }
4278
/**
 * Allocate a pooled host-visible staging buffer big enough for all planes of
 * swf, and fill region[] with per-plane offsets/extents for the copy.
 *
 * Offsets and row pitches are aligned to the device's optimal buffer-copy
 * alignments.
 *
 * NOTE(review): bufferRowLength is filled with a byte pitch here even though
 * VkBufferImageCopy specifies it in texels — per the in-code comment the
 * fields are "adjusted/filled in later"; confirm the adjustment happens
 * before submission.
 *
 * @return 0 on success, negative AVERROR code on failure
 */
static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
                         AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    uint32_t p_w, p_h;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(swf->format);
    VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    size_t buf_offset = 0;
    for (int i = 0; i < planes; i++) {
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        region[i] = (VkBufferImageCopy) {
            .bufferOffset = buf_offset,
            .bufferRowLength = FFALIGN(swf->linesize[i],
                                       p->props.properties.limits.optimalBufferCopyRowPitchAlignment),
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        /* Next plane starts at the aligned end of this one */
        buf_offset += FFALIGN(p_h*region[i].bufferRowLength,
                              p->props.properties.limits.optimalBufferCopyOffsetAlignment);
    }

    /* buf_offset is now the total size needed for all planes */
    err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, buf_usage,
                                  NULL, buf_offset,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    if (err < 0)
        return err;

    return 0;
}
4317
/**
 * Try to expose a software frame's existing buffers directly to Vulkan via
 * host-pointer import, avoiding a staging copy.
 *
 * Two layouts are supported: one buffer holding all planes, or exactly one
 * buffer per plane. Anything else returns AVERROR_PATCHWELCOME so the caller
 * falls back to a copy. On success, region[] gets per-plane buffer offsets
 * relative to the imported buffers' virtual base.
 *
 * @param dst     array receiving one AVBufferRef per imported buffer
 * @param nb_bufs incremented for every imported buffer (reset by the fail
 *                path's unrefs only in value of dst[], not the counter —
 *                NOTE(review): *nb_bufs is not zeroed on failure; confirm
 *                the caller does not reuse it)
 * @return 0 on success, negative AVERROR code on failure
 */
static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
                          AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    int nb_src_bufs;
    const int planes = av_pix_fmt_count_planes(swf->format);
    VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    /* We can't host map images with negative strides */
    for (int i = 0; i < planes; i++)
        if (swf->linesize[i] < 0)
            return AVERROR(EINVAL);

    /* Count the number of buffers in the software frame */
    nb_src_bufs = 0;
    while (swf->buf[nb_src_bufs])
        nb_src_bufs++;

    /* Single buffer contains all planes */
    if (nb_src_bufs == 1) {
        err = ff_vk_host_map_buffer(&p->vkctx, &dst[0],
                                    swf->data[0], swf->buf[0],
                                    buf_usage);
        if (err < 0)
            return err;
        (*nb_bufs)++;

        /* Plane offsets are relative to the start of the single buffer */
        for (int i = 0; i < planes; i++)
            region[i].bufferOffset = ((FFVkBuffer *)dst[0]->data)->virtual_offset +
                                      swf->data[i] - swf->data[0];
    } else if (nb_src_bufs == planes) { /* One buffer per plane */
        for (int i = 0; i < planes; i++) {
            err = ff_vk_host_map_buffer(&p->vkctx, &dst[i],
                                        swf->data[i], swf->buf[i],
                                        buf_usage);
            if (err < 0)
                goto fail;
            (*nb_bufs)++;

            region[i].bufferOffset = ((FFVkBuffer *)dst[i]->data)->virtual_offset;
        }
    } else {
        /* Weird layout (3 planes, 2 buffers), patch welcome, fallback to copy */
        return AVERROR_PATCHWELCOME;
    }

    return 0;

fail:
    for (int i = 0; i < (*nb_bufs); i++)
        av_buffer_unref(&dst[i]);
    return err;
}
4374
4375 static int vulkan_transfer_host(AVHWFramesContext *hwfc, AVFrame *hwf,
4376 AVFrame *swf, int upload)
4377 {
4378 VulkanFramesPriv *fp = hwfc->hwctx;
4379 AVVulkanFramesContext *hwfc_vk = &fp->p;
4380 VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4381 AVVulkanDeviceContext *hwctx = &p->p;
4382 FFVulkanFunctions *vk = &p->vkctx.vkfn;
4383
4384 AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
4385 const int planes = av_pix_fmt_count_planes(swf->format);
4386 const int nb_images = ff_vk_count_images(hwf_vk);
4387
4388 VkSemaphoreWaitInfo sem_wait;
4389 VkHostImageLayoutTransitionInfoEXT layout_ch_info[AV_NUM_DATA_POINTERS];
4390 int nb_layout_ch = 0;
4391
4392 hwfc_vk->lock_frame(hwfc, hwf_vk);
4393
4394 for (int i = 0; i < nb_images; i++) {
4395 int compat = 0;
4396 for (int j = 0; j < p->vkctx.host_image_props.copySrcLayoutCount; j++) {
4397 if (hwf_vk->layout[i] == p->vkctx.host_image_props.pCopySrcLayouts[j]) {
4398 compat = 1;
4399 break;
4400 }
4401 }
4402 if (compat)
4403 continue;
4404
4405 layout_ch_info[nb_layout_ch] = (VkHostImageLayoutTransitionInfoEXT) {
4406 .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
4407 .image = hwf_vk->img[i],
4408 .oldLayout = hwf_vk->layout[i],
4409 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
4410 .subresourceRange = {
4411 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
4412 .levelCount = 1,
4413 .layerCount = 1,
4414 },
4415 };
4416
4417 hwf_vk->layout[i] = layout_ch_info[nb_layout_ch].newLayout;
4418 nb_layout_ch++;
4419 }
4420
4421 sem_wait = (VkSemaphoreWaitInfo) {
4422 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
4423 .pSemaphores = hwf_vk->sem,
4424 .pValues = hwf_vk->sem_value,
4425 .semaphoreCount = nb_images,
4426 };
4427
4428 vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
4429
4430 if (nb_layout_ch)
4431 vk->TransitionImageLayoutEXT(hwctx->act_dev,
4432 nb_layout_ch, layout_ch_info);
4433
4434 if (upload) {
4435 VkMemoryToImageCopyEXT region_info = {
4436 .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
4437 .imageSubresource = {
4438 .layerCount = 1,
4439 },
4440 };
4441 VkCopyMemoryToImageInfoEXT copy_info = {
4442 .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
4443 .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT,
4444 .regionCount = 1,
4445 .pRegions = &region_info,
4446 };
4447 for (int i = 0; i < planes; i++) {
4448 int img_idx = FFMIN(i, (nb_images - 1));
4449 uint32_t p_w, p_h;
4450 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
4451
4452 region_info.pHostPointer = swf->data[i];
4453 region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i);
4454 region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 };
4455 copy_info.dstImage = hwf_vk->img[img_idx];
4456 copy_info.dstImageLayout = hwf_vk->layout[img_idx];
4457
4458 vk->CopyMemoryToImageEXT(hwctx->act_dev, &copy_info);
4459 }
4460 } else {
4461 VkImageToMemoryCopyEXT region_info = {
4462 .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
4463 .imageSubresource = {
4464 .layerCount = 1,
4465 },
4466 };
4467 VkCopyImageToMemoryInfoEXT copy_info = {
4468 .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT,
4469 .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT,
4470 .regionCount = 1,
4471 .pRegions = &region_info,
4472 };
4473 for (int i = 0; i < planes; i++) {
4474 int img_idx = FFMIN(i, (nb_images - 1));
4475 uint32_t p_w, p_h;
4476 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
4477
4478 region_info.pHostPointer = swf->data[i];
4479 region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i);
4480 region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 };
4481 copy_info.srcImage = hwf_vk->img[img_idx];
4482 copy_info.srcImageLayout = hwf_vk->layout[img_idx];
4483
4484 vk->CopyImageToMemoryEXT(hwctx->act_dev, &copy_info);
4485 }
4486 }
4487
4488 hwfc_vk->unlock_frame(hwfc, hwf_vk);
4489
4490 return 0;
4491 }
4492
/* Buffer-based transfer between a software frame (swf) and a Vulkan frame
 * (hwf), submitted on the upload exec context.
 *
 * upload != 0: swf -> hwf; upload == 0: hwf -> swf.
 * Tries three strategies, in order:
 *   1. Host-image-copy fast path (vulkan_transfer_host), if the frames were
 *      allocated with HOST_TRANSFER usage;
 *   2. Host-mapping the software frame's buffers as Vulkan buffers
 *      (host_map_frame), avoiding a staging copy;
 *   3. A staging buffer with an explicit CPU copy (get_plane_buf +
 *      copy_buffer_data).
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
                                 AVFrame *swf, AVFrame *hwf,
                                 int upload)
{
    int err;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    int host_mapped = 0;

    AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
    VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane

    const int planes = av_pix_fmt_count_planes(swf->format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
    const int nb_images = ff_vk_count_images(hwf_vk);

    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
    int nb_bufs = 0;

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec;

    /* Sanity checking */
    if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
        return AVERROR(EINVAL);
    }

    if (swf->width > hwfc->width || swf->height > hwfc->height)
        return AVERROR(EINVAL);

    /* Fast path: direct host<->image copy via VK_EXT_host_image_copy.
     * NOTE(review): skipped on the NVIDIA proprietary driver — presumably a
     * driver-specific workaround (performance or correctness); confirm. */
    if (hwctx->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT &&
        !(p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY))
        return vulkan_transfer_host(hwfc, hwf, swf, upload);

    /* One buffer-image copy region per plane; offsets/aspects filled later */
    for (int i = 0; i < av_pix_fmt_count_planes(swf->format); i++) {
        uint32_t p_w, p_h;
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        /* Buffer region for this plane */
        region[i] = (VkBufferImageCopy) {
            .bufferOffset = 0,
            .bufferRowLength = swf->linesize[i],
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };
    }

    /* Setup buffers first: try importing the swframe's memory directly */
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY && !p->avoid_host_import) {
        err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
        if (err >= 0)
            host_mapped = 1;
    }

    /* Fallback: single staging buffer; on upload, CPU-copy the data into it */
    if (!host_mapped) {
        err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
        if (err < 0)
            goto end;
        nb_bufs = 1;

        if (upload) {
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
            if (err < 0)
                goto end;
        }
    }

    exec = ff_vk_exec_get(&p->vkctx, &fp->upload_exec);
    cmd_buf = exec->buf;

    ff_vk_exec_start(&p->vkctx, exec);

    /* Prep destination Vulkan frame */
    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_2_TRANSFER_BIT);
    if (err < 0)
        goto end;

    /* No need to declare buf deps for synchronous transfers (downloads) */
    if (upload) {
        /* Add the software frame backing the buffers if we're host mapping */
        if (host_mapped) {
            err = ff_vk_exec_add_dep_sw_frame(&p->vkctx, exec, swf);
            if (err < 0) {
                ff_vk_exec_discard_deps(&p->vkctx, exec);
                goto end;
            }
        }

        /* Add the buffers as a dependency */
        err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
        if (err < 0) {
            ff_vk_exec_discard_deps(&p->vkctx, exec);
            goto end;
        }
    }

    /* Transition the image to TRANSFER_DST (upload) or TRANSFER_SRC
     * (download) and set the matching access mask */
    ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
                        upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
                                 VK_ACCESS_TRANSFER_READ_BIT,
                        upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                        p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    for (int i = 0; i < planes; i++) {
        int buf_idx = FFMIN(i, (nb_bufs - 1));
        int img_idx = FFMIN(i, (nb_images - 1));
        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;

        /* bufferRowLength is in texels, linesize is in bytes: divide by the
         * bytes-per-texel step, then restore afterwards since region[] is
         * reused by copy_buffer_data() below */
        uint32_t orig_stride = region[i].bufferRowLength;
        region[i].bufferRowLength /= desc->comp[i].step;
        region[i].imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i);

        if (upload)
            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
                                     hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     1, &region[i]);
        else
            vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     vkbuf->buf,
                                     1, &region[i]);

        region[i].bufferRowLength = orig_stride;
    }

    /* Downloads are synchronous: wait, then CPU-copy out of the staging
     * buffer (unless host-mapped, in which case the data is already there) */
    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0) {
        ff_vk_exec_discard_deps(&p->vkctx, exec);
    } else if (!upload) {
        ff_vk_exec_wait(&p->vkctx, exec);
        if (!host_mapped)
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
    }

end:
    for (int i = 0; i < nb_bufs; i++)
        av_buffer_unref(&bufs[i]);

    return err;
}
4653
4654 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
4655 const AVFrame *src)
4656 {
4657 av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4658
4659 switch (src->format) {
4660 #if CONFIG_CUDA
4661 case AV_PIX_FMT_CUDA:
4662 #ifdef _WIN32
4663 if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
4664 (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
4665 #else
4666 if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
4667 (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
4668 #endif
4669 return vulkan_transfer_data_from_cuda(hwfc, dst, src);
4670 #endif
4671 default:
4672 if (src->hw_frames_ctx)
4673 return AVERROR(ENOSYS);
4674 else
4675 return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
4676 }
4677 }
4678
4679 #if CONFIG_CUDA
4680 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
4681 const AVFrame *src)
4682 {
4683 int err;
4684 CUcontext dummy;
4685 AVVkFrame *dst_f;
4686 AVVkFrameInternal *dst_int;
4687 VulkanFramesPriv *fp = hwfc->hwctx;
4688 const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
4689 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
4690 int nb_images;
4691
4692 AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
4693 AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
4694 AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
4695 AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
4696 CudaFunctions *cu = cu_internal->cuda_dl;
4697 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
4698 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
4699
4700 dst_f = (AVVkFrame *)src->data[0];
4701 nb_images = ff_vk_count_images(dst_f);
4702
4703 err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
4704 if (err < 0)
4705 return err;
4706
4707 err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
4708 if (err < 0)
4709 return err;
4710
4711 err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
4712 if (err < 0) {
4713 CHECK_CU(cu->cuCtxPopCurrent(&dummy));
4714 return err;
4715 }
4716
4717 dst_int = dst_f->internal;
4718
4719 for (int i = 0; i < planes; i++) {
4720 s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
4721 s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
4722 }
4723
4724 err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
4725 nb_images, cuda_dev->stream));
4726 if (err < 0)
4727 goto fail;
4728
4729 for (int i = 0; i < planes; i++) {
4730 CUDA_MEMCPY2D cpy = {
4731 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
4732 .dstDevice = (CUdeviceptr)dst->data[i],
4733 .dstPitch = dst->linesize[i],
4734 .dstY = 0,
4735
4736 .srcMemoryType = CU_MEMORYTYPE_ARRAY,
4737 .srcArray = dst_int->cu_array[i],
4738 };
4739
4740 int w, h;
4741 get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
4742
4743 cpy.WidthInBytes = w * desc->comp[i].step;
4744 cpy.Height = h;
4745
4746 err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4747 if (err < 0)
4748 goto fail;
4749 }
4750
4751 err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4752 nb_images, cuda_dev->stream));
4753 if (err < 0)
4754 goto fail;
4755
4756 for (int i = 0; i < planes; i++)
4757 dst_f->sem_value[i]++;
4758
4759 CHECK_CU(cu->cuCtxPopCurrent(&dummy));
4760
4761 av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
4762
4763 return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);
4764
4765 fail:
4766 CHECK_CU(cu->cuCtxPopCurrent(&dummy));
4767 vulkan_free_internal(dst_f);
4768 av_buffer_unref(&dst->buf[0]);
4769 return err;
4770 }
4771 #endif
4772
4773 static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
4774 const AVFrame *src)
4775 {
4776 av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
4777
4778 switch (dst->format) {
4779 #if CONFIG_CUDA
4780 case AV_PIX_FMT_CUDA:
4781 #ifdef _WIN32
4782 if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
4783 (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
4784 #else
4785 if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
4786 (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
4787 #endif
4788 return vulkan_transfer_data_to_cuda(hwfc, dst, src);
4789 #endif
4790 default:
4791 if (dst->hw_frames_ctx)
4792 return AVERROR(ENOSYS);
4793 else
4794 return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
4795 }
4796 }
4797
4798 static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
4799 AVHWFramesContext *src_fc, int flags)
4800 {
4801 return vulkan_frames_init(dst_fc);
4802 }
4803
4804 AVVkFrame *av_vk_frame_alloc(void)
4805 {
4806 int err;
4807 AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
4808 if (!f)
4809 return NULL;
4810
4811 f->internal = av_mallocz(sizeof(*f->internal));
4812 if (!f->internal) {
4813 av_free(f);
4814 return NULL;
4815 }
4816
4817 err = pthread_mutex_init(&f->internal->update_mutex, NULL);
4818 if (err != 0) {
4819 av_free(f->internal);
4820 av_free(f);
4821 return NULL;
4822 }
4823
4824 return f;
4825 }
4826
4827 const HWContextType ff_hwcontext_type_vulkan = {
4828 .type = AV_HWDEVICE_TYPE_VULKAN,
4829 .name = "Vulkan",
4830
4831 .device_hwctx_size = sizeof(VulkanDevicePriv),
4832 .frames_hwctx_size = sizeof(VulkanFramesPriv),
4833
4834 .device_init = &vulkan_device_init,
4835 .device_uninit = &vulkan_device_uninit,
4836 .device_create = &vulkan_device_create,
4837 .device_derive = &vulkan_device_derive,
4838
4839 .frames_get_constraints = &vulkan_frames_get_constraints,
4840 .frames_init = vulkan_frames_init,
4841 .frames_get_buffer = vulkan_get_buffer,
4842 .frames_uninit = vulkan_frames_uninit,
4843
4844 .transfer_get_formats = vulkan_transfer_get_formats,
4845 .transfer_data_to = vulkan_transfer_data_to,
4846 .transfer_data_from = vulkan_transfer_data_from,
4847
4848 .map_to = vulkan_map_to,
4849 .map_from = vulkan_map_from,
4850 .frames_derive_to = &vulkan_frames_derive_to,
4851
4852 .pix_fmts = (const enum AVPixelFormat []) {
4853 AV_PIX_FMT_VULKAN,
4854 AV_PIX_FMT_NONE
4855 },
4856 };