tools/sofa2wavs: fix build on Windows
[ffmpeg.git] / libavformat / iamf_writer.c
1 /*
2 * Immersive Audio Model and Formats muxing helpers and structs
3 * Copyright (c) 2023 James Almer <jamrial@gmail.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "libavutil/bprint.h"
23 #include "libavutil/channel_layout.h"
24 #include "libavutil/intreadwrite.h"
25 #include "libavutil/iamf.h"
26 #include "libavutil/mem.h"
27 #include "libavcodec/get_bits.h"
28 #include "libavcodec/put_bits.h"
29 #include "avformat.h"
30 #include "avio_internal.h"
31 #include "iamf.h"
32 #include "iamf_writer.h"
33
34
35 static int update_extradata(IAMFCodecConfig *codec_config)
36 {
37 GetBitContext gb;
38 PutBitContext pb;
39 int ret;
40
41 switch(codec_config->codec_id) {
42 case AV_CODEC_ID_OPUS:
43 if (codec_config->extradata_size != 19)
44 return AVERROR_INVALIDDATA;
45 codec_config->extradata_size -= 8;
46 AV_WB8(codec_config->extradata + 0, AV_RL8(codec_config->extradata + 8)); // version
47 AV_WB8(codec_config->extradata + 1, 2); // set channels to stereo
48 AV_WB16A(codec_config->extradata + 2, AV_RL16A(codec_config->extradata + 10)); // Byte swap pre-skip
49 AV_WB32A(codec_config->extradata + 4, AV_RL32A(codec_config->extradata + 12)); // Byte swap sample rate
50 AV_WB16A(codec_config->extradata + 8, 0); // set Output Gain to 0
51 AV_WB8(codec_config->extradata + 10, AV_RL8(codec_config->extradata + 18)); // Mapping family
52 break;
53 case AV_CODEC_ID_FLAC: {
54 uint8_t buf[13];
55
56 init_put_bits(&pb, buf, sizeof(buf));
57 ret = init_get_bits8(&gb, codec_config->extradata, codec_config->extradata_size);
58 if (ret < 0)
59 return ret;
60
61 put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
62 put_bits63(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
63 put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
64 skip_bits(&gb, 3);
65 put_bits(&pb, 3, 1); // set channels to stereo
66 ret = put_bits_left(&pb);
67 put_bits(&pb, ret, get_bits(&gb, ret));
68 flush_put_bits(&pb);
69
70 memcpy(codec_config->extradata, buf, sizeof(buf));
71 break;
72 }
73 default:
74 break;
75 }
76
77 return 0;
78 }
79
80 static int populate_audio_roll_distance(IAMFCodecConfig *codec_config)
81 {
82 switch (codec_config->codec_id) {
83 case AV_CODEC_ID_OPUS:
84 if (!codec_config->nb_samples)
85 return AVERROR(EINVAL);
86 // ceil(3840 / nb_samples)
87 codec_config->audio_roll_distance = -(1 + ((3840 - 1) / codec_config->nb_samples));
88 break;
89 case AV_CODEC_ID_AAC:
90 codec_config->audio_roll_distance = -1;
91 break;
92 case AV_CODEC_ID_FLAC:
93 case AV_CODEC_ID_PCM_S16BE:
94 case AV_CODEC_ID_PCM_S24BE:
95 case AV_CODEC_ID_PCM_S32BE:
96 case AV_CODEC_ID_PCM_S16LE:
97 case AV_CODEC_ID_PCM_S24LE:
98 case AV_CODEC_ID_PCM_S32LE:
99 codec_config->audio_roll_distance = 0;
100 break;
101 default:
102 return AVERROR(EINVAL);
103 }
104
105 return 0;
106 }
107
108 static int fill_codec_config(IAMFContext *iamf, const AVStreamGroup *stg,
109 IAMFCodecConfig *codec_config)
110 {
111 const AVStream *st = stg->streams[0];
112 IAMFCodecConfig **tmp;
113 int j, ret = 0;
114
115 codec_config->codec_id = st->codecpar->codec_id;
116 codec_config->codec_tag = st->codecpar->codec_tag;
117 switch (codec_config->codec_id) {
118 case AV_CODEC_ID_OPUS:
119 codec_config->sample_rate = 48000;
120 codec_config->nb_samples = av_rescale(st->codecpar->frame_size, 48000, st->codecpar->sample_rate);
121 break;
122 default:
123 codec_config->sample_rate = st->codecpar->sample_rate;
124 codec_config->nb_samples = st->codecpar->frame_size;
125 break;
126 }
127 populate_audio_roll_distance(codec_config);
128 if (st->codecpar->extradata_size) {
129 codec_config->extradata = av_memdup(st->codecpar->extradata, st->codecpar->extradata_size);
130 if (!codec_config->extradata)
131 return AVERROR(ENOMEM);
132 codec_config->extradata_size = st->codecpar->extradata_size;
133 ret = update_extradata(codec_config);
134 if (ret < 0)
135 goto fail;
136 }
137
138 for (j = 0; j < iamf->nb_codec_configs; j++) {
139 if (!memcmp(iamf->codec_configs[j], codec_config, offsetof(IAMFCodecConfig, extradata)) &&
140 (!codec_config->extradata_size || !memcmp(iamf->codec_configs[j]->extradata,
141 codec_config->extradata, codec_config->extradata_size)))
142 break;
143 }
144
145 if (j < iamf->nb_codec_configs) {
146 av_free(iamf->codec_configs[j]->extradata);
147 av_free(iamf->codec_configs[j]);
148 iamf->codec_configs[j] = codec_config;
149 return j;
150 }
151
152 tmp = av_realloc_array(iamf->codec_configs, iamf->nb_codec_configs + 1, sizeof(*iamf->codec_configs));
153 if (!tmp) {
154 ret = AVERROR(ENOMEM);
155 goto fail;
156 }
157
158 iamf->codec_configs = tmp;
159 iamf->codec_configs[iamf->nb_codec_configs] = codec_config;
160 codec_config->codec_config_id = iamf->nb_codec_configs;
161
162 return iamf->nb_codec_configs++;
163
164 fail:
165 av_freep(&codec_config->extradata);
166 return ret;
167 }
168
169 static int add_param_definition(IAMFContext *iamf, AVIAMFParamDefinition *param,
170 const IAMFAudioElement *audio_element, void *log_ctx)
171 {
172 IAMFParamDefinition **tmp, *param_definition;
173 IAMFCodecConfig *codec_config = NULL;
174
175 tmp = av_realloc_array(iamf->param_definitions, iamf->nb_param_definitions + 1,
176 sizeof(*iamf->param_definitions));
177 if (!tmp)
178 return AVERROR(ENOMEM);
179
180 iamf->param_definitions = tmp;
181
182 if (audio_element)
183 codec_config = iamf->codec_configs[audio_element->codec_config_id];
184
185 if (!param->parameter_rate) {
186 if (!codec_config) {
187 av_log(log_ctx, AV_LOG_ERROR, "parameter_rate needed but not set for parameter_id %u\n",
188 param->parameter_id);
189 return AVERROR(EINVAL);
190 }
191 param->parameter_rate = codec_config->sample_rate;
192 }
193 if (codec_config) {
194 if (!param->duration)
195 param->duration = av_rescale(codec_config->nb_samples, param->parameter_rate, codec_config->sample_rate);
196 if (!param->constant_subblock_duration)
197 param->constant_subblock_duration = av_rescale(codec_config->nb_samples, param->parameter_rate, codec_config->sample_rate);
198 }
199
200 param_definition = av_mallocz(sizeof(*param_definition));
201 if (!param_definition)
202 return AVERROR(ENOMEM);
203
204 param_definition->mode = !!param->duration;
205 param_definition->param = param;
206 param_definition->audio_element = audio_element;
207 iamf->param_definitions[iamf->nb_param_definitions++] = param_definition;
208
209 return 0;
210 }
211
212 int ff_iamf_add_audio_element(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx)
213 {
214 const AVIAMFAudioElement *iamf_audio_element;
215 IAMFAudioElement **tmp, *audio_element;
216 IAMFCodecConfig *codec_config;
217 int ret;
218
219 if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
220 return AVERROR(EINVAL);
221 if (!stg->nb_streams) {
222 av_log(log_ctx, AV_LOG_ERROR, "Audio Element id %"PRId64" has no streams\n", stg->id);
223 return AVERROR(EINVAL);
224 }
225
226 iamf_audio_element = stg->params.iamf_audio_element;
227 if (iamf_audio_element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
228 const AVIAMFLayer *layer = iamf_audio_element->layers[0];
229 if (iamf_audio_element->nb_layers != 1) {
230 av_log(log_ctx, AV_LOG_ERROR, "Invalid amount of layers for SCENE_BASED audio element. Must be 1\n");
231 return AVERROR(EINVAL);
232 }
233 if (layer->ch_layout.order != AV_CHANNEL_ORDER_CUSTOM &&
234 layer->ch_layout.order != AV_CHANNEL_ORDER_AMBISONIC) {
235 av_log(log_ctx, AV_LOG_ERROR, "Invalid channel layout for SCENE_BASED audio element\n");
236 return AVERROR(EINVAL);
237 }
238 if (layer->ambisonics_mode >= AV_IAMF_AMBISONICS_MODE_PROJECTION) {
239 av_log(log_ctx, AV_LOG_ERROR, "Unsupported ambisonics mode %d\n", layer->ambisonics_mode);
240 return AVERROR_PATCHWELCOME;
241 }
242 for (int i = 0; i < stg->nb_streams; i++) {
243 if (stg->streams[i]->codecpar->ch_layout.nb_channels > 1) {
244 av_log(log_ctx, AV_LOG_ERROR, "Invalid amount of channels in a stream for MONO mode ambisonics\n");
245 return AVERROR(EINVAL);
246 }
247 }
248 } else {
249 AVBPrint bp;
250
251 if (iamf_audio_element->nb_layers < 1) {
252 av_log(log_ctx, AV_LOG_ERROR, "Invalid amount of layers for CHANNEL_BASED audio element. Must be >= 1\n");
253 return AVERROR(EINVAL);
254 }
255
256 for (int j, i = 0; i < iamf_audio_element->nb_layers; i++) {
257 const AVIAMFLayer *layer = iamf_audio_element->layers[i];
258
259 for (j = 0; j < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); j++)
260 if (av_channel_layout_subset(&layer->ch_layout, UINT64_MAX) ==
261 av_channel_layout_subset(&ff_iamf_scalable_ch_layouts[j], UINT64_MAX))
262 break;
263
264 if (j >= FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts)) {
265 for (j = 0; j < FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts); j++)
266 if (av_channel_layout_subset(&layer->ch_layout, UINT64_MAX) ==
267 av_channel_layout_subset(&ff_iamf_expanded_scalable_ch_layouts[j], UINT64_MAX))
268 break;
269
270 if (j >= FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts)) {
271 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
272 av_channel_layout_describe_bprint(&layer->ch_layout, &bp);
273 av_log(log_ctx, AV_LOG_ERROR, "Unsupported channel layout in Audio Element id %"PRId64
274 ", Layer %d: %s\n",
275 stg->id, i, bp.str);
276 av_bprint_finalize(&bp, NULL);
277 return AVERROR(EINVAL);
278 }
279 }
280
281 if (!i)
282 continue;
283
284 const AVIAMFLayer *prev_layer = iamf_audio_element->layers[i-1];
285 uint64_t prev_mask = av_channel_layout_subset(&prev_layer->ch_layout, UINT64_MAX);
286 if (av_channel_layout_subset(&layer->ch_layout, prev_mask) != prev_mask || (layer->ch_layout.nb_channels <=
287 prev_layer->ch_layout.nb_channels)) {
288 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
289 av_bprintf(&bp, "Channel layout \"");
290 av_channel_layout_describe_bprint(&layer->ch_layout, &bp);
291 av_bprintf(&bp, "\" can't follow channel layout \"");
292 av_channel_layout_describe_bprint(&prev_layer->ch_layout, &bp);
293 av_bprintf(&bp, "\" in Scalable Audio Element id %"PRId64, stg->id);
294 av_log(log_ctx, AV_LOG_ERROR, "%s\n", bp.str);
295 av_bprint_finalize(&bp, NULL);
296 return AVERROR(EINVAL);
297 }
298 }
299 }
300
301 for (int i = 0; i < iamf->nb_audio_elements; i++) {
302 if (stg->id == iamf->audio_elements[i]->audio_element_id) {
303 av_log(log_ctx, AV_LOG_ERROR, "Duplicated Audio Element id %"PRId64"\n", stg->id);
304 return AVERROR(EINVAL);
305 }
306 }
307
308 codec_config = av_mallocz(sizeof(*codec_config));
309 if (!codec_config)
310 return AVERROR(ENOMEM);
311
312 ret = fill_codec_config(iamf, stg, codec_config);
313 if (ret < 0) {
314 av_free(codec_config);
315 return ret;
316 }
317
318 audio_element = av_mallocz(sizeof(*audio_element));
319 if (!audio_element)
320 return AVERROR(ENOMEM);
321
322 audio_element->celement = stg->params.iamf_audio_element;
323 audio_element->audio_element_id = stg->id;
324 audio_element->codec_config_id = ret;
325
326 audio_element->substreams = av_calloc(stg->nb_streams, sizeof(*audio_element->substreams));
327 if (!audio_element->substreams) {
328 ret = AVERROR(ENOMEM);
329 goto fail;
330 }
331 audio_element->nb_substreams = stg->nb_streams;
332
333 audio_element->layers = av_calloc(iamf_audio_element->nb_layers, sizeof(*audio_element->layers));
334 if (!audio_element->layers) {
335 ret = AVERROR(ENOMEM);
336 goto fail;
337 }
338
339 for (int i = 0, j = 0; i < iamf_audio_element->nb_layers; i++) {
340 int nb_channels = iamf_audio_element->layers[i]->ch_layout.nb_channels;
341
342 IAMFLayer *layer = &audio_element->layers[i];
343
344 if (i)
345 nb_channels -= iamf_audio_element->layers[i - 1]->ch_layout.nb_channels;
346 for (; nb_channels > 0 && j < stg->nb_streams; j++) {
347 const AVStream *st = stg->streams[j];
348 IAMFSubStream *substream = &audio_element->substreams[j];
349
350 substream->audio_substream_id = st->id;
351 layer->substream_count++;
352 layer->coupled_substream_count += st->codecpar->ch_layout.nb_channels == 2;
353 nb_channels -= st->codecpar->ch_layout.nb_channels;
354 }
355 if (nb_channels) {
356 av_log(log_ctx, AV_LOG_ERROR, "Invalid channel count across substreams in layer %u from stream group %u\n",
357 i, stg->index);
358 ret = AVERROR(EINVAL);
359 goto fail;
360 }
361 }
362
363 for (int i = 0; i < audio_element->nb_substreams; i++) {
364 for (int j = i + 1; j < audio_element->nb_substreams; j++)
365 if (audio_element->substreams[i].audio_substream_id ==
366 audio_element->substreams[j].audio_substream_id) {
367 av_log(log_ctx, AV_LOG_ERROR, "Duplicate id %u in streams %u and %u from stream group %u\n",
368 audio_element->substreams[i].audio_substream_id, i, j, stg->index);
369 ret = AVERROR(EINVAL);
370 goto fail;
371 }
372 }
373
374 if (iamf_audio_element->demixing_info) {
375 AVIAMFParamDefinition *param = iamf_audio_element->demixing_info;
376 const IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
377
378 if (param->nb_subblocks != 1) {
379 av_log(log_ctx, AV_LOG_ERROR, "nb_subblocks in demixing_info for stream group %u is not 1\n", stg->index);
380 ret = AVERROR(EINVAL);
381 goto fail;
382 }
383
384 if (!param_definition) {
385 ret = add_param_definition(iamf, param, audio_element, log_ctx);
386 if (ret < 0)
387 goto fail;
388 }
389 }
390 if (iamf_audio_element->recon_gain_info) {
391 AVIAMFParamDefinition *param = iamf_audio_element->recon_gain_info;
392 const IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
393
394 if (param->nb_subblocks != 1) {
395 av_log(log_ctx, AV_LOG_ERROR, "nb_subblocks in recon_gain_info for stream group %u is not 1\n", stg->index);
396 ret = AVERROR(EINVAL);
397 goto fail;
398 }
399
400 if (!param_definition) {
401 ret = add_param_definition(iamf, param, audio_element, log_ctx);
402 if (ret < 0)
403 goto fail;
404 }
405 }
406
407 tmp = av_realloc_array(iamf->audio_elements, iamf->nb_audio_elements + 1, sizeof(*iamf->audio_elements));
408 if (!tmp) {
409 ret = AVERROR(ENOMEM);
410 goto fail;
411 }
412
413 iamf->audio_elements = tmp;
414 iamf->audio_elements[iamf->nb_audio_elements++] = audio_element;
415
416 return 0;
417 fail:
418 ff_iamf_free_audio_element(&audio_element);
419 return ret;
420 }
421
422 int ff_iamf_add_mix_presentation(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx)
423 {
424 IAMFMixPresentation **tmp, *mix_presentation;
425 int ret;
426
427 if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
428 return AVERROR(EINVAL);
429 if (!stg->nb_streams) {
430 av_log(log_ctx, AV_LOG_ERROR, "Mix Presentation id %"PRId64" has no streams\n", stg->id);
431 return AVERROR(EINVAL);
432 }
433
434 for (int i = 0; i < iamf->nb_mix_presentations; i++) {
435 if (stg->id == iamf->mix_presentations[i]->mix_presentation_id) {
436 av_log(log_ctx, AV_LOG_ERROR, "Duplicate Mix Presentation id %"PRId64"\n", stg->id);
437 return AVERROR(EINVAL);
438 }
439 }
440
441 mix_presentation = av_mallocz(sizeof(*mix_presentation));
442 if (!mix_presentation)
443 return AVERROR(ENOMEM);
444
445 mix_presentation->cmix = stg->params.iamf_mix_presentation;
446 mix_presentation->mix_presentation_id = stg->id;
447
448 for (int i = 0; i < mix_presentation->cmix->nb_submixes; i++) {
449 const AVIAMFSubmix *submix = mix_presentation->cmix->submixes[i];
450 AVIAMFParamDefinition *param = submix->output_mix_config;
451 IAMFParamDefinition *param_definition;
452
453 if (!param) {
454 av_log(log_ctx, AV_LOG_ERROR, "output_mix_config is not present in submix %u from "
455 "Mix Presentation ID %"PRId64"\n", i, stg->id);
456 ret = AVERROR(EINVAL);
457 goto fail;
458 }
459
460 param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
461 if (!param_definition) {
462 ret = add_param_definition(iamf, param, NULL, log_ctx);
463 if (ret < 0)
464 goto fail;
465 }
466
467 for (int j = 0; j < submix->nb_elements; j++) {
468 const AVIAMFSubmixElement *element = submix->elements[j];
469 param = element->element_mix_config;
470
471 if (!param) {
472 av_log(log_ctx, AV_LOG_ERROR, "element_mix_config is not present for element %u in submix %u from "
473 "Mix Presentation ID %"PRId64"\n", j, i, stg->id);
474 ret = AVERROR(EINVAL);
475 goto fail;
476 }
477 param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
478 if (!param_definition) {
479 ret = add_param_definition(iamf, param, NULL, log_ctx);
480 if (ret < 0)
481 goto fail;
482 }
483 }
484 }
485
486 tmp = av_realloc_array(iamf->mix_presentations, iamf->nb_mix_presentations + 1, sizeof(*iamf->mix_presentations));
487 if (!tmp) {
488 ret = AVERROR(ENOMEM);
489 goto fail;
490 }
491
492 iamf->mix_presentations = tmp;
493 iamf->mix_presentations[iamf->nb_mix_presentations++] = mix_presentation;
494
495 return 0;
496 fail:
497 ff_iamf_free_mix_presentation(&mix_presentation);
498 return ret;
499 }
500
501 static int iamf_write_codec_config(const IAMFContext *iamf,
502 const IAMFCodecConfig *codec_config,
503 AVIOContext *pb)
504 {
505 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
506 AVIOContext *dyn_bc;
507 uint8_t *dyn_buf = NULL;
508 PutBitContext pbc;
509 int dyn_size;
510
511 int ret = avio_open_dyn_buf(&dyn_bc);
512 if (ret < 0)
513 return ret;
514
515 ffio_write_leb(dyn_bc, codec_config->codec_config_id);
516 avio_wl32(dyn_bc, codec_config->codec_tag);
517
518 ffio_write_leb(dyn_bc, codec_config->nb_samples);
519 avio_wb16(dyn_bc, codec_config->audio_roll_distance);
520
521 switch(codec_config->codec_id) {
522 case AV_CODEC_ID_OPUS:
523 avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
524 break;
525 case AV_CODEC_ID_AAC:
526 return AVERROR_PATCHWELCOME;
527 case AV_CODEC_ID_FLAC:
528 avio_w8(dyn_bc, 0x80);
529 avio_wb24(dyn_bc, codec_config->extradata_size);
530 avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
531 break;
532 case AV_CODEC_ID_PCM_S16LE:
533 avio_w8(dyn_bc, 1);
534 avio_w8(dyn_bc, 16);
535 avio_wb32(dyn_bc, codec_config->sample_rate);
536 break;
537 case AV_CODEC_ID_PCM_S24LE:
538 avio_w8(dyn_bc, 1);
539 avio_w8(dyn_bc, 24);
540 avio_wb32(dyn_bc, codec_config->sample_rate);
541 break;
542 case AV_CODEC_ID_PCM_S32LE:
543 avio_w8(dyn_bc, 1);
544 avio_w8(dyn_bc, 32);
545 avio_wb32(dyn_bc, codec_config->sample_rate);
546 break;
547 case AV_CODEC_ID_PCM_S16BE:
548 avio_w8(dyn_bc, 0);
549 avio_w8(dyn_bc, 16);
550 avio_wb32(dyn_bc, codec_config->sample_rate);
551 break;
552 case AV_CODEC_ID_PCM_S24BE:
553 avio_w8(dyn_bc, 0);
554 avio_w8(dyn_bc, 24);
555 avio_wb32(dyn_bc, codec_config->sample_rate);
556 break;
557 case AV_CODEC_ID_PCM_S32BE:
558 avio_w8(dyn_bc, 0);
559 avio_w8(dyn_bc, 32);
560 avio_wb32(dyn_bc, codec_config->sample_rate);
561 break;
562 default:
563 break;
564 }
565
566 init_put_bits(&pbc, header, sizeof(header));
567 put_bits(&pbc, 5, IAMF_OBU_IA_CODEC_CONFIG);
568 put_bits(&pbc, 3, 0);
569 flush_put_bits(&pbc);
570
571 dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
572 avio_write(pb, header, put_bytes_count(&pbc, 1));
573 ffio_write_leb(pb, dyn_size);
574 avio_write(pb, dyn_buf, dyn_size);
575 ffio_free_dyn_buf(&dyn_bc);
576
577 return 0;
578 }
579
580 static inline int rescale_rational(AVRational q, int b)
581 {
582 return av_clip_int16(av_rescale(q.num, b, q.den));
583 }
584
585 static void get_loudspeaker_layout(const AVIAMFLayer *layer,
586 int *playout, int *pexpanded_layout)
587 {
588 int layout, expanded_layout = -1;
589
590 for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); layout++) {
591 if (!av_channel_layout_compare(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[layout]))
592 break;
593 }
594 if (layout >= FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts)) {
595 for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); layout++)
596 if (av_channel_layout_subset(&layer->ch_layout, UINT64_MAX) ==
597 av_channel_layout_subset(&ff_iamf_scalable_ch_layouts[layout], UINT64_MAX))
598 break;
599 }
600 if (layout >= FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts)) {
601 layout = 15;
602 for (expanded_layout = 0; expanded_layout < FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts); expanded_layout++) {
603 if (!av_channel_layout_compare(&layer->ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_layout]))
604 break;
605 }
606 if (expanded_layout >= FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts)) {
607 for (expanded_layout = 0; expanded_layout < FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts); expanded_layout++)
608 if (av_channel_layout_subset(&layer->ch_layout, UINT64_MAX) ==
609 av_channel_layout_subset(&ff_iamf_expanded_scalable_ch_layouts[expanded_layout], UINT64_MAX))
610 break;
611 }
612 }
613 av_assert0((expanded_layout > 0 && expanded_layout < FF_ARRAY_ELEMS(ff_iamf_expanded_scalable_ch_layouts)) ||
614 layout < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts));
615
616 *playout = layout;
617 *pexpanded_layout = expanded_layout;
618 }
619
620 static int scalable_channel_layout_config(const IAMFAudioElement *audio_element,
621 AVIOContext *dyn_bc)
622 {
623 const AVIAMFAudioElement *element = audio_element->celement;
624 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
625 PutBitContext pb;
626
627 init_put_bits(&pb, header, sizeof(header));
628 put_bits(&pb, 3, element->nb_layers);
629 put_bits(&pb, 5, 0);
630 flush_put_bits(&pb);
631 avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
632 for (int i = 0; i < element->nb_layers; i++) {
633 const AVIAMFLayer *layer = element->layers[i];
634 int layout, expanded_layout;
635
636 get_loudspeaker_layout(layer, &layout, &expanded_layout);
637 init_put_bits(&pb, header, sizeof(header));
638 put_bits(&pb, 4, layout);
639 put_bits(&pb, 1, !!layer->output_gain_flags);
640 put_bits(&pb, 1, !!(layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN));
641 put_bits(&pb, 2, 0); // reserved
642 put_bits(&pb, 8, audio_element->layers[i].substream_count);
643 put_bits(&pb, 8, audio_element->layers[i].coupled_substream_count);
644 if (layer->output_gain_flags) {
645 put_bits(&pb, 6, layer->output_gain_flags);
646 put_bits(&pb, 2, 0);
647 put_bits(&pb, 16, rescale_rational(layer->output_gain, 1 << 8));
648 }
649 if (expanded_layout >= 0)
650 put_bits(&pb, 8, expanded_layout);
651 flush_put_bits(&pb);
652 avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
653 }
654
655 return 0;
656 }
657
658 static int ambisonics_config(const IAMFAudioElement *audio_element,
659 AVIOContext *dyn_bc)
660 {
661 const AVIAMFAudioElement *element = audio_element->celement;
662 const AVIAMFLayer *layer = element->layers[0];
663
664 ffio_write_leb(dyn_bc, 0); // ambisonics_mode
665 ffio_write_leb(dyn_bc, layer->ch_layout.nb_channels); // output_channel_count
666 ffio_write_leb(dyn_bc, audio_element->nb_substreams); // substream_count
667
668 if (layer->ch_layout.order == AV_CHANNEL_ORDER_AMBISONIC)
669 for (int i = 0; i < layer->ch_layout.nb_channels; i++)
670 avio_w8(dyn_bc, i);
671 else
672 for (int i = 0; i < layer->ch_layout.nb_channels; i++)
673 avio_w8(dyn_bc, layer->ch_layout.u.map[i].id);
674
675 return 0;
676 }
677
678 static int param_definition(const IAMFContext *iamf,
679 const IAMFParamDefinition *param_def,
680 AVIOContext *dyn_bc, void *log_ctx)
681 {
682 const AVIAMFParamDefinition *param = param_def->param;
683
684 ffio_write_leb(dyn_bc, param->parameter_id);
685 ffio_write_leb(dyn_bc, param->parameter_rate);
686 avio_w8(dyn_bc, param->duration ? 0 : 1 << 7);
687 if (param->duration) {
688 ffio_write_leb(dyn_bc, param->duration);
689 ffio_write_leb(dyn_bc, param->constant_subblock_duration);
690 if (param->constant_subblock_duration == 0) {
691 ffio_write_leb(dyn_bc, param->nb_subblocks);
692 for (int i = 0; i < param->nb_subblocks; i++) {
693 const void *subblock = av_iamf_param_definition_get_subblock(param, i);
694
695 switch (param->type) {
696 case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
697 const AVIAMFMixGain *mix = subblock;
698 ffio_write_leb(dyn_bc, mix->subblock_duration);
699 break;
700 }
701 case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
702 const AVIAMFDemixingInfo *demix = subblock;
703 ffio_write_leb(dyn_bc, demix->subblock_duration);
704 break;
705 }
706 case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
707 const AVIAMFReconGain *recon = subblock;
708 ffio_write_leb(dyn_bc, recon->subblock_duration);
709 break;
710 }
711 }
712 }
713 }
714 }
715
716 return 0;
717 }
718
719 static int iamf_write_audio_element(const IAMFContext *iamf,
720 const IAMFAudioElement *audio_element,
721 AVIOContext *pb, void *log_ctx)
722 {
723 const AVIAMFAudioElement *element = audio_element->celement;
724 const IAMFCodecConfig *codec_config = iamf->codec_configs[audio_element->codec_config_id];
725 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
726 AVIOContext *dyn_bc;
727 uint8_t *dyn_buf = NULL;
728 PutBitContext pbc;
729 int param_definition_types = AV_IAMF_PARAMETER_DEFINITION_DEMIXING, dyn_size;
730
731 int ret = avio_open_dyn_buf(&dyn_bc);
732 if (ret < 0)
733 return ret;
734
735 ffio_write_leb(dyn_bc, audio_element->audio_element_id);
736
737 init_put_bits(&pbc, header, sizeof(header));
738 put_bits(&pbc, 3, element->audio_element_type);
739 put_bits(&pbc, 5, 0);
740 flush_put_bits(&pbc);
741 avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
742
743 ffio_write_leb(dyn_bc, audio_element->codec_config_id);
744 ffio_write_leb(dyn_bc, audio_element->nb_substreams);
745
746 for (int i = 0; i < audio_element->nb_substreams; i++)
747 ffio_write_leb(dyn_bc, audio_element->substreams[i].audio_substream_id);
748
749 /* When audio_element_type = 1, num_parameters SHALL be set to 0 */
750 if (element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE)
751 param_definition_types = 0;
752 else {
753 int layout = 0, expanded_layout = 0;
754 get_loudspeaker_layout(element->layers[0], &layout, &expanded_layout);
755 /* When the loudspeaker_layout = 15, the type PARAMETER_DEFINITION_DEMIXING SHALL NOT be present. */
756 if (layout == 15)
757 param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_DEMIXING;
758 /* When the loudspeaker_layout of the (non-)scalable channel audio (i.e., num_layers = 1) is less than or equal to 3.1.2ch,
759 * (i.e., Mono, Stereo, or 3.1.2ch), the type PARAMETER_DEFINITION_DEMIXING SHALL NOT be present. */
760 else if (element->nb_layers == 1 && (layout == 0 || layout == 1 || layout == 8))
761 param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_DEMIXING;
762 /* When num_layers > 1, the type PARAMETER_DEFINITION_RECON_GAIN SHALL be present */
763 if (element->nb_layers > 1)
764 param_definition_types |= AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
765 /* When codec_id = fLaC or ipcm, the type PARAMETER_DEFINITION_RECON_GAIN SHALL NOT be present. */
766 if (codec_config->codec_tag == MKTAG('f','L','a','C') ||
767 codec_config->codec_tag == MKTAG('i','p','c','m'))
768 param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
769 if ((param_definition_types & AV_IAMF_PARAMETER_DEFINITION_DEMIXING) && !element->demixing_info) {
770 if (element->nb_layers > 1) {
771 get_loudspeaker_layout(element->layers[element->nb_layers-1], &layout, &expanded_layout);
772 /* When the highest loudspeaker_layout of the scalable channel audio (i.e., num_layers > 1) is greater than 3.1.2ch,
773 * (i.e., 5.1.2ch, 5.1.4ch, 7.1.2ch, or 7.1.4ch), type PARAMETER_DEFINITION_DEMIXING SHALL be present. */
774 if (layout == 3 || layout == 4 || layout == 6 || layout == 7) {
775 av_log(log_ctx, AV_LOG_ERROR, "demixing_info needed but not set in Stream Group #%u\n",
776 audio_element->audio_element_id);
777 return AVERROR(EINVAL);
778 }
779 }
780 param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_DEMIXING;
781 }
782 }
783
784 ffio_write_leb(dyn_bc, av_popcount(param_definition_types)); // num_parameters
785
786 if (param_definition_types & AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
787 const AVIAMFParamDefinition *param = element->demixing_info;
788 const IAMFParamDefinition *param_def;
789 const AVIAMFDemixingInfo *demix;
790
791 demix = av_iamf_param_definition_get_subblock(param, 0);
792 ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_DEMIXING); // type
793
794 param_def = ff_iamf_get_param_definition(iamf, param->parameter_id);
795 ret = param_definition(iamf, param_def, dyn_bc, log_ctx);
796 if (ret < 0)
797 return ret;
798
799 avio_w8(dyn_bc, demix->dmixp_mode << 5); // dmixp_mode
800 avio_w8(dyn_bc, element->default_w << 4); // default_w
801 }
802 if (param_definition_types & AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
803 const AVIAMFParamDefinition *param = element->recon_gain_info;
804 const IAMFParamDefinition *param_def;
805
806 if (!param) {
807 av_log(log_ctx, AV_LOG_ERROR, "recon_gain_info needed but not set in Stream Group #%u\n",
808 audio_element->audio_element_id);
809 return AVERROR(EINVAL);
810 }
811 ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN); // type
812
813 param_def = ff_iamf_get_param_definition(iamf, param->parameter_id);
814 ret = param_definition(iamf, param_def, dyn_bc, log_ctx);
815 if (ret < 0)
816 return ret;
817 }
818
819 if (element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
820 ret = scalable_channel_layout_config(audio_element, dyn_bc);
821 if (ret < 0)
822 return ret;
823 } else {
824 ret = ambisonics_config(audio_element, dyn_bc);
825 if (ret < 0)
826 return ret;
827 }
828
829 init_put_bits(&pbc, header, sizeof(header));
830 put_bits(&pbc, 5, IAMF_OBU_IA_AUDIO_ELEMENT);
831 put_bits(&pbc, 3, 0);
832 flush_put_bits(&pbc);
833
834 dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
835 avio_write(pb, header, put_bytes_count(&pbc, 1));
836 ffio_write_leb(pb, dyn_size);
837 avio_write(pb, dyn_buf, dyn_size);
838 ffio_free_dyn_buf(&dyn_bc);
839
840 return 0;
841 }
842
843 static int iamf_write_mixing_presentation(const IAMFContext *iamf,
844 const IAMFMixPresentation *mix_presentation,
845 AVIOContext *pb, void *log_ctx)
846 {
847 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
848 const AVIAMFMixPresentation *mix = mix_presentation->cmix;
849 const AVDictionaryEntry *tag = NULL;
850 PutBitContext pbc;
851 AVIOContext *dyn_bc;
852 uint8_t *dyn_buf = NULL;
853 int dyn_size;
854
855 int ret = avio_open_dyn_buf(&dyn_bc);
856 if (ret < 0)
857 return ret;
858
859 ffio_write_leb(dyn_bc, mix_presentation->mix_presentation_id); // mix_presentation_id
860 ffio_write_leb(dyn_bc, av_dict_count(mix->annotations)); // count_label
861
862 while ((tag = av_dict_iterate(mix->annotations, tag)))
863 avio_put_str(dyn_bc, tag->key);
864 while ((tag = av_dict_iterate(mix->annotations, tag)))
865 avio_put_str(dyn_bc, tag->value);
866
867 ffio_write_leb(dyn_bc, mix->nb_submixes);
868 for (int i = 0; i < mix->nb_submixes; i++) {
869 const AVIAMFSubmix *sub_mix = mix->submixes[i];
870 const IAMFParamDefinition *param_def;
871
872 ffio_write_leb(dyn_bc, sub_mix->nb_elements);
873 for (int j = 0; j < sub_mix->nb_elements; j++) {
874 const IAMFAudioElement *audio_element = NULL;
875 const AVIAMFSubmixElement *submix_element = sub_mix->elements[j];
876
877 for (int k = 0; k < iamf->nb_audio_elements; k++)
878 if (iamf->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
879 audio_element = iamf->audio_elements[k];
880 break;
881 }
882
883 av_assert0(audio_element);
884 ffio_write_leb(dyn_bc, submix_element->audio_element_id);
885
886 if (av_dict_count(submix_element->annotations) != av_dict_count(mix->annotations)) {
887 av_log(log_ctx, AV_LOG_ERROR, "Inconsistent amount of labels in submix %d from Mix Presentation id #%u\n",
888 j, audio_element->audio_element_id);
889 return AVERROR(EINVAL);
890 }
891 while ((tag = av_dict_iterate(submix_element->annotations, tag)))
892 avio_put_str(dyn_bc, tag->value);
893
894 init_put_bits(&pbc, header, sizeof(header));
895 put_bits(&pbc, 2, submix_element->headphones_rendering_mode);
896 put_bits(&pbc, 6, 0); // reserved
897 flush_put_bits(&pbc);
898 avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
899 ffio_write_leb(dyn_bc, 0); // rendering_config_extension_size
900
901 param_def = ff_iamf_get_param_definition(iamf, submix_element->element_mix_config->parameter_id);
902 ret = param_definition(iamf, param_def, dyn_bc, log_ctx);
903 if (ret < 0)
904 return ret;
905
906 avio_wb16(dyn_bc, rescale_rational(submix_element->default_mix_gain, 1 << 8));
907 }
908
909 param_def = ff_iamf_get_param_definition(iamf, sub_mix->output_mix_config->parameter_id);
910 ret = param_definition(iamf, param_def, dyn_bc, log_ctx);
911 if (ret < 0)
912 return ret;
913 avio_wb16(dyn_bc, rescale_rational(sub_mix->default_mix_gain, 1 << 8));
914
915 ffio_write_leb(dyn_bc, sub_mix->nb_layouts); // nb_layouts
916 for (int i = 0; i < sub_mix->nb_layouts; i++) {
917 const AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[i];
918 int layout, info_type;
919 int dialogue = submix_layout->dialogue_anchored_loudness.num &&
920 submix_layout->dialogue_anchored_loudness.den;
921 int album = submix_layout->album_anchored_loudness.num &&
922 submix_layout->album_anchored_loudness.den;
923
924 if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
925 for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_sound_system_map); layout++) {
926 if (!av_channel_layout_compare(&submix_layout->sound_system, &ff_iamf_sound_system_map[layout].layout))
927 break;
928 }
929 if (layout == FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
930 av_log(log_ctx, AV_LOG_ERROR, "Invalid Sound System value in a submix\n");
931 return AVERROR(EINVAL);
932 }
933 } else if (submix_layout->layout_type != AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
934 av_log(log_ctx, AV_LOG_ERROR, "Unsupported Layout Type value in a submix\n");
935 return AVERROR(EINVAL);
936 }
937 init_put_bits(&pbc, header, sizeof(header));
938 put_bits(&pbc, 2, submix_layout->layout_type); // layout_type
939 if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
940 put_bits(&pbc, 4, ff_iamf_sound_system_map[layout].id); // sound_system
941 put_bits(&pbc, 2, 0); // reserved
942 } else
943 put_bits(&pbc, 6, 0); // reserved
944 flush_put_bits(&pbc);
945 avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
946
947 info_type = (submix_layout->true_peak.num && submix_layout->true_peak.den);
948 info_type |= (dialogue || album) << 1;
949 avio_w8(dyn_bc, info_type);
950 avio_wb16(dyn_bc, rescale_rational(submix_layout->integrated_loudness, 1 << 8));
951 avio_wb16(dyn_bc, rescale_rational(submix_layout->digital_peak, 1 << 8));
952 if (info_type & 1)
953 avio_wb16(dyn_bc, rescale_rational(submix_layout->true_peak, 1 << 8));
954 if (info_type & 2) {
955 avio_w8(dyn_bc, dialogue + album); // num_anchored_loudness
956 if (dialogue) {
957 avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_DIALOGUE);
958 avio_wb16(dyn_bc, rescale_rational(submix_layout->dialogue_anchored_loudness, 1 << 8));
959 }
960 if (album) {
961 avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_ALBUM);
962 avio_wb16(dyn_bc, rescale_rational(submix_layout->album_anchored_loudness, 1 << 8));
963 }
964 }
965 }
966 }
967
968 init_put_bits(&pbc, header, sizeof(header));
969 put_bits(&pbc, 5, IAMF_OBU_IA_MIX_PRESENTATION);
970 put_bits(&pbc, 3, 0);
971 flush_put_bits(&pbc);
972
973 dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
974 avio_write(pb, header, put_bytes_count(&pbc, 1));
975 ffio_write_leb(pb, dyn_size);
976 avio_write(pb, dyn_buf, dyn_size);
977 ffio_free_dyn_buf(&dyn_bc);
978
979 return 0;
980 }
981
982 int ff_iamf_write_descriptors(const IAMFContext *iamf, AVIOContext *pb, void *log_ctx)
983 {
984 int ret;
985
986 // Sequence Header
987 avio_w8(pb, IAMF_OBU_IA_SEQUENCE_HEADER << 3);
988
989 ffio_write_leb(pb, 6);
990 avio_wb32(pb, MKBETAG('i','a','m','f'));
991 avio_w8(pb, iamf->nb_audio_elements > 1); // primary_profile
992 avio_w8(pb, iamf->nb_audio_elements > 1); // additional_profile
993
994 for (int i = 0; i < iamf->nb_codec_configs; i++) {
995 ret = iamf_write_codec_config(iamf, iamf->codec_configs[i], pb);
996 if (ret < 0)
997 return ret;
998 }
999
1000 for (int i = 0; i < iamf->nb_audio_elements; i++) {
1001 ret = iamf_write_audio_element(iamf, iamf->audio_elements[i], pb, log_ctx);
1002 if (ret < 0)
1003 return ret;
1004 }
1005
1006 for (int i = 0; i < iamf->nb_mix_presentations; i++) {
1007 ret = iamf_write_mixing_presentation(iamf, iamf->mix_presentations[i], pb, log_ctx);
1008 if (ret < 0)
1009 return ret;
1010 }
1011
1012 return 0;
1013 }
1014
1015 static int write_parameter_block(const IAMFContext *iamf, AVIOContext *pb,
1016 const AVIAMFParamDefinition *param, void *log_ctx)
1017 {
1018 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
1019 const IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
1020 PutBitContext pbc;
1021 AVIOContext *dyn_bc;
1022 uint8_t *dyn_buf = NULL;
1023 int dyn_size, ret;
1024
1025 if (param->type > AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
1026 av_log(log_ctx, AV_LOG_DEBUG, "Ignoring side data with unknown type %u\n",
1027 param->type);
1028 return 0;
1029 }
1030
1031 if (!param_definition) {
1032 av_log(log_ctx, AV_LOG_ERROR, "Non-existent Parameter Definition with ID %u referenced by a packet\n",
1033 param->parameter_id);
1034 return AVERROR(EINVAL);
1035 }
1036
1037 if (param->type != param_definition->param->type) {
1038 av_log(log_ctx, AV_LOG_ERROR, "Inconsistent values for Parameter Definition "
1039 "with ID %u in a packet\n",
1040 param->parameter_id);
1041 return AVERROR(EINVAL);
1042 }
1043
1044 ret = avio_open_dyn_buf(&dyn_bc);
1045 if (ret < 0)
1046 return ret;
1047
1048 // Sequence Header
1049 init_put_bits(&pbc, header, sizeof(header));
1050 put_bits(&pbc, 5, IAMF_OBU_IA_PARAMETER_BLOCK);
1051 put_bits(&pbc, 3, 0);
1052 flush_put_bits(&pbc);
1053 avio_write(pb, header, put_bytes_count(&pbc, 1));
1054
1055 ffio_write_leb(dyn_bc, param->parameter_id);
1056 if (!param_definition->mode) {
1057 ffio_write_leb(dyn_bc, param->duration);
1058 ffio_write_leb(dyn_bc, param->constant_subblock_duration);
1059 if (param->constant_subblock_duration == 0)
1060 ffio_write_leb(dyn_bc, param->nb_subblocks);
1061 }
1062
1063 for (int i = 0; i < param->nb_subblocks; i++) {
1064 const void *subblock = av_iamf_param_definition_get_subblock(param, i);
1065
1066 switch (param->type) {
1067 case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
1068 const AVIAMFMixGain *mix = subblock;
1069 if (!param_definition->mode && param->constant_subblock_duration == 0)
1070 ffio_write_leb(dyn_bc, mix->subblock_duration);
1071
1072 ffio_write_leb(dyn_bc, mix->animation_type);
1073
1074 avio_wb16(dyn_bc, rescale_rational(mix->start_point_value, 1 << 8));
1075 if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
1076 avio_wb16(dyn_bc, rescale_rational(mix->end_point_value, 1 << 8));
1077 if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
1078 avio_wb16(dyn_bc, rescale_rational(mix->control_point_value, 1 << 8));
1079 avio_w8(dyn_bc, av_clip_uint8(av_rescale(mix->control_point_relative_time.num, 1 << 8,
1080 mix->control_point_relative_time.den)));
1081 }
1082 break;
1083 }
1084 case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
1085 const AVIAMFDemixingInfo *demix = subblock;
1086 if (!param_definition->mode && param->constant_subblock_duration == 0)
1087 ffio_write_leb(dyn_bc, demix->subblock_duration);
1088
1089 avio_w8(dyn_bc, demix->dmixp_mode << 5);
1090 break;
1091 }
1092 case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
1093 const AVIAMFReconGain *recon = subblock;
1094 const AVIAMFAudioElement *audio_element = param_definition->audio_element->celement;
1095
1096 if (!param_definition->mode && param->constant_subblock_duration == 0)
1097 ffio_write_leb(dyn_bc, recon->subblock_duration);
1098
1099 if (!audio_element) {
1100 av_log(log_ctx, AV_LOG_ERROR, "Invalid Parameter Definition with ID %u referenced by a packet\n", param->parameter_id);
1101 return AVERROR(EINVAL);
1102 }
1103
1104 for (int j = 0; j < audio_element->nb_layers; j++) {
1105 const AVIAMFLayer *layer = audio_element->layers[j];
1106
1107 if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) {
1108 unsigned int recon_gain_flags = 0;
1109 int k = 0;
1110
1111 for (; k < 7; k++)
1112 recon_gain_flags |= (1 << k) * !!recon->recon_gain[j][k];
1113 for (; k < 12; k++)
1114 recon_gain_flags |= (2 << k) * !!recon->recon_gain[j][k];
1115 if (recon_gain_flags >> 8)
1116 recon_gain_flags |= (1 << k);
1117
1118 ffio_write_leb(dyn_bc, recon_gain_flags);
1119 for (k = 0; k < 12; k++) {
1120 if (recon->recon_gain[j][k])
1121 avio_w8(dyn_bc, recon->recon_gain[j][k]);
1122 }
1123 }
1124 }
1125 break;
1126 }
1127 default:
1128 av_assert0(0);
1129 }
1130 }
1131
1132 dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
1133 ffio_write_leb(pb, dyn_size);
1134 avio_write(pb, dyn_buf, dyn_size);
1135 ffio_free_dyn_buf(&dyn_bc);
1136
1137 return 0;
1138 }
1139
1140 int ff_iamf_write_parameter_blocks(const IAMFContext *iamf, AVIOContext *pb,
1141 const AVPacket *pkt, void *log_ctx)
1142 {
1143 AVIAMFParamDefinition *mix =
1144 (AVIAMFParamDefinition *)av_packet_get_side_data(pkt,
1145 AV_PKT_DATA_IAMF_MIX_GAIN_PARAM,
1146 NULL);
1147 AVIAMFParamDefinition *demix =
1148 (AVIAMFParamDefinition *)av_packet_get_side_data(pkt,
1149 AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM,
1150 NULL);
1151 AVIAMFParamDefinition *recon =
1152 (AVIAMFParamDefinition *)av_packet_get_side_data(pkt,
1153 AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM,
1154 NULL);
1155
1156 if (mix) {
1157 int ret = write_parameter_block(iamf, pb, mix, log_ctx);
1158 if (ret < 0)
1159 return ret;
1160 }
1161 if (demix) {
1162 int ret = write_parameter_block(iamf, pb, demix, log_ctx);
1163 if (ret < 0)
1164 return ret;
1165 }
1166 if (recon) {
1167 int ret = write_parameter_block(iamf, pb, recon, log_ctx);
1168 if (ret < 0)
1169 return ret;
1170 }
1171
1172 return 0;
1173 }
1174
1175 static IAMFAudioElement *get_audio_element(const IAMFContext *c,
1176 unsigned int audio_substream_id)
1177 {
1178 for (int i = 0; i < c->nb_audio_elements; i++) {
1179 IAMFAudioElement *audio_element = c->audio_elements[i];
1180 for (int j = 0; j < audio_element->nb_substreams; j++) {
1181 IAMFSubStream *substream = &audio_element->substreams[j];
1182 if (substream->audio_substream_id == audio_substream_id)
1183 return audio_element;
1184 }
1185 }
1186
1187 return NULL;
1188 }
1189
1190 int ff_iamf_write_audio_frame(const IAMFContext *iamf, AVIOContext *pb,
1191 unsigned audio_substream_id, const AVPacket *pkt)
1192 {
1193 uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
1194 PutBitContext pbc;
1195 const IAMFAudioElement *audio_element;
1196 IAMFCodecConfig *codec_config;
1197 AVIOContext *dyn_bc;
1198 const uint8_t *side_data;
1199 uint8_t *dyn_buf = NULL;
1200 unsigned int skip_samples = 0, discard_padding = 0;
1201 size_t side_data_size;
1202 int dyn_size, type = audio_substream_id <= 17 ?
1203 audio_substream_id + IAMF_OBU_IA_AUDIO_FRAME_ID0 : IAMF_OBU_IA_AUDIO_FRAME;
1204 int ret;
1205
1206 audio_element = get_audio_element(iamf, audio_substream_id);
1207 if (!audio_element)
1208 return AVERROR(EINVAL);
1209 codec_config = ff_iamf_get_codec_config(iamf, audio_element->codec_config_id);
1210 if (!codec_config)
1211 return AVERROR(EINVAL);
1212
1213 if (!pkt->size) {
1214 size_t new_extradata_size;
1215 const uint8_t *new_extradata = av_packet_get_side_data(pkt,
1216 AV_PKT_DATA_NEW_EXTRADATA,
1217 &new_extradata_size);
1218
1219 if (!new_extradata)
1220 return AVERROR_INVALIDDATA;
1221
1222 av_free(codec_config->extradata);
1223 codec_config->extradata = av_memdup(new_extradata, new_extradata_size);
1224 if (!codec_config->extradata) {
1225 codec_config->extradata_size = 0;
1226 return AVERROR(ENOMEM);
1227 }
1228 codec_config->extradata_size = new_extradata_size;
1229
1230 return update_extradata(codec_config);
1231 }
1232
1233 side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES,
1234 &side_data_size);
1235
1236 if (side_data && side_data_size >= 10) {
1237 skip_samples = AV_RL32(side_data);
1238 discard_padding = AV_RL32(side_data + 4);
1239 }
1240
1241 if (codec_config->codec_id == AV_CODEC_ID_OPUS) {
1242 // IAMF's num_samples_to_trim_at_start is the same as Opus's pre-skip.
1243 skip_samples = pkt->dts < 0
1244 ? av_rescale(-pkt->dts, 48000, pkt->time_base.den)
1245 : 0;
1246 discard_padding = av_rescale(discard_padding, 48000, pkt->time_base.den);
1247 }
1248
1249 ret = avio_open_dyn_buf(&dyn_bc);
1250 if (ret < 0)
1251 return ret;
1252
1253 init_put_bits(&pbc, header, sizeof(header));
1254 put_bits(&pbc, 5, type);
1255 put_bits(&pbc, 1, 0); // obu_redundant_copy
1256 put_bits(&pbc, 1, skip_samples || discard_padding);
1257 put_bits(&pbc, 1, 0); // obu_extension_flag
1258 flush_put_bits(&pbc);
1259 avio_write(pb, header, put_bytes_count(&pbc, 1));
1260
1261 if (skip_samples || discard_padding) {
1262 ffio_write_leb(dyn_bc, discard_padding);
1263 ffio_write_leb(dyn_bc, skip_samples);
1264 }
1265
1266 if (audio_substream_id > 17)
1267 ffio_write_leb(dyn_bc, audio_substream_id);
1268
1269 dyn_size = avio_get_dyn_buf(dyn_bc, &dyn_buf);
1270 ffio_write_leb(pb, dyn_size + pkt->size);
1271 avio_write(pb, dyn_buf, dyn_size);
1272 ffio_free_dyn_buf(&dyn_bc);
1273 avio_write(pb, pkt->data, pkt->size);
1274
1275 return 0;
1276 }