2 * Immersive Audio Model and Formats parsing
3 * Copyright (c) 2023 James Almer <jamrial@gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/avassert.h"
23 #include "libavutil/iamf.h"
24 #include "libavutil/intreadwrite.h"
25 #include "libavutil/log.h"
26 #include "libavutil/mem.h"
27 #include "libavcodec/get_bits.h"
28 #include "libavcodec/flac.h"
29 #include "libavcodec/leb.h"
30 #include "libavcodec/mpeg4audio.h"
31 #include "libavcodec/put_bits.h"
32 #include "avio_internal.h"
33 #include "iamf_parse.h"
36 static int opus_decoder_config(IAMFCodecConfig
*codec_config
,
37 AVIOContext
*pb
, int len
)
39 int ret
, left
= len
- avio_tell(pb
);
41 if (left
< 11 || codec_config
->audio_roll_distance
>= 0)
42 return AVERROR_INVALIDDATA
;
44 codec_config
->extradata
= av_malloc(left
+ 8);
45 if (!codec_config
->extradata
)
46 return AVERROR(ENOMEM
);
48 AV_WB32A(codec_config
->extradata
, MKBETAG('O','p','u','s'));
49 AV_WB32A(codec_config
->extradata
+ 4, MKBETAG('H','e','a','d'));
50 ret
= ffio_read_size(pb
, codec_config
->extradata
+ 8, left
);
54 codec_config
->extradata_size
= left
+ 8;
55 codec_config
->sample_rate
= 48000;
60 static int aac_decoder_config(IAMFCodecConfig
*codec_config
,
61 AVIOContext
*pb
, int len
, void *logctx
)
63 MPEG4AudioConfig cfg
= { 0 };
64 int object_type_id
, codec_id
, stream_type
;
67 if (codec_config
->audio_roll_distance
>= 0)
68 return AVERROR_INVALIDDATA
;
70 ff_mp4_read_descr(logctx
, pb
, &tag
);
71 if (tag
!= MP4DecConfigDescrTag
)
72 return AVERROR_INVALIDDATA
;
74 object_type_id
= avio_r8(pb
);
75 if (object_type_id
!= 0x40)
76 return AVERROR_INVALIDDATA
;
78 stream_type
= avio_r8(pb
);
79 if (((stream_type
>> 2) != 5) || ((stream_type
>> 1) & 1))
80 return AVERROR_INVALIDDATA
;
82 avio_skip(pb
, 3); // buffer size db
83 avio_skip(pb
, 4); // rc_max_rate
84 avio_skip(pb
, 4); // avg bitrate
86 codec_id
= ff_codec_get_id(ff_mp4_obj_type
, object_type_id
);
87 if (codec_id
&& codec_id
!= codec_config
->codec_id
)
88 return AVERROR_INVALIDDATA
;
90 left
= ff_mp4_read_descr(logctx
, pb
, &tag
);
91 if (tag
!= MP4DecSpecificDescrTag
||
92 !left
|| left
> (len
- avio_tell(pb
)))
93 return AVERROR_INVALIDDATA
;
95 // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
96 codec_config
->extradata
= av_malloc((size_t)left
+ AV_INPUT_BUFFER_PADDING_SIZE
);
97 if (!codec_config
->extradata
)
98 return AVERROR(ENOMEM
);
100 ret
= ffio_read_size(pb
, codec_config
->extradata
, left
);
103 codec_config
->extradata_size
= left
;
104 memset(codec_config
->extradata
+ codec_config
->extradata_size
, 0,
105 AV_INPUT_BUFFER_PADDING_SIZE
);
107 ret
= avpriv_mpeg4audio_get_config2(&cfg
, codec_config
->extradata
,
108 codec_config
->extradata_size
, 1, logctx
);
112 codec_config
->sample_rate
= cfg
.sample_rate
;
117 static int flac_decoder_config(IAMFCodecConfig
*codec_config
,
118 AVIOContext
*pb
, int len
)
122 if (codec_config
->audio_roll_distance
)
123 return AVERROR_INVALIDDATA
;
125 avio_skip(pb
, 4); // METADATA_BLOCK_HEADER
127 left
= len
- avio_tell(pb
);
128 if (left
< FLAC_STREAMINFO_SIZE
)
129 return AVERROR_INVALIDDATA
;
131 codec_config
->extradata
= av_malloc(left
);
132 if (!codec_config
->extradata
)
133 return AVERROR(ENOMEM
);
135 ret
= ffio_read_size(pb
, codec_config
->extradata
, left
);
139 codec_config
->extradata_size
= left
;
140 codec_config
->sample_rate
= AV_RB24(codec_config
->extradata
+ 10) >> 4;
145 static int ipcm_decoder_config(IAMFCodecConfig
*codec_config
,
146 AVIOContext
*pb
, int len
)
148 static const enum AVCodecID sample_fmt
[2][3] = {
149 { AV_CODEC_ID_PCM_S16BE
, AV_CODEC_ID_PCM_S24BE
, AV_CODEC_ID_PCM_S32BE
},
150 { AV_CODEC_ID_PCM_S16LE
, AV_CODEC_ID_PCM_S24LE
, AV_CODEC_ID_PCM_S32LE
},
152 int sample_format
= avio_r8(pb
); // 0 = BE, 1 = LE
153 int sample_size
= (avio_r8(pb
) / 8 - 2); // 16, 24, 32
154 if (sample_format
> 1 || sample_size
> 2U || codec_config
->audio_roll_distance
)
155 return AVERROR_INVALIDDATA
;
157 codec_config
->codec_id
= sample_fmt
[sample_format
][sample_size
];
158 codec_config
->sample_rate
= avio_rb32(pb
);
160 if (len
- avio_tell(pb
))
161 return AVERROR_INVALIDDATA
;
166 static int codec_config_obu(void *s
, IAMFContext
*c
, AVIOContext
*pb
, int len
)
168 IAMFCodecConfig
**tmp
, *codec_config
= NULL
;
172 enum AVCodecID avcodec_id
;
173 unsigned codec_config_id
, nb_samples
, codec_id
;
174 int16_t audio_roll_distance
;
177 buf
= av_malloc(len
);
179 return AVERROR(ENOMEM
);
181 ret
= ffio_read_size(pb
, buf
, len
);
185 ffio_init_context(&b
, buf
, len
, 0, NULL
, NULL
, NULL
, NULL
);
188 codec_config_id
= ffio_read_leb(pbc
);
189 codec_id
= avio_rb32(pbc
);
190 nb_samples
= ffio_read_leb(pbc
);
191 audio_roll_distance
= avio_rb16(pbc
);
194 case MKBETAG('O','p','u','s'):
195 avcodec_id
= AV_CODEC_ID_OPUS
;
197 case MKBETAG('m','p','4','a'):
198 avcodec_id
= AV_CODEC_ID_AAC
;
200 case MKBETAG('f','L','a','C'):
201 avcodec_id
= AV_CODEC_ID_FLAC
;
204 avcodec_id
= AV_CODEC_ID_NONE
;
208 for (int i
= 0; i
< c
->nb_codec_configs
; i
++)
209 if (c
->codec_configs
[i
]->codec_config_id
== codec_config_id
) {
210 ret
= AVERROR_INVALIDDATA
;
214 tmp
= av_realloc_array(c
->codec_configs
, c
->nb_codec_configs
+ 1, sizeof(*c
->codec_configs
));
216 ret
= AVERROR(ENOMEM
);
219 c
->codec_configs
= tmp
;
221 codec_config
= av_mallocz(sizeof(*codec_config
));
223 ret
= AVERROR(ENOMEM
);
227 codec_config
->codec_config_id
= codec_config_id
;
228 codec_config
->codec_id
= avcodec_id
;
229 codec_config
->nb_samples
= nb_samples
;
230 codec_config
->audio_roll_distance
= audio_roll_distance
;
233 case MKBETAG('O','p','u','s'):
234 ret
= opus_decoder_config(codec_config
, pbc
, len
);
236 case MKBETAG('m','p','4','a'):
237 ret
= aac_decoder_config(codec_config
, pbc
, len
, s
);
239 case MKBETAG('f','L','a','C'):
240 ret
= flac_decoder_config(codec_config
, pbc
, len
);
242 case MKBETAG('i','p','c','m'):
243 ret
= ipcm_decoder_config(codec_config
, pbc
, len
);
251 if ((codec_config
->nb_samples
> INT_MAX
) || codec_config
->nb_samples
<= 0 ||
252 (-codec_config
->audio_roll_distance
> INT_MAX
/ codec_config
->nb_samples
)) {
253 ret
= AVERROR_INVALIDDATA
;
257 c
->codec_configs
[c
->nb_codec_configs
++] = codec_config
;
259 len
-= avio_tell(pbc
);
261 av_log(s
, AV_LOG_WARNING
, "Underread in codec_config_obu. %d bytes left at the end\n", len
);
268 av_free(codec_config
->extradata
);
269 av_free(codec_config
);
274 static int update_extradata(AVCodecParameters
*codecpar
)
280 switch(codecpar
->codec_id
) {
281 case AV_CODEC_ID_OPUS
:
282 AV_WB8(codecpar
->extradata
+ 9, codecpar
->ch_layout
.nb_channels
);
283 AV_WL16A(codecpar
->extradata
+ 10, AV_RB16A(codecpar
->extradata
+ 10)); // Byte swap pre-skip
284 AV_WL32A(codecpar
->extradata
+ 12, AV_RB32A(codecpar
->extradata
+ 12)); // Byte swap sample rate
285 AV_WL16A(codecpar
->extradata
+ 16, AV_RB16A(codecpar
->extradata
+ 16)); // Byte swap Output Gain
287 case AV_CODEC_ID_AAC
: {
289 int size
= FFMIN(codecpar
->extradata_size
, sizeof(buf
));
291 init_put_bits(&pb
, buf
, sizeof(buf
));
292 ret
= init_get_bits8(&gb
, codecpar
->extradata
, size
);
296 ret
= get_bits(&gb
, 5);
297 put_bits(&pb
, 5, ret
);
298 if (ret
== AOT_ESCAPE
) // violates section 3.11.2, but better check for it
299 put_bits(&pb
, 6, get_bits(&gb
, 6));
300 ret
= get_bits(&gb
, 4);
301 put_bits(&pb
, 4, ret
);
303 put_bits(&pb
, 24, get_bits(&gb
, 24));
306 put_bits(&pb
, 4, codecpar
->ch_layout
.nb_channels
); // set channel config
307 ret
= get_bits_left(&gb
);
309 return AVERROR_INVALIDDATA
;
310 ret
= FFMIN(ret
, put_bits_left(&pb
));
312 put_bits32(&pb
, get_bits_long(&gb
, 32));
315 put_bits(&pb
, ret
, get_bits_long(&gb
, ret
));
318 memcpy(codecpar
->extradata
, buf
, put_bytes_output(&pb
));
321 case AV_CODEC_ID_FLAC
: {
323 int size
= FFMIN(codecpar
->extradata_size
, sizeof(buf
));
325 init_put_bits(&pb
, buf
, sizeof(buf
));
326 ret
= init_get_bits8(&gb
, codecpar
->extradata
, size
);
330 put_bits32(&pb
, get_bits_long(&gb
, 32)); // min/max blocksize
331 put_bits63(&pb
, 48, get_bits64(&gb
, 48)); // min/max framesize
332 put_bits(&pb
, 20, get_bits(&gb
, 20)); // samplerate
334 put_bits(&pb
, 3, codecpar
->ch_layout
.nb_channels
- 1);
335 ret
= get_bits_left(&gb
);
337 return AVERROR_INVALIDDATA
;
338 ret
= FFMIN(ret
, put_bits_left(&pb
));
339 put_bits(&pb
, ret
, get_bits(&gb
, ret
));
342 memcpy(codecpar
->extradata
, buf
, put_bytes_output(&pb
));
350 static int scalable_channel_layout_config(void *s
, AVIOContext
*pb
,
351 IAMFAudioElement
*audio_element
,
352 const IAMFCodecConfig
*codec_config
)
354 int nb_layers
, k
= 0;
356 nb_layers
= avio_r8(pb
) >> 5; // get_bits(&gb, 3);
357 // skip_bits(&gb, 5); //reserved
359 if (nb_layers
> 6 || nb_layers
== 0)
360 return AVERROR_INVALIDDATA
;
362 audio_element
->layers
= av_calloc(nb_layers
, sizeof(*audio_element
->layers
));
363 if (!audio_element
->layers
)
364 return AVERROR(ENOMEM
);
366 audio_element
->nb_layers
= nb_layers
;
367 for (int i
= 0, n
= 0; i
< nb_layers
; i
++) {
368 AVChannelLayout ch_layout
= { 0 };
370 int loudspeaker_layout
, output_gain_is_present_flag
;
371 int substream_count
, coupled_substream_count
;
372 int expanded_loudspeaker_layout
= -1;
373 int ret
, byte
= avio_r8(pb
);
375 layer
= av_iamf_audio_element_add_layer(audio_element
->element
);
377 return AVERROR(ENOMEM
);
379 loudspeaker_layout
= byte
>> 4; // get_bits(&gb, 4);
380 output_gain_is_present_flag
= (byte
>> 3) & 1; //get_bits1(&gb);
382 layer
->flags
|= AV_IAMF_LAYER_FLAG_RECON_GAIN
;
383 substream_count
= avio_r8(pb
);
384 coupled_substream_count
= avio_r8(pb
);
386 if (substream_count
+ k
> audio_element
->nb_substreams
)
387 return AVERROR_INVALIDDATA
;
389 audio_element
->layers
[i
].substream_count
= substream_count
;
390 audio_element
->layers
[i
].coupled_substream_count
= coupled_substream_count
;
391 if (output_gain_is_present_flag
) {
392 layer
->output_gain_flags
= avio_r8(pb
) >> 2; // get_bits(&gb, 6);
393 layer
->output_gain
= av_make_q(sign_extend(avio_rb16(pb
), 16), 1 << 8);
396 if (!i
&& loudspeaker_layout
== 15)
397 expanded_loudspeaker_layout
= avio_r8(pb
);
398 if (expanded_loudspeaker_layout
> 0 && expanded_loudspeaker_layout
< 13) {
399 av_channel_layout_copy(&ch_layout
, &ff_iamf_expanded_scalable_ch_layouts
[expanded_loudspeaker_layout
]);
400 } else if (loudspeaker_layout
< 10) {
401 av_channel_layout_copy(&ch_layout
, &ff_iamf_scalable_ch_layouts
[loudspeaker_layout
]);
403 ch_layout
.u
.mask
&= ~av_channel_layout_subset(&audio_element
->element
->layers
[i
-1]->ch_layout
, UINT64_MAX
);
405 ch_layout
= (AVChannelLayout
){ .order
= AV_CHANNEL_ORDER_UNSPEC
,
406 .nb_channels
= substream_count
+
407 coupled_substream_count
};
409 if (i
&& ch_layout
.nb_channels
<= audio_element
->element
->layers
[i
-1]->ch_layout
.nb_channels
)
410 return AVERROR_INVALIDDATA
;
412 for (int j
= 0; j
< substream_count
; j
++) {
413 IAMFSubStream
*substream
= &audio_element
->substreams
[k
++];
415 substream
->codecpar
->ch_layout
= coupled_substream_count
-- > 0 ? (AVChannelLayout
)AV_CHANNEL_LAYOUT_STEREO
:
416 (AVChannelLayout
)AV_CHANNEL_LAYOUT_MONO
;
418 ret
= update_extradata(substream
->codecpar
);
423 if (ch_layout
.order
== AV_CHANNEL_ORDER_NATIVE
) {
424 ret
= av_channel_layout_custom_init(&layer
->ch_layout
, ch_layout
.nb_channels
);
428 for (int j
= 0; j
< n
; j
++)
429 layer
->ch_layout
.u
.map
[j
].id
= av_channel_layout_channel_from_index(&audio_element
->element
->layers
[i
-1]->ch_layout
, j
);
431 coupled_substream_count
= audio_element
->layers
[i
].coupled_substream_count
;
432 while (coupled_substream_count
--) {
433 if (ch_layout
.u
.mask
& AV_CH_LAYOUT_STEREO
) {
434 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_FRONT_LEFT
;
435 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_FRONT_RIGHT
;
436 ch_layout
.u
.mask
&= ~AV_CH_LAYOUT_STEREO
;
437 } else if (ch_layout
.u
.mask
& (AV_CH_FRONT_LEFT_OF_CENTER
|AV_CH_FRONT_RIGHT_OF_CENTER
)) {
438 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_FRONT_LEFT_OF_CENTER
;
439 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_FRONT_RIGHT_OF_CENTER
;
440 ch_layout
.u
.mask
&= ~(AV_CH_FRONT_LEFT_OF_CENTER
|AV_CH_FRONT_RIGHT_OF_CENTER
);
441 } else if (ch_layout
.u
.mask
& (AV_CH_SIDE_LEFT
|AV_CH_SIDE_RIGHT
)) {
442 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_SIDE_LEFT
;
443 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_SIDE_RIGHT
;
444 ch_layout
.u
.mask
&= ~(AV_CH_SIDE_LEFT
|AV_CH_SIDE_RIGHT
);
445 } else if (ch_layout
.u
.mask
& (AV_CH_BACK_LEFT
|AV_CH_BACK_RIGHT
)) {
446 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_BACK_LEFT
;
447 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_BACK_RIGHT
;
448 ch_layout
.u
.mask
&= ~(AV_CH_BACK_LEFT
|AV_CH_BACK_RIGHT
);
449 } else if (ch_layout
.u
.mask
& (AV_CH_TOP_FRONT_LEFT
|AV_CH_TOP_FRONT_RIGHT
)) {
450 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_FRONT_LEFT
;
451 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_FRONT_RIGHT
;
452 ch_layout
.u
.mask
&= ~(AV_CH_TOP_FRONT_LEFT
|AV_CH_TOP_FRONT_RIGHT
);
453 } else if (ch_layout
.u
.mask
& (AV_CH_TOP_SIDE_LEFT
|AV_CH_TOP_SIDE_RIGHT
)) {
454 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_SIDE_LEFT
;
455 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_SIDE_RIGHT
;
456 ch_layout
.u
.mask
&= ~(AV_CH_TOP_SIDE_LEFT
|AV_CH_TOP_SIDE_RIGHT
);
457 } else if (ch_layout
.u
.mask
& (AV_CH_TOP_BACK_LEFT
|AV_CH_TOP_BACK_RIGHT
)) {
458 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_BACK_LEFT
;
459 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_TOP_BACK_RIGHT
;
460 ch_layout
.u
.mask
&= ~(AV_CH_TOP_BACK_LEFT
|AV_CH_TOP_BACK_RIGHT
);
464 substream_count
-= audio_element
->layers
[i
].coupled_substream_count
;
465 while (substream_count
--) {
466 if (ch_layout
.u
.mask
& AV_CH_FRONT_CENTER
) {
467 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_FRONT_CENTER
;
468 ch_layout
.u
.mask
&= ~AV_CH_FRONT_CENTER
;
470 if (ch_layout
.u
.mask
& AV_CH_LOW_FREQUENCY
) {
471 layer
->ch_layout
.u
.map
[n
++].id
= AV_CHAN_LOW_FREQUENCY
;
472 ch_layout
.u
.mask
&= ~AV_CH_LOW_FREQUENCY
;
476 ret
= av_channel_layout_retype(&layer
->ch_layout
, AV_CHANNEL_ORDER_NATIVE
, 0);
477 if (ret
< 0 && ret
!= AVERROR(ENOSYS
))
479 } else // AV_CHANNEL_ORDER_UNSPEC
480 av_channel_layout_copy(&layer
->ch_layout
, &ch_layout
);
483 if (k
!= audio_element
->nb_substreams
)
484 return AVERROR_INVALIDDATA
;
489 static int ambisonics_config(void *s
, AVIOContext
*pb
,
490 IAMFAudioElement
*audio_element
,
491 const IAMFCodecConfig
*codec_config
)
494 unsigned ambisonics_mode
;
495 int output_channel_count
, substream_count
, order
;
498 ambisonics_mode
= ffio_read_leb(pb
);
499 if (ambisonics_mode
> 1)
500 return AVERROR_INVALIDDATA
;
502 output_channel_count
= avio_r8(pb
); // C
503 substream_count
= avio_r8(pb
); // N
504 if (audio_element
->nb_substreams
!= substream_count
|| output_channel_count
== 0)
505 return AVERROR_INVALIDDATA
;
507 order
= floor(sqrt(output_channel_count
- 1));
508 /* incomplete order - some harmonics are missing */
509 if ((order
+ 1) * (order
+ 1) != output_channel_count
)
510 return AVERROR_INVALIDDATA
;
512 audio_element
->layers
= av_mallocz(sizeof(*audio_element
->layers
));
513 if (!audio_element
->layers
)
514 return AVERROR(ENOMEM
);
516 audio_element
->nb_layers
= 1;
517 audio_element
->layers
->substream_count
= substream_count
;
519 layer
= av_iamf_audio_element_add_layer(audio_element
->element
);
521 return AVERROR(ENOMEM
);
523 layer
->ambisonics_mode
= ambisonics_mode
;
524 if (ambisonics_mode
== 0) {
525 for (int i
= 0; i
< substream_count
; i
++) {
526 IAMFSubStream
*substream
= &audio_element
->substreams
[i
];
528 substream
->codecpar
->ch_layout
= (AVChannelLayout
)AV_CHANNEL_LAYOUT_MONO
;
530 ret
= update_extradata(substream
->codecpar
);
535 ret
= av_channel_layout_custom_init(&layer
->ch_layout
, output_channel_count
);
539 for (int i
= 0; i
< output_channel_count
; i
++)
540 layer
->ch_layout
.u
.map
[i
].id
= avio_r8(pb
) + AV_CHAN_AMBISONIC_BASE
;
542 ret
= av_channel_layout_retype(&layer
->ch_layout
, AV_CHANNEL_ORDER_AMBISONIC
, 0);
543 if (ret
< 0 && ret
!= AVERROR(ENOSYS
))
546 int coupled_substream_count
= avio_r8(pb
); // M
547 int nb_demixing_matrix
= substream_count
+ coupled_substream_count
;
548 int demixing_matrix_size
= nb_demixing_matrix
* output_channel_count
;
550 audio_element
->layers
->coupled_substream_count
= coupled_substream_count
;
552 layer
->ch_layout
= (AVChannelLayout
){ .order
= AV_CHANNEL_ORDER_AMBISONIC
, .nb_channels
= output_channel_count
};
553 layer
->demixing_matrix
= av_malloc_array(demixing_matrix_size
, sizeof(*layer
->demixing_matrix
));
554 if (!layer
->demixing_matrix
)
555 return AVERROR(ENOMEM
);
557 for (int i
= 0; i
< demixing_matrix_size
; i
++)
558 layer
->demixing_matrix
[i
] = av_make_q(sign_extend(avio_rb16(pb
), 16), 1 << 8);
560 for (int i
= 0; i
< substream_count
; i
++) {
561 IAMFSubStream
*substream
= &audio_element
->substreams
[i
];
563 substream
->codecpar
->ch_layout
= coupled_substream_count
-- > 0 ? (AVChannelLayout
)AV_CHANNEL_LAYOUT_STEREO
:
564 (AVChannelLayout
)AV_CHANNEL_LAYOUT_MONO
;
567 ret
= update_extradata(substream
->codecpar
);
576 static int param_parse(void *s
, IAMFContext
*c
, AVIOContext
*pb
,
578 const IAMFAudioElement
*audio_element
,
579 AVIAMFParamDefinition
**out_param_definition
)
581 IAMFParamDefinition
*param_definition
= NULL
;
582 AVIAMFParamDefinition
*param
;
583 unsigned int parameter_id
, parameter_rate
, mode
;
584 unsigned int duration
= 0, constant_subblock_duration
= 0, nb_subblocks
= 0;
585 unsigned int total_duration
= 0;
588 parameter_id
= ffio_read_leb(pb
);
590 for (int i
= 0; i
< c
->nb_param_definitions
; i
++)
591 if (c
->param_definitions
[i
]->param
->parameter_id
== parameter_id
) {
592 param_definition
= c
->param_definitions
[i
];
596 parameter_rate
= ffio_read_leb(pb
);
597 mode
= avio_r8(pb
) >> 7;
600 duration
= ffio_read_leb(pb
);
602 return AVERROR_INVALIDDATA
;
603 constant_subblock_duration
= ffio_read_leb(pb
);
604 if (constant_subblock_duration
== 0)
605 nb_subblocks
= ffio_read_leb(pb
);
607 nb_subblocks
= duration
/ constant_subblock_duration
;
608 total_duration
= duration
;
612 param
= av_iamf_param_definition_alloc(type
, nb_subblocks
, ¶m_size
);
614 return AVERROR(ENOMEM
);
616 for (int i
= 0; i
< nb_subblocks
; i
++) {
617 void *subblock
= av_iamf_param_definition_get_subblock(param
, i
);
618 unsigned int subblock_duration
= constant_subblock_duration
;
620 if (constant_subblock_duration
== 0) {
621 subblock_duration
= ffio_read_leb(pb
);
622 total_duration
+= subblock_duration
;
623 } else if (i
== nb_subblocks
- 1)
624 subblock_duration
= duration
- i
* constant_subblock_duration
;
627 case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN
: {
628 AVIAMFMixGain
*mix
= subblock
;
629 mix
->subblock_duration
= subblock_duration
;
632 case AV_IAMF_PARAMETER_DEFINITION_DEMIXING
: {
633 AVIAMFDemixingInfo
*demix
= subblock
;
634 demix
->subblock_duration
= subblock_duration
;
635 // DefaultDemixingInfoParameterData
636 av_assert0(audio_element
);
637 demix
->dmixp_mode
= avio_r8(pb
) >> 5;
638 audio_element
->element
->default_w
= avio_r8(pb
) >> 4;
641 case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN
: {
642 AVIAMFReconGain
*recon
= subblock
;
643 recon
->subblock_duration
= subblock_duration
;
648 return AVERROR_INVALIDDATA
;
652 if (!mode
&& !constant_subblock_duration
&& total_duration
!= duration
) {
653 av_log(s
, AV_LOG_ERROR
, "Invalid subblock durations in parameter_id %u\n", parameter_id
);
655 return AVERROR_INVALIDDATA
;
658 param
->parameter_id
= parameter_id
;
659 param
->parameter_rate
= parameter_rate
;
660 param
->duration
= duration
;
661 param
->constant_subblock_duration
= constant_subblock_duration
;
662 param
->nb_subblocks
= nb_subblocks
;
664 if (param_definition
) {
665 if (param_definition
->param_size
!= param_size
|| memcmp(param_definition
->param
, param
, param_size
)) {
666 av_log(s
, AV_LOG_ERROR
, "Inconsistent parameters for parameter_id %u\n", parameter_id
);
668 return AVERROR_INVALIDDATA
;
671 IAMFParamDefinition
**tmp
= av_realloc_array(c
->param_definitions
, c
->nb_param_definitions
+ 1,
672 sizeof(*c
->param_definitions
));
675 return AVERROR(ENOMEM
);
677 c
->param_definitions
= tmp
;
679 param_definition
= av_mallocz(sizeof(*param_definition
));
680 if (!param_definition
) {
682 return AVERROR(ENOMEM
);
684 param_definition
->param
= param
;
685 param_definition
->mode
= !mode
;
686 param_definition
->param_size
= param_size
;
687 param_definition
->audio_element
= audio_element
;
689 c
->param_definitions
[c
->nb_param_definitions
++] = param_definition
;
692 av_assert0(out_param_definition
);
693 *out_param_definition
= param
;
698 static int audio_element_obu(void *s
, IAMFContext
*c
, AVIOContext
*pb
, int len
)
700 const IAMFCodecConfig
*codec_config
;
701 AVIAMFAudioElement
*element
;
702 IAMFAudioElement
**tmp
, *audio_element
= NULL
;
706 unsigned audio_element_id
, nb_substreams
, codec_config_id
, num_parameters
;
707 int audio_element_type
, ret
;
709 buf
= av_malloc(len
);
711 return AVERROR(ENOMEM
);
713 ret
= ffio_read_size(pb
, buf
, len
);
717 ffio_init_context(&b
, buf
, len
, 0, NULL
, NULL
, NULL
, NULL
);
720 audio_element_id
= ffio_read_leb(pbc
);
722 for (int i
= 0; i
< c
->nb_audio_elements
; i
++)
723 if (c
->audio_elements
[i
]->audio_element_id
== audio_element_id
) {
724 av_log(s
, AV_LOG_ERROR
, "Duplicate audio_element_id %d\n", audio_element_id
);
725 ret
= AVERROR_INVALIDDATA
;
729 audio_element_type
= avio_r8(pbc
) >> 5;
730 if (audio_element_type
> AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE
) {
731 av_log(s
, AV_LOG_DEBUG
, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
736 codec_config_id
= ffio_read_leb(pbc
);
738 codec_config
= ff_iamf_get_codec_config(c
, codec_config_id
);
740 av_log(s
, AV_LOG_ERROR
, "Non existent codec config id %d referenced in an audio element\n", codec_config_id
);
741 ret
= AVERROR_INVALIDDATA
;
745 if (codec_config
->codec_id
== AV_CODEC_ID_NONE
) {
746 av_log(s
, AV_LOG_DEBUG
, "Unknown codec id referenced in an audio element. Ignoring\n");
751 tmp
= av_realloc_array(c
->audio_elements
, c
->nb_audio_elements
+ 1, sizeof(*c
->audio_elements
));
753 ret
= AVERROR(ENOMEM
);
756 c
->audio_elements
= tmp
;
758 audio_element
= av_mallocz(sizeof(*audio_element
));
759 if (!audio_element
) {
760 ret
= AVERROR(ENOMEM
);
764 nb_substreams
= ffio_read_leb(pbc
);
765 audio_element
->codec_config_id
= codec_config_id
;
766 audio_element
->audio_element_id
= audio_element_id
;
767 audio_element
->substreams
= av_calloc(nb_substreams
, sizeof(*audio_element
->substreams
));
768 if (!audio_element
->substreams
) {
769 ret
= AVERROR(ENOMEM
);
772 audio_element
->nb_substreams
= nb_substreams
;
774 element
= audio_element
->element
= av_iamf_audio_element_alloc();
776 ret
= AVERROR(ENOMEM
);
779 audio_element
->celement
= element
;
781 element
->audio_element_type
= audio_element_type
;
783 for (int i
= 0; i
< audio_element
->nb_substreams
; i
++) {
784 IAMFSubStream
*substream
= &audio_element
->substreams
[i
];
786 substream
->codecpar
= avcodec_parameters_alloc();
787 if (!substream
->codecpar
) {
788 ret
= AVERROR(ENOMEM
);
792 substream
->audio_substream_id
= ffio_read_leb(pbc
);
794 substream
->codecpar
->codec_type
= AVMEDIA_TYPE_AUDIO
;
795 substream
->codecpar
->codec_id
= codec_config
->codec_id
;
796 substream
->codecpar
->frame_size
= codec_config
->nb_samples
;
797 substream
->codecpar
->sample_rate
= codec_config
->sample_rate
;
798 substream
->codecpar
->seek_preroll
= -codec_config
->audio_roll_distance
* codec_config
->nb_samples
;
800 switch(substream
->codecpar
->codec_id
) {
801 case AV_CODEC_ID_AAC
:
802 case AV_CODEC_ID_FLAC
:
803 case AV_CODEC_ID_OPUS
:
804 substream
->codecpar
->extradata
= av_malloc(codec_config
->extradata_size
+ AV_INPUT_BUFFER_PADDING_SIZE
);
805 if (!substream
->codecpar
->extradata
) {
806 ret
= AVERROR(ENOMEM
);
809 memcpy(substream
->codecpar
->extradata
, codec_config
->extradata
, codec_config
->extradata_size
);
810 memset(substream
->codecpar
->extradata
+ codec_config
->extradata_size
, 0, AV_INPUT_BUFFER_PADDING_SIZE
);
811 substream
->codecpar
->extradata_size
= codec_config
->extradata_size
;
816 num_parameters
= ffio_read_leb(pbc
);
817 if (num_parameters
> 2 && audio_element_type
== 0) {
818 av_log(s
, AV_LOG_ERROR
, "Audio Element parameter count %u is invalid"
819 " for Channel representations\n", num_parameters
);
820 ret
= AVERROR_INVALIDDATA
;
823 if (num_parameters
&& audio_element_type
!= 0) {
824 av_log(s
, AV_LOG_ERROR
, "Audio Element parameter count %u is invalid"
825 " for Scene representations\n", num_parameters
);
826 ret
= AVERROR_INVALIDDATA
;
830 for (int i
= 0; i
< num_parameters
; i
++) {
833 type
= ffio_read_leb(pbc
);
834 if (type
== AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN
)
835 ret
= AVERROR_INVALIDDATA
;
836 else if (type
== AV_IAMF_PARAMETER_DEFINITION_DEMIXING
) {
837 if (element
->demixing_info
) {
838 ret
= AVERROR_INVALIDDATA
;
841 ret
= param_parse(s
, c
, pbc
, type
, audio_element
, &element
->demixing_info
);
842 } else if (type
== AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN
) {
843 if (element
->recon_gain_info
) {
844 ret
= AVERROR_INVALIDDATA
;
847 ret
= param_parse(s
, c
, pbc
, type
, audio_element
, &element
->recon_gain_info
);
849 unsigned param_definition_size
= ffio_read_leb(pbc
);
850 avio_skip(pbc
, param_definition_size
);
856 if (audio_element_type
== AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL
) {
857 ret
= scalable_channel_layout_config(s
, pbc
, audio_element
, codec_config
);
860 } else if (audio_element_type
== AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE
) {
861 ret
= ambisonics_config(s
, pbc
, audio_element
, codec_config
);
868 c
->audio_elements
[c
->nb_audio_elements
++] = audio_element
;
870 len
-= avio_tell(pbc
);
872 av_log(s
, AV_LOG_WARNING
, "Underread in audio_element_obu. %d bytes left at the end\n", len
);
878 ff_iamf_free_audio_element(&audio_element
);
882 static int label_string(AVIOContext
*pb
, char **label
)
886 avio_get_str(pb
, sizeof(buf
), buf
, sizeof(buf
));
891 return AVERROR_INVALIDDATA
;
892 *label
= av_strdup(buf
);
894 return AVERROR(ENOMEM
);
899 static int mix_presentation_obu(void *s
, IAMFContext
*c
, AVIOContext
*pb
, int len
)
901 AVIAMFMixPresentation
*mix
;
902 IAMFMixPresentation
**tmp
, *mix_presentation
= NULL
;
906 unsigned nb_submixes
, mix_presentation_id
;
909 buf
= av_malloc(len
);
911 return AVERROR(ENOMEM
);
913 ret
= ffio_read_size(pb
, buf
, len
);
917 ffio_init_context(&b
, buf
, len
, 0, NULL
, NULL
, NULL
, NULL
);
920 mix_presentation_id
= ffio_read_leb(pbc
);
922 for (int i
= 0; i
< c
->nb_mix_presentations
; i
++)
923 if (c
->mix_presentations
[i
]->mix_presentation_id
== mix_presentation_id
) {
924 av_log(s
, AV_LOG_ERROR
, "Duplicate mix_presentation_id %d\n", mix_presentation_id
);
925 ret
= AVERROR_INVALIDDATA
;
929 tmp
= av_realloc_array(c
->mix_presentations
, c
->nb_mix_presentations
+ 1, sizeof(*c
->mix_presentations
));
931 ret
= AVERROR(ENOMEM
);
934 c
->mix_presentations
= tmp
;
936 mix_presentation
= av_mallocz(sizeof(*mix_presentation
));
937 if (!mix_presentation
) {
938 ret
= AVERROR(ENOMEM
);
942 mix_presentation
->mix_presentation_id
= mix_presentation_id
;
943 mix
= mix_presentation
->mix
= av_iamf_mix_presentation_alloc();
945 ret
= AVERROR(ENOMEM
);
948 mix_presentation
->cmix
= mix
;
950 mix_presentation
->count_label
= ffio_read_leb(pbc
);
951 mix_presentation
->language_label
= av_calloc(mix_presentation
->count_label
,
952 sizeof(*mix_presentation
->language_label
));
953 if (!mix_presentation
->language_label
) {
954 mix_presentation
->count_label
= 0;
955 ret
= AVERROR(ENOMEM
);
959 for (int i
= 0; i
< mix_presentation
->count_label
; i
++) {
960 ret
= label_string(pbc
, &mix_presentation
->language_label
[i
]);
965 for (int i
= 0; i
< mix_presentation
->count_label
; i
++) {
966 char *annotation
= NULL
;
967 ret
= label_string(pbc
, &annotation
);
970 ret
= av_dict_set(&mix
->annotations
, mix_presentation
->language_label
[i
], annotation
,
971 AV_DICT_DONT_STRDUP_VAL
| AV_DICT_DONT_OVERWRITE
);
976 nb_submixes
= ffio_read_leb(pbc
);
977 for (int i
= 0; i
< nb_submixes
; i
++) {
978 AVIAMFSubmix
*sub_mix
;
979 unsigned nb_elements
, nb_layouts
;
981 sub_mix
= av_iamf_mix_presentation_add_submix(mix
);
983 ret
= AVERROR(ENOMEM
);
987 nb_elements
= ffio_read_leb(pbc
);
988 for (int j
= 0; j
< nb_elements
; j
++) {
989 AVIAMFSubmixElement
*submix_element
;
990 IAMFAudioElement
*audio_element
= NULL
;
991 unsigned int rendering_config_extension_size
;
993 submix_element
= av_iamf_submix_add_element(sub_mix
);
994 if (!submix_element
) {
995 ret
= AVERROR(ENOMEM
);
999 submix_element
->audio_element_id
= ffio_read_leb(pbc
);
1001 for (int k
= 0; k
< c
->nb_audio_elements
; k
++)
1002 if (c
->audio_elements
[k
]->audio_element_id
== submix_element
->audio_element_id
) {
1003 audio_element
= c
->audio_elements
[k
];
1007 if (!audio_element
) {
1008 av_log(s
, AV_LOG_ERROR
, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
1009 submix_element
->audio_element_id
, mix_presentation_id
);
1010 ret
= AVERROR_INVALIDDATA
;
1014 for (int k
= 0; k
< mix_presentation
->count_label
; k
++) {
1015 char *annotation
= NULL
;
1016 ret
= label_string(pbc
, &annotation
);
1019 ret
= av_dict_set(&submix_element
->annotations
, mix_presentation
->language_label
[k
], annotation
,
1020 AV_DICT_DONT_STRDUP_VAL
| AV_DICT_DONT_OVERWRITE
);
1025 submix_element
->headphones_rendering_mode
= avio_r8(pbc
) >> 6;
1027 rendering_config_extension_size
= ffio_read_leb(pbc
);
1028 avio_skip(pbc
, rendering_config_extension_size
);
1030 ret
= param_parse(s
, c
, pbc
, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN
,
1032 &submix_element
->element_mix_config
);
1035 submix_element
->default_mix_gain
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1038 ret
= param_parse(s
, c
, pbc
, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN
, NULL
, &sub_mix
->output_mix_config
);
1041 sub_mix
->default_mix_gain
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1043 nb_layouts
= ffio_read_leb(pbc
);
1044 for (int j
= 0; j
< nb_layouts
; j
++) {
1045 AVIAMFSubmixLayout
*submix_layout
;
1047 int byte
= avio_r8(pbc
);
1049 submix_layout
= av_iamf_submix_add_layout(sub_mix
);
1050 if (!submix_layout
) {
1051 ret
= AVERROR(ENOMEM
);
1055 submix_layout
->layout_type
= byte
>> 6;
1056 if (submix_layout
->layout_type
< AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS
||
1057 submix_layout
->layout_type
> AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL
) {
1058 av_log(s
, AV_LOG_ERROR
, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
1059 submix_layout
->layout_type
, mix_presentation_id
);
1060 ret
= AVERROR_INVALIDDATA
;
1063 if (submix_layout
->layout_type
== 2) {
1065 sound_system
= (byte
>> 2) & 0xF;
1066 if (sound_system
>= FF_ARRAY_ELEMS(ff_iamf_sound_system_map
)) {
1067 ret
= AVERROR_INVALIDDATA
;
1070 av_channel_layout_copy(&submix_layout
->sound_system
, &ff_iamf_sound_system_map
[sound_system
].layout
);
1072 submix_layout
->sound_system
= (AVChannelLayout
)AV_CHANNEL_LAYOUT_BINAURAL
;
1074 info_type
= avio_r8(pbc
);
1075 submix_layout
->integrated_loudness
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1076 submix_layout
->digital_peak
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1079 submix_layout
->true_peak
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1080 if (info_type
& 2) {
1081 unsigned int num_anchored_loudness
= avio_r8(pbc
);
1083 for (int k
= 0; k
< num_anchored_loudness
; k
++) {
1084 unsigned int anchor_element
= avio_r8(pbc
);
1085 AVRational anchored_loudness
= av_make_q(sign_extend(avio_rb16(pbc
), 16), 1 << 8);
1086 if (anchor_element
== IAMF_ANCHOR_ELEMENT_DIALOGUE
)
1087 submix_layout
->dialogue_anchored_loudness
= anchored_loudness
;
1088 else if (anchor_element
<= IAMF_ANCHOR_ELEMENT_ALBUM
)
1089 submix_layout
->album_anchored_loudness
= anchored_loudness
;
1091 av_log(s
, AV_LOG_DEBUG
, "Unknown anchor_element. Ignoring\n");
1095 if (info_type
& 0xFC) {
1096 unsigned int info_type_size
= ffio_read_leb(pbc
);
1097 avio_skip(pbc
, info_type_size
);
1102 c
->mix_presentations
[c
->nb_mix_presentations
++] = mix_presentation
;
1104 len
-= avio_tell(pbc
);
1106 av_log(s
, AV_LOG_WARNING
, "Underread in mix_presentation_obu. %d bytes left at the end\n", len
);
1112 ff_iamf_free_mix_presentation(&mix_presentation
);
1116 int ff_iamf_parse_obu_header(const uint8_t *buf
, int buf_size
,
1117 unsigned *obu_size
, int *start_pos
, enum IAMF_OBU_Type
*type
,
1118 unsigned *skip_samples
, unsigned *discard_padding
)
1121 int ret
, extension_flag
, trimming
, start
;
1122 unsigned skip
= 0, discard
= 0;
1125 ret
= init_get_bits8(&gb
, buf
, FFMIN(buf_size
, MAX_IAMF_OBU_HEADER_SIZE
));
1129 *type
= get_bits(&gb
, 5);
1130 /*redundant =*/ get_bits1(&gb
);
1131 trimming
= get_bits1(&gb
);
1132 extension_flag
= get_bits1(&gb
);
1134 *obu_size
= get_leb(&gb
);
1135 if (*obu_size
> INT_MAX
)
1136 return AVERROR_INVALIDDATA
;
1138 start
= get_bits_count(&gb
) / 8;
1141 discard
= get_leb(&gb
); // num_samples_to_trim_at_end
1142 skip
= get_leb(&gb
); // num_samples_to_trim_at_start
1146 *skip_samples
= skip
;
1147 if (discard_padding
)
1148 *discard_padding
= discard
;
1150 if (extension_flag
) {
1151 unsigned int extension_bytes
;
1152 extension_bytes
= get_leb(&gb
);
1153 if (extension_bytes
> INT_MAX
/ 8)
1154 return AVERROR_INVALIDDATA
;
1155 skip_bits_long(&gb
, extension_bytes
* 8);
1158 if (get_bits_left(&gb
) < 0)
1159 return AVERROR_INVALIDDATA
;
1161 size
= *obu_size
+ start
;
1163 return AVERROR_INVALIDDATA
;
1165 *obu_size
-= get_bits_count(&gb
) / 8 - start
;
1166 *start_pos
= size
- *obu_size
;
1171 int ff_iamfdec_read_descriptors(IAMFContext
*c
, AVIOContext
*pb
,
1172 int max_size
, void *log_ctx
)
1174 uint8_t header
[MAX_IAMF_OBU_HEADER_SIZE
+ AV_INPUT_BUFFER_PADDING_SIZE
];
1179 enum IAMF_OBU_Type type
;
1180 int start_pos
, len
, size
;
1182 if ((ret
= ffio_ensure_seekback(pb
, FFMIN(MAX_IAMF_OBU_HEADER_SIZE
, max_size
))) < 0)
1184 size
= avio_read(pb
, header
, FFMIN(MAX_IAMF_OBU_HEADER_SIZE
, max_size
));
1187 memset(header
+ size
, 0, AV_INPUT_BUFFER_PADDING_SIZE
);
1189 len
= ff_iamf_parse_obu_header(header
, size
, &obu_size
, &start_pos
, &type
, NULL
, NULL
);
1190 if (len
< 0 || obu_size
> max_size
) {
1191 av_log(log_ctx
, AV_LOG_ERROR
, "Failed to read obu header\n");
1192 avio_seek(pb
, -size
, SEEK_CUR
);
1196 if (type
>= IAMF_OBU_IA_PARAMETER_BLOCK
&& type
< IAMF_OBU_IA_SEQUENCE_HEADER
) {
1197 avio_seek(pb
, -size
, SEEK_CUR
);
1201 avio_seek(pb
, -(size
- start_pos
), SEEK_CUR
);
1203 case IAMF_OBU_IA_CODEC_CONFIG
:
1204 ret
= codec_config_obu(log_ctx
, c
, pb
, obu_size
);
1206 case IAMF_OBU_IA_AUDIO_ELEMENT
:
1207 ret
= audio_element_obu(log_ctx
, c
, pb
, obu_size
);
1209 case IAMF_OBU_IA_MIX_PRESENTATION
:
1210 ret
= mix_presentation_obu(log_ctx
, c
, pb
, obu_size
);
1213 int64_t offset
= avio_skip(pb
, obu_size
);
1220 av_log(log_ctx
, AV_LOG_ERROR
, "Failed to read obu type %d\n", type
);
1223 max_size
-= obu_size
+ start_pos
;
1225 return AVERROR_INVALIDDATA
;