libavformat/iamf_parse.c

   1 /*
   2  * Immersive Audio Model and Formats parsing
   3  * Copyright (c) 2023 James Almer <jamrial@gmail.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/avassert.h"
  23 #include "libavutil/iamf.h"
  24 #include "libavutil/intreadwrite.h"
  25 #include "libavutil/log.h"
  26 #include "libavutil/mem.h"
  27 #include "libavcodec/get_bits.h"
  28 #include "libavcodec/flac.h"
  29 #include "libavcodec/leb.h"
  30 #include "libavcodec/mpeg4audio.h"
  31 #include "libavcodec/put_bits.h"
  32 #include "avio_internal.h"
  33 #include "iamf_parse.h"
  34 #include "isom.h"
  35
  36 static int opus_decoder_config(IAMFCodecConfig *codec_config,
  37                                AVIOContext *pb, int len)
  38 {
  39     int ret, left = len - avio_tell(pb);
  40
  41     if (left < 11 || codec_config->audio_roll_distance >= 0)
  42         return AVERROR_INVALIDDATA;
  43
  44     codec_config->extradata = av_malloc(left + 8);
  45     if (!codec_config->extradata)
  46         return AVERROR(ENOMEM);
  47
  48     AV_WB32A(codec_config->extradata,     MKBETAG('O','p','u','s'));
  49     AV_WB32A(codec_config->extradata + 4, MKBETAG('H','e','a','d'));
  50     ret = ffio_read_size(pb, codec_config->extradata + 8, left);
  51     if (ret < 0)
  52         return ret;
  53
  54     codec_config->extradata_size = left + 8;
  55     codec_config->sample_rate = 48000;
  56
  57     return 0;
  58 }
  59
  60 static int aac_decoder_config(IAMFCodecConfig *codec_config,
  61                               AVIOContext *pb, int len, void *logctx)
  62 {
  63     MPEG4AudioConfig cfg = { 0 };
  64     int object_type_id, codec_id, stream_type;
  65     int ret, tag, left;
  66
  67     if (codec_config->audio_roll_distance >= 0)
  68         return AVERROR_INVALIDDATA;
  69
  70     ff_mp4_read_descr(logctx, pb, &tag);
  71     if (tag != MP4DecConfigDescrTag)
  72         return AVERROR_INVALIDDATA;
  73
  74     object_type_id = avio_r8(pb);
  75     if (object_type_id != 0x40)
  76         return AVERROR_INVALIDDATA;
  77
  78     stream_type = avio_r8(pb);
  79     if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
  80         return AVERROR_INVALIDDATA;
  81
  82     avio_skip(pb, 3); // buffer size db
  83     avio_skip(pb, 4); // rc_max_rate
  84     avio_skip(pb, 4); // avg bitrate
  85
  86     codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
  87     if (codec_id && codec_id != codec_config->codec_id)
  88         return AVERROR_INVALIDDATA;
  89
  90     left = ff_mp4_read_descr(logctx, pb, &tag);
  91     if (tag != MP4DecSpecificDescrTag ||
  92         !left || left > (len - avio_tell(pb)))
  93         return AVERROR_INVALIDDATA;
  94
  95     // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
  96     codec_config->extradata = av_malloc((size_t)left + AV_INPUT_BUFFER_PADDING_SIZE);
  97     if (!codec_config->extradata)
  98         return AVERROR(ENOMEM);
  99
 100     ret = ffio_read_size(pb, codec_config->extradata, left);
 101     if (ret < 0)
 102         return ret;
 103     codec_config->extradata_size = left;
 104     memset(codec_config->extradata + codec_config->extradata_size, 0,
 105            AV_INPUT_BUFFER_PADDING_SIZE);
 106
 107     ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
 108                                         codec_config->extradata_size, 1, logctx);
 109     if (ret < 0)
 110         return ret;
 111
 112     codec_config->sample_rate = cfg.sample_rate;
 113
 114     return 0;
 115 }
 116
 117 static int flac_decoder_config(IAMFCodecConfig *codec_config,
 118                                AVIOContext *pb, int len)
 119 {
 120     int ret, left;
 121
 122     if (codec_config->audio_roll_distance)
 123         return AVERROR_INVALIDDATA;
 124
 125     avio_skip(pb, 4); // METADATA_BLOCK_HEADER
 126
 127     left = len - avio_tell(pb);
 128     if (left < FLAC_STREAMINFO_SIZE)
 129         return AVERROR_INVALIDDATA;
 130
 131     codec_config->extradata = av_malloc(left);
 132     if (!codec_config->extradata)
 133         return AVERROR(ENOMEM);
 134
 135     ret = ffio_read_size(pb, codec_config->extradata, left);
 136     if (ret < 0)
 137         return ret;
 138
 139     codec_config->extradata_size = left;
 140     codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;
 141
 142     return 0;
 143 }
 144
 145 static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
 146                                AVIOContext *pb, int len)
 147 {
 148     static const enum AVCodecID sample_fmt[2][3] = {
 149         { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
 150         { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
 151     };
 152     int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
 153     int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32
 154     if (sample_format > 1 || sample_size > 2U || codec_config->audio_roll_distance)
 155         return AVERROR_INVALIDDATA;
 156
 157     codec_config->codec_id = sample_fmt[sample_format][sample_size];
 158     codec_config->sample_rate = avio_rb32(pb);
 159
 160     if (len - avio_tell(pb))
 161         return AVERROR_INVALIDDATA;
 162
 163     return 0;
 164 }
 165
 166 static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 167 {
 168     IAMFCodecConfig **tmp, *codec_config = NULL;
 169     FFIOContext b;
 170     AVIOContext *pbc;
 171     uint8_t *buf;
 172     enum AVCodecID avcodec_id;
 173     unsigned codec_config_id, nb_samples, codec_id;
 174     int16_t audio_roll_distance;
 175     int ret;
 176
 177     buf = av_malloc(len);
 178     if (!buf)
 179         return AVERROR(ENOMEM);
 180
 181     ret = ffio_read_size(pb, buf, len);
 182     if (ret < 0)
 183         goto fail;
 184
 185     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
 186     pbc = &b.pub;
 187
 188     codec_config_id = ffio_read_leb(pbc);
 189     codec_id = avio_rb32(pbc);
 190     nb_samples = ffio_read_leb(pbc);
 191     audio_roll_distance = avio_rb16(pbc);
 192
 193     switch(codec_id) {
 194     case MKBETAG('O','p','u','s'):
 195         avcodec_id = AV_CODEC_ID_OPUS;
 196         break;
 197     case MKBETAG('m','p','4','a'):
 198         avcodec_id = AV_CODEC_ID_AAC;
 199         break;
 200     case MKBETAG('f','L','a','C'):
 201         avcodec_id = AV_CODEC_ID_FLAC;
 202         break;
 203     default:
 204         avcodec_id = AV_CODEC_ID_NONE;
 205         break;
 206     }
 207
 208     for (int i = 0; i < c->nb_codec_configs; i++)
 209         if (c->codec_configs[i]->codec_config_id == codec_config_id) {
 210             ret = AVERROR_INVALIDDATA;
 211             goto fail;
 212         }
 213
 214     tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs));
 215     if (!tmp) {
 216         ret = AVERROR(ENOMEM);
 217         goto fail;
 218     }
 219     c->codec_configs = tmp;
 220
 221     codec_config = av_mallocz(sizeof(*codec_config));
 222     if (!codec_config) {
 223         ret = AVERROR(ENOMEM);
 224         goto fail;
 225     }
 226
 227     codec_config->codec_config_id = codec_config_id;
 228     codec_config->codec_id = avcodec_id;
 229     codec_config->nb_samples = nb_samples;
 230     codec_config->audio_roll_distance = audio_roll_distance;
 231
 232     switch(codec_id) {
 233     case MKBETAG('O','p','u','s'):
 234         ret = opus_decoder_config(codec_config, pbc, len);
 235         break;
 236     case MKBETAG('m','p','4','a'):
 237         ret = aac_decoder_config(codec_config, pbc, len, s);
 238         break;
 239     case MKBETAG('f','L','a','C'):
 240         ret = flac_decoder_config(codec_config, pbc, len);
 241         break;
 242     case MKBETAG('i','p','c','m'):
 243         ret = ipcm_decoder_config(codec_config, pbc, len);
 244         break;
 245     default:
 246         break;
 247     }
 248     if (ret < 0)
 249         goto fail;
 250
 251     if ((codec_config->nb_samples > INT_MAX) || codec_config->nb_samples <= 0 ||
 252         (-codec_config->audio_roll_distance > INT_MAX / codec_config->nb_samples)) {
 253         ret = AVERROR_INVALIDDATA;
 254         goto fail;
 255     }
 256
 257     c->codec_configs[c->nb_codec_configs++] = codec_config;
 258
 259     len -= avio_tell(pbc);
 260     if (len)
 261        av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);
 262
 263     ret = 0;
 264 fail:
 265     av_free(buf);
 266     if (ret < 0) {
 267         if (codec_config)
 268             av_free(codec_config->extradata);
 269         av_free(codec_config);
 270     }
 271     return ret;
 272 }
 273
 274 static int update_extradata(AVCodecParameters *codecpar)
 275 {
 276     GetBitContext gb;
 277     PutBitContext pb;
 278     int ret;
 279
 280     switch(codecpar->codec_id) {
 281     case AV_CODEC_ID_OPUS:
 282         AV_WB8(codecpar->extradata   + 9,  codecpar->ch_layout.nb_channels);
 283         AV_WL16A(codecpar->extradata + 10, AV_RB16A(codecpar->extradata + 10)); // Byte swap pre-skip
 284         AV_WL32A(codecpar->extradata + 12, AV_RB32A(codecpar->extradata + 12)); // Byte swap sample rate
 285         AV_WL16A(codecpar->extradata + 16, AV_RB16A(codecpar->extradata + 16)); // Byte swap Output Gain
 286         break;
 287     case AV_CODEC_ID_AAC: {
 288         uint8_t buf[6];
 289         int size = FFMIN(codecpar->extradata_size, sizeof(buf));
 290
 291         init_put_bits(&pb, buf, sizeof(buf));
 292         ret = init_get_bits8(&gb, codecpar->extradata, size);
 293         if (ret < 0)
 294             return ret;
 295
 296         ret = get_bits(&gb, 5);
 297         put_bits(&pb, 5, ret);
 298         if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
 299             put_bits(&pb, 6, get_bits(&gb, 6));
 300         ret = get_bits(&gb, 4);
 301         put_bits(&pb, 4, ret);
 302         if (ret == 0x0f)
 303             put_bits(&pb, 24, get_bits(&gb, 24));
 304
 305         skip_bits(&gb, 4);
 306         put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
 307         ret = get_bits_left(&gb);
 308         if (ret < 0)
 309             return AVERROR_INVALIDDATA;
 310         ret = FFMIN(ret, put_bits_left(&pb));
 311         while (ret >= 32) {
 312            put_bits32(&pb, get_bits_long(&gb, 32));
 313            ret -= 32;
 314         }
 315         put_bits(&pb, ret, get_bits_long(&gb, ret));
 316         flush_put_bits(&pb);
 317
 318         memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
 319         break;
 320     }
 321     case AV_CODEC_ID_FLAC: {
 322         uint8_t buf[13];
 323         int size = FFMIN(codecpar->extradata_size, sizeof(buf));
 324
 325         init_put_bits(&pb, buf, sizeof(buf));
 326         ret = init_get_bits8(&gb, codecpar->extradata, size);
 327         if (ret < 0)
 328             return ret;
 329
 330         put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
 331         put_bits63(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
 332         put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
 333         skip_bits(&gb, 3);
 334         put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
 335         ret = get_bits_left(&gb);
 336         if (ret < 0)
 337             return AVERROR_INVALIDDATA;
 338         ret = FFMIN(ret, put_bits_left(&pb));
 339         put_bits(&pb, ret, get_bits(&gb, ret));
 340         flush_put_bits(&pb);
 341
 342         memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
 343         break;
 344     }
 345     }
 346
 347     return 0;
 348 }
 349
 350 static int scalable_channel_layout_config(void *s, AVIOContext *pb,
 351                                           IAMFAudioElement *audio_element,
 352                                           const IAMFCodecConfig *codec_config)
 353 {
 354     int nb_layers, k = 0;
 355
 356     nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
 357     // skip_bits(&gb, 5); //reserved
 358
 359     if (nb_layers > 6 || nb_layers == 0)
 360         return AVERROR_INVALIDDATA;
 361
 362     audio_element->layers = av_calloc(nb_layers, sizeof(*audio_element->layers));
 363     if (!audio_element->layers)
 364         return AVERROR(ENOMEM);
 365
 366     audio_element->nb_layers = nb_layers;
 367     for (int i = 0, n = 0; i < nb_layers; i++) {
 368         AVChannelLayout ch_layout = { 0 };
 369         AVIAMFLayer *layer;
 370         int loudspeaker_layout, output_gain_is_present_flag;
 371         int substream_count, coupled_substream_count;
 372         int expanded_loudspeaker_layout = -1;
 373         int ret, byte = avio_r8(pb);
 374
 375         layer = av_iamf_audio_element_add_layer(audio_element->element);
 376         if (!layer)
 377             return AVERROR(ENOMEM);
 378
 379         loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
 380         output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
 381         if ((byte >> 2) & 1)
 382             layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;
 383         substream_count = avio_r8(pb);
 384         coupled_substream_count = avio_r8(pb);
 385
 386         if (substream_count + k > audio_element->nb_substreams)
 387             return AVERROR_INVALIDDATA;
 388
 389         audio_element->layers[i].substream_count         = substream_count;
 390         audio_element->layers[i].coupled_substream_count = coupled_substream_count;
 391         if (output_gain_is_present_flag) {
 392             layer->output_gain_flags = avio_r8(pb) >> 2;  // get_bits(&gb, 6);
 393             layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
 394         }
 395
 396         if (!i && loudspeaker_layout == 15)
 397             expanded_loudspeaker_layout = avio_r8(pb);
 398         if (expanded_loudspeaker_layout > 0 && expanded_loudspeaker_layout < 13) {
 399             av_channel_layout_copy(&ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
 400         } else if (loudspeaker_layout < 10) {
 401             av_channel_layout_copy(&ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
 402             if (i)
 403                 ch_layout.u.mask &= ~av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
 404         } else
 405             ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
 406                                                           .nb_channels = substream_count +
 407                                                                          coupled_substream_count };
 408
 409         if (i && ch_layout.nb_channels <= audio_element->element->layers[i-1]->ch_layout.nb_channels)
 410             return AVERROR_INVALIDDATA;
 411
 412         for (int j = 0; j < substream_count; j++) {
 413             IAMFSubStream *substream = &audio_element->substreams[k++];
 414
 415             substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
 416                                                                              (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
 417
 418             ret = update_extradata(substream->codecpar);
 419             if (ret < 0)
 420                 return ret;
 421         }
 422
 423         if (ch_layout.order == AV_CHANNEL_ORDER_NATIVE) {
 424             ret = av_channel_layout_custom_init(&layer->ch_layout, ch_layout.nb_channels);
 425             if (ret < 0)
 426                 return ret;
 427
 428             for (int j = 0; j < n; j++)
 429                 layer->ch_layout.u.map[j].id = av_channel_layout_channel_from_index(&audio_element->element->layers[i-1]->ch_layout, j);
 430
 431             coupled_substream_count = audio_element->layers[i].coupled_substream_count;
 432             while (coupled_substream_count--) {
 433                 if (ch_layout.u.mask & AV_CH_LAYOUT_STEREO) {
 434                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT;
 435                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT;
 436                     ch_layout.u.mask &= ~AV_CH_LAYOUT_STEREO;
 437                 } else if (ch_layout.u.mask & (AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)) {
 438                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT_OF_CENTER;
 439                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT_OF_CENTER;
 440                     ch_layout.u.mask &= ~(AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER);
 441                 } else if (ch_layout.u.mask & (AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)) {
 442                     layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_LEFT;
 443                     layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_RIGHT;
 444                     ch_layout.u.mask &= ~(AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT);
 445                 } else if (ch_layout.u.mask & (AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)) {
 446                     layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_LEFT;
 447                     layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_RIGHT;
 448                     ch_layout.u.mask &= ~(AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT);
 449                 } else if (ch_layout.u.mask & (AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)) {
 450                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_LEFT;
 451                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_RIGHT;
 452                     ch_layout.u.mask &= ~(AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT);
 453                 } else if (ch_layout.u.mask & (AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT)) {
 454                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_LEFT;
 455                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_RIGHT;
 456                     ch_layout.u.mask &= ~(AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT);
 457                 } else if (ch_layout.u.mask & (AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT)) {
 458                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_LEFT;
 459                     layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_RIGHT;
 460                     ch_layout.u.mask &= ~(AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT);
 461                 }
 462             }
 463
 464             substream_count -= audio_element->layers[i].coupled_substream_count;
 465             while (substream_count--) {
 466                 if (ch_layout.u.mask & AV_CH_FRONT_CENTER) {
 467                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_CENTER;
 468                     ch_layout.u.mask &= ~AV_CH_FRONT_CENTER;
 469                 }
 470                 if (ch_layout.u.mask & AV_CH_LOW_FREQUENCY) {
 471                     layer->ch_layout.u.map[n++].id = AV_CHAN_LOW_FREQUENCY;
 472                     ch_layout.u.mask &= ~AV_CH_LOW_FREQUENCY;
 473                 }
 474             }
 475
 476             ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_NATIVE, 0);
 477             if (ret < 0 && ret != AVERROR(ENOSYS))
 478                 return ret;
 479         } else // AV_CHANNEL_ORDER_UNSPEC
 480             av_channel_layout_copy(&layer->ch_layout, &ch_layout);
 481     }
 482
 483     if (k != audio_element->nb_substreams)
 484         return AVERROR_INVALIDDATA;
 485
 486     return 0;
 487 }
 488
 489 static int ambisonics_config(void *s, AVIOContext *pb,
 490                              IAMFAudioElement *audio_element,
 491                              const IAMFCodecConfig *codec_config)
 492 {
 493     AVIAMFLayer *layer;
 494     unsigned ambisonics_mode;
 495     int output_channel_count, substream_count, order;
 496     int ret;
 497
 498     ambisonics_mode = ffio_read_leb(pb);
 499     if (ambisonics_mode > 1)
 500         return AVERROR_INVALIDDATA;
 501
 502     output_channel_count = avio_r8(pb);  // C
 503     substream_count = avio_r8(pb);  // N
 504     if (audio_element->nb_substreams != substream_count || output_channel_count == 0)
 505         return AVERROR_INVALIDDATA;
 506
 507     order = floor(sqrt(output_channel_count - 1));
 508     /* incomplete order - some harmonics are missing */
 509     if ((order + 1) * (order + 1) != output_channel_count)
 510         return AVERROR_INVALIDDATA;
 511
 512     audio_element->layers = av_mallocz(sizeof(*audio_element->layers));
 513     if (!audio_element->layers)
 514         return AVERROR(ENOMEM);
 515
 516     audio_element->nb_layers = 1;
 517     audio_element->layers->substream_count = substream_count;
 518
 519     layer = av_iamf_audio_element_add_layer(audio_element->element);
 520     if (!layer)
 521         return AVERROR(ENOMEM);
 522
 523     layer->ambisonics_mode = ambisonics_mode;
 524     if (ambisonics_mode == 0) {
 525         for (int i = 0; i < substream_count; i++) {
 526             IAMFSubStream *substream = &audio_element->substreams[i];
 527
 528             substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
 529
 530             ret = update_extradata(substream->codecpar);
 531             if (ret < 0)
 532                 return ret;
 533         }
 534
 535         ret = av_channel_layout_custom_init(&layer->ch_layout, output_channel_count);
 536         if (ret < 0)
 537             return ret;
 538
 539         for (int i = 0; i < output_channel_count; i++)
 540             layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
 541
 542         ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_AMBISONIC, 0);
 543         if (ret < 0 && ret != AVERROR(ENOSYS))
 544             return ret;
 545     } else {
 546         int coupled_substream_count = avio_r8(pb);  // M
 547         int nb_demixing_matrix = substream_count + coupled_substream_count;
 548         int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
 549
 550         audio_element->layers->coupled_substream_count = coupled_substream_count;
 551
 552         layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
 553         layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
 554         if (!layer->demixing_matrix)
 555             return AVERROR(ENOMEM);
 556
 557         for (int i = 0; i < demixing_matrix_size; i++)
 558             layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
 559
 560         for (int i = 0; i < substream_count; i++) {
 561             IAMFSubStream *substream = &audio_element->substreams[i];
 562
 563             substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
 564                                                                              (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
 565
 566
 567             ret = update_extradata(substream->codecpar);
 568             if (ret < 0)
 569                 return ret;
 570         }
 571     }
 572
 573     return 0;
 574 }
 575
 576 static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
 577                        unsigned int type,
 578                        const IAMFAudioElement *audio_element,
 579                        AVIAMFParamDefinition **out_param_definition)
 580 {
 581     IAMFParamDefinition *param_definition = NULL;
 582     AVIAMFParamDefinition *param;
 583     unsigned int parameter_id, parameter_rate, mode;
 584     unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
 585     unsigned int total_duration = 0;
 586     size_t param_size;
 587
 588     parameter_id = ffio_read_leb(pb);
 589
 590     for (int i = 0; i < c->nb_param_definitions; i++)
 591         if (c->param_definitions[i]->param->parameter_id == parameter_id) {
 592             param_definition = c->param_definitions[i];
 593             break;
 594         }
 595
 596     parameter_rate = ffio_read_leb(pb);
 597     mode = avio_r8(pb) >> 7;
 598
 599     if (mode == 0) {
 600         duration = ffio_read_leb(pb);
 601         if (!duration)
 602             return AVERROR_INVALIDDATA;
 603         constant_subblock_duration = ffio_read_leb(pb);
 604         if (constant_subblock_duration == 0)
 605             nb_subblocks = ffio_read_leb(pb);
 606         else {
 607             nb_subblocks = duration / constant_subblock_duration;
 608             total_duration = duration;
 609         }
 610     }
 611
 612     param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
 613     if (!param)
 614         return AVERROR(ENOMEM);
 615
 616     for (int i = 0; i < nb_subblocks; i++) {
 617         void *subblock = av_iamf_param_definition_get_subblock(param, i);
 618         unsigned int subblock_duration = constant_subblock_duration;
 619
 620         if (constant_subblock_duration == 0) {
 621             subblock_duration = ffio_read_leb(pb);
 622             total_duration += subblock_duration;
 623         } else if (i == nb_subblocks - 1)
 624             subblock_duration = duration - i * constant_subblock_duration;
 625
 626         switch (type) {
 627         case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
 628             AVIAMFMixGain *mix = subblock;
 629             mix->subblock_duration = subblock_duration;
 630             break;
 631         }
 632         case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
 633             AVIAMFDemixingInfo *demix = subblock;
 634             demix->subblock_duration = subblock_duration;
 635             // DefaultDemixingInfoParameterData
 636             av_assert0(audio_element);
 637             demix->dmixp_mode = avio_r8(pb) >> 5;
 638             audio_element->element->default_w = avio_r8(pb) >> 4;
 639             break;
 640         }
 641         case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
 642             AVIAMFReconGain *recon = subblock;
 643             recon->subblock_duration = subblock_duration;
 644             break;
 645         }
 646         default:
 647             av_free(param);
 648             return AVERROR_INVALIDDATA;
 649         }
 650     }
 651
 652     if (!mode && !constant_subblock_duration && total_duration != duration) {
 653         av_log(s, AV_LOG_ERROR, "Invalid subblock durations in parameter_id %u\n", parameter_id);
 654         av_free(param);
 655         return AVERROR_INVALIDDATA;
 656     }
 657
 658     param->parameter_id = parameter_id;
 659     param->parameter_rate = parameter_rate;
 660     param->duration = duration;
 661     param->constant_subblock_duration = constant_subblock_duration;
 662     param->nb_subblocks = nb_subblocks;
 663
 664     if (param_definition) {
 665         if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
 666             av_log(s, AV_LOG_ERROR, "Inconsistent parameters for parameter_id %u\n", parameter_id);
 667             av_free(param);
 668             return AVERROR_INVALIDDATA;
 669         }
 670     } else {
 671         IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
 672                                                      sizeof(*c->param_definitions));
 673         if (!tmp) {
 674             av_free(param);
 675             return AVERROR(ENOMEM);
 676         }
 677         c->param_definitions = tmp;
 678
 679         param_definition = av_mallocz(sizeof(*param_definition));
 680         if (!param_definition) {
 681             av_free(param);
 682             return AVERROR(ENOMEM);
 683         }
 684         param_definition->param = param;
 685         param_definition->mode = !mode;
 686         param_definition->param_size = param_size;
 687         param_definition->audio_element = audio_element;
 688
 689         c->param_definitions[c->nb_param_definitions++] = param_definition;
 690     }
 691
 692     av_assert0(out_param_definition);
 693     *out_param_definition = param;
 694
 695     return 0;
 696 }
 697
 698 static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 699 {
 700     const IAMFCodecConfig *codec_config;
 701     AVIAMFAudioElement *element;
 702     IAMFAudioElement **tmp, *audio_element = NULL;
 703     FFIOContext b;
 704     AVIOContext *pbc;
 705     uint8_t *buf;
 706     unsigned audio_element_id, nb_substreams, codec_config_id, num_parameters;
 707     int audio_element_type, ret;
 708
 709     buf = av_malloc(len);
 710     if (!buf)
 711         return AVERROR(ENOMEM);
 712
 713     ret = ffio_read_size(pb, buf, len);
 714     if (ret < 0)
 715         goto fail;
 716
 717     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
 718     pbc = &b.pub;
 719
 720     audio_element_id = ffio_read_leb(pbc);
 721
 722     for (int i = 0; i < c->nb_audio_elements; i++)
 723         if (c->audio_elements[i]->audio_element_id == audio_element_id) {
 724             av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
 725             ret = AVERROR_INVALIDDATA;
 726             goto fail;
 727         }
 728
 729     audio_element_type = avio_r8(pbc) >> 5;
 730     if (audio_element_type > AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
 731         av_log(s, AV_LOG_DEBUG, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
 732         ret = 0;
 733         goto fail;
 734     }
 735
 736     codec_config_id = ffio_read_leb(pbc);
 737
 738     codec_config = ff_iamf_get_codec_config(c, codec_config_id);
 739     if (!codec_config) {
 740         av_log(s, AV_LOG_ERROR, "Non existent codec config id %d referenced in an audio element\n", codec_config_id);
 741         ret = AVERROR_INVALIDDATA;
 742         goto fail;
 743     }
 744
 745     if (codec_config->codec_id == AV_CODEC_ID_NONE) {
 746         av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
 747         ret = 0;
 748         goto fail;
 749     }
 750
 751     tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
 752     if (!tmp) {
 753         ret = AVERROR(ENOMEM);
 754         goto fail;
 755     }
 756     c->audio_elements = tmp;
 757
 758     audio_element = av_mallocz(sizeof(*audio_element));
 759     if (!audio_element) {
 760         ret = AVERROR(ENOMEM);
 761         goto fail;
 762     }
 763
 764     nb_substreams = ffio_read_leb(pbc);
 765     audio_element->codec_config_id = codec_config_id;
 766     audio_element->audio_element_id = audio_element_id;
 767     audio_element->substreams = av_calloc(nb_substreams, sizeof(*audio_element->substreams));
 768     if (!audio_element->substreams) {
 769         ret = AVERROR(ENOMEM);
 770         goto fail;
 771     }
 772     audio_element->nb_substreams = nb_substreams;
 773
 774     element = audio_element->element = av_iamf_audio_element_alloc();
 775     if (!element) {
 776         ret = AVERROR(ENOMEM);
 777         goto fail;
 778     }
 779     audio_element->celement = element;
 780
 781     element->audio_element_type = audio_element_type;
 782
 783     for (int i = 0; i < audio_element->nb_substreams; i++) {
 784         IAMFSubStream *substream = &audio_element->substreams[i];
 785
 786         substream->codecpar = avcodec_parameters_alloc();
 787         if (!substream->codecpar) {
 788             ret = AVERROR(ENOMEM);
 789             goto fail;
 790         }
 791
 792         substream->audio_substream_id = ffio_read_leb(pbc);
 793
 794         substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
 795         substream->codecpar->codec_id   = codec_config->codec_id;
 796         substream->codecpar->frame_size = codec_config->nb_samples;
 797         substream->codecpar->sample_rate = codec_config->sample_rate;
 798         substream->codecpar->seek_preroll = -codec_config->audio_roll_distance * codec_config->nb_samples;
 799
 800         switch(substream->codecpar->codec_id) {
 801         case AV_CODEC_ID_AAC:
 802         case AV_CODEC_ID_FLAC:
 803         case AV_CODEC_ID_OPUS:
 804             substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
 805             if (!substream->codecpar->extradata) {
 806                 ret = AVERROR(ENOMEM);
 807                 goto fail;
 808             }
 809             memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
 810             memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 811             substream->codecpar->extradata_size = codec_config->extradata_size;
 812             break;
 813         }
 814     }
 815
 816     num_parameters = ffio_read_leb(pbc);
 817     if (num_parameters > 2 && audio_element_type == 0) {
 818         av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
 819                                 " for Channel representations\n", num_parameters);
 820         ret = AVERROR_INVALIDDATA;
 821         goto fail;
 822     }
 823     if (num_parameters && audio_element_type != 0) {
 824         av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
 825                                 " for Scene representations\n", num_parameters);
 826         ret = AVERROR_INVALIDDATA;
 827         goto fail;
 828     }
 829
 830     for (int i = 0; i < num_parameters; i++) {
 831         unsigned type;
 832
 833         type = ffio_read_leb(pbc);
 834         if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
 835             ret = AVERROR_INVALIDDATA;
 836         else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
 837             if (element->demixing_info) {
 838                 ret = AVERROR_INVALIDDATA;
 839                 goto fail;
 840             }
 841             ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
 842         } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
 843             if (element->recon_gain_info) {
 844                 ret = AVERROR_INVALIDDATA;
 845                 goto fail;
 846             }
 847             ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
 848         } else {
 849             unsigned param_definition_size = ffio_read_leb(pbc);
 850             avio_skip(pbc, param_definition_size);
 851         }
 852         if (ret < 0)
 853             goto fail;
 854     }
 855
 856     if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
 857         ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
 858         if (ret < 0)
 859             goto fail;
 860     } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
 861         ret = ambisonics_config(s, pbc, audio_element, codec_config);
 862         if (ret < 0)
 863             goto fail;
 864     } else {
 865         av_assert0(0);
 866     }
 867
 868     c->audio_elements[c->nb_audio_elements++] = audio_element;
 869
 870     len -= avio_tell(pbc);
 871     if (len)
 872        av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);
 873
 874     ret = 0;
 875 fail:
 876     av_free(buf);
 877     if (ret < 0)
 878         ff_iamf_free_audio_element(&audio_element);
 879     return ret;
 880 }
 881
 882 static int label_string(AVIOContext *pb, char **label)
 883 {
 884     uint8_t buf[128];
 885
 886     avio_get_str(pb, sizeof(buf), buf, sizeof(buf));
 887
 888     if (pb->error)
 889         return pb->error;
 890     if (pb->eof_reached)
 891         return AVERROR_INVALIDDATA;
 892     *label = av_strdup(buf);
 893     if (!*label)
 894         return AVERROR(ENOMEM);
 895
 896     return 0;
 897 }
 898
 899 static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 900 {
 901     AVIAMFMixPresentation *mix;
 902     IAMFMixPresentation **tmp, *mix_presentation = NULL;
 903     FFIOContext b;
 904     AVIOContext *pbc;
 905     uint8_t *buf;
 906     unsigned nb_submixes, mix_presentation_id;
 907     int ret;
 908
 909     buf = av_malloc(len);
 910     if (!buf)
 911         return AVERROR(ENOMEM);
 912
 913     ret = ffio_read_size(pb, buf, len);
 914     if (ret < 0)
 915         goto fail;
 916
 917     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
 918     pbc = &b.pub;
 919
 920     mix_presentation_id = ffio_read_leb(pbc);
 921
 922     for (int i = 0; i < c->nb_mix_presentations; i++)
 923         if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
 924             av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
 925             ret = AVERROR_INVALIDDATA;
 926             goto fail;
 927         }
 928
 929     tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
 930     if (!tmp) {
 931         ret = AVERROR(ENOMEM);
 932         goto fail;
 933     }
 934     c->mix_presentations = tmp;
 935
 936     mix_presentation = av_mallocz(sizeof(*mix_presentation));
 937     if (!mix_presentation) {
 938         ret = AVERROR(ENOMEM);
 939         goto fail;
 940     }
 941
 942     mix_presentation->mix_presentation_id = mix_presentation_id;
 943     mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
 944     if (!mix) {
 945         ret = AVERROR(ENOMEM);
 946         goto fail;
 947     }
 948     mix_presentation->cmix = mix;
 949
 950     mix_presentation->count_label = ffio_read_leb(pbc);
 951     mix_presentation->language_label = av_calloc(mix_presentation->count_label,
 952                                                  sizeof(*mix_presentation->language_label));
 953     if (!mix_presentation->language_label) {
 954         mix_presentation->count_label = 0;
 955         ret = AVERROR(ENOMEM);
 956         goto fail;
 957     }
 958
 959     for (int i = 0; i < mix_presentation->count_label; i++) {
 960         ret = label_string(pbc, &mix_presentation->language_label[i]);
 961         if (ret < 0)
 962             goto fail;
 963     }
 964
 965     for (int i = 0; i < mix_presentation->count_label; i++) {
 966         char *annotation = NULL;
 967         ret = label_string(pbc, &annotation);
 968         if (ret < 0)
 969             goto fail;
 970         ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
 971                           AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
 972         if (ret < 0)
 973             goto fail;
 974     }
 975
 976     nb_submixes = ffio_read_leb(pbc);
 977     for (int i = 0; i < nb_submixes; i++) {
 978         AVIAMFSubmix *sub_mix;
 979         unsigned nb_elements, nb_layouts;
 980
 981         sub_mix = av_iamf_mix_presentation_add_submix(mix);
 982         if (!sub_mix) {
 983             ret = AVERROR(ENOMEM);
 984             goto fail;
 985         }
 986
 987         nb_elements = ffio_read_leb(pbc);
 988         for (int j = 0; j < nb_elements; j++) {
 989             AVIAMFSubmixElement *submix_element;
 990             IAMFAudioElement *audio_element = NULL;
 991             unsigned int rendering_config_extension_size;
 992
 993             submix_element = av_iamf_submix_add_element(sub_mix);
 994             if (!submix_element) {
 995                 ret = AVERROR(ENOMEM);
 996                 goto fail;
 997             }
 998
 999             submix_element->audio_element_id = ffio_read_leb(pbc);
1000
1001             for (int k = 0; k < c->nb_audio_elements; k++)
1002                 if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
1003                     audio_element = c->audio_elements[k];
1004                     break;
1005                 }
1006
1007             if (!audio_element) {
1008                 av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
1009                        submix_element->audio_element_id, mix_presentation_id);
1010                 ret = AVERROR_INVALIDDATA;
1011                 goto fail;
1012             }
1013
1014             for (int k = 0; k < mix_presentation->count_label; k++) {
1015                 char *annotation = NULL;
1016                 ret = label_string(pbc, &annotation);
1017                 if (ret < 0)
1018                     goto fail;
1019                 ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
1020                                   AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
1021                 if (ret < 0)
1022                     goto fail;
1023             }
1024
1025             submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;
1026
1027             rendering_config_extension_size = ffio_read_leb(pbc);
1028             avio_skip(pbc, rendering_config_extension_size);
1029
1030             ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
1031                               NULL,
1032                               &submix_element->element_mix_config);
1033             if (ret < 0)
1034                 goto fail;
1035             submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1036         }
1037
1038         ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
1039         if (ret < 0)
1040             goto fail;
1041         sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1042
1043         nb_layouts = ffio_read_leb(pbc);
1044         for (int j = 0; j < nb_layouts; j++) {
1045             AVIAMFSubmixLayout *submix_layout;
1046             int info_type;
1047             int byte = avio_r8(pbc);
1048
1049             submix_layout = av_iamf_submix_add_layout(sub_mix);
1050             if (!submix_layout) {
1051                 ret = AVERROR(ENOMEM);
1052                 goto fail;
1053             }
1054
1055             submix_layout->layout_type = byte >> 6;
1056             if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
1057                 submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
1058                 av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
1059                        submix_layout->layout_type, mix_presentation_id);
1060                 ret = AVERROR_INVALIDDATA;
1061                 goto fail;
1062             }
1063             if (submix_layout->layout_type == 2) {
1064                 int sound_system;
1065                 sound_system = (byte >> 2) & 0xF;
1066                 if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
1067                     ret = AVERROR_INVALIDDATA;
1068                     goto fail;
1069                 }
1070                 av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
1071             } else
1072                 submix_layout->sound_system = (AVChannelLayout)AV_CHANNEL_LAYOUT_BINAURAL;
1073
1074             info_type = avio_r8(pbc);
1075             submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1076             submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1077
1078             if (info_type & 1)
1079                 submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1080             if (info_type & 2) {
1081                 unsigned int num_anchored_loudness = avio_r8(pbc);
1082
1083                 for (int k = 0; k < num_anchored_loudness; k++) {
1084                     unsigned int anchor_element = avio_r8(pbc);
1085                     AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
1086                     if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
1087                         submix_layout->dialogue_anchored_loudness = anchored_loudness;
1088                     else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
1089                         submix_layout->album_anchored_loudness = anchored_loudness;
1090                     else
1091                         av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
1092                 }
1093             }
1094
1095             if (info_type & 0xFC) {
1096                 unsigned int info_type_size = ffio_read_leb(pbc);
1097                 avio_skip(pbc, info_type_size);
1098             }
1099         }
1100     }
1101
1102     c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;
1103
1104     len -= avio_tell(pbc);
1105     if (len)
1106         av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);
1107
1108     ret = 0;
1109 fail:
1110     av_free(buf);
1111     if (ret < 0)
1112         ff_iamf_free_mix_presentation(&mix_presentation);
1113     return ret;
1114 }
1115
1116 int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
1117                              unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
1118                              unsigned *skip_samples, unsigned *discard_padding)
1119 {
1120     GetBitContext gb;
1121     int ret, extension_flag, trimming, start;
1122     unsigned skip = 0, discard = 0;
1123     unsigned size;
1124
1125     ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
1126     if (ret < 0)
1127         return ret;
1128
1129     *type          = get_bits(&gb, 5);
1130     /*redundant      =*/ get_bits1(&gb);
1131     trimming       = get_bits1(&gb);
1132     extension_flag = get_bits1(&gb);
1133
1134     *obu_size = get_leb(&gb);
1135     if (*obu_size > INT_MAX)
1136         return AVERROR_INVALIDDATA;
1137
1138     start = get_bits_count(&gb) / 8;
1139
1140     if (trimming) {
1141         discard = get_leb(&gb); // num_samples_to_trim_at_end
1142         skip = get_leb(&gb); // num_samples_to_trim_at_start
1143     }
1144
1145     if (skip_samples)
1146         *skip_samples = skip;
1147     if (discard_padding)
1148         *discard_padding = discard;
1149
1150     if (extension_flag) {
1151         unsigned int extension_bytes;
1152         extension_bytes = get_leb(&gb);
1153         if (extension_bytes > INT_MAX / 8)
1154             return AVERROR_INVALIDDATA;
1155         skip_bits_long(&gb, extension_bytes * 8);
1156     }
1157
1158     if (get_bits_left(&gb) < 0)
1159         return AVERROR_INVALIDDATA;
1160
1161     size = *obu_size + start;
1162     if (size > INT_MAX)
1163         return AVERROR_INVALIDDATA;
1164
1165     *obu_size -= get_bits_count(&gb) / 8 - start;
1166     *start_pos = size - *obu_size;
1167
1168     return size;
1169 }
1170
1171 int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
1172                                 int max_size, void *log_ctx)
1173 {
1174     uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
1175     int ret;
1176
1177     while (1) {
1178         unsigned obu_size;
1179         enum IAMF_OBU_Type type;
1180         int start_pos, len, size;
1181
1182         if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
1183             return ret;
1184         size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
1185         if (size < 0)
1186             return size;
1187         memset(header + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
1188
1189         len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
1190         if (len < 0 || obu_size > max_size) {
1191             av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
1192             avio_seek(pb, -size, SEEK_CUR);
1193             return len;
1194         }
1195
1196         if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
1197             avio_seek(pb, -size, SEEK_CUR);
1198             break;
1199         }
1200
1201         avio_seek(pb, -(size - start_pos), SEEK_CUR);
1202         switch (type) {
1203         case IAMF_OBU_IA_CODEC_CONFIG:
1204             ret = codec_config_obu(log_ctx, c, pb, obu_size);
1205             break;
1206         case IAMF_OBU_IA_AUDIO_ELEMENT:
1207             ret = audio_element_obu(log_ctx, c, pb, obu_size);
1208             break;
1209         case IAMF_OBU_IA_MIX_PRESENTATION:
1210             ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
1211             break;
1212         default: {
1213             int64_t offset = avio_skip(pb, obu_size);
1214             if (offset < 0)
1215                 ret = offset;
1216             break;
1217         }
1218         }
1219         if (ret < 0) {
1220             av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
1221             return ret;
1222         }
1223         max_size -= obu_size + start_pos;
1224         if (max_size < 0)
1225             return AVERROR_INVALIDDATA;
1226         if (!max_size)
1227             break;
1228     }
1229
1230     return 0;
1231 }