2 * Depacketization for RTP Payload Format For AV1 (v1.0)
3 * https://aomediacodec.github.io/av1-rtp-spec/
4 * Copyright (c) 2024 Axis Communications
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * @brief AV1 / RTP depacketization code (RTP Payload Format For AV1 (v1.0))
26 * @author Chris Hodges <chris.hodges@axis.com>
27 * @note The process will restore TDs and put back size fields into headers.
28 * It will also try to keep complete OBUs and remove partial OBUs
29 * caused by packet drops and thus keep the stream syntactically intact.
32 #include "libavutil/avstring.h"
33 #include "libavutil/mem.h"
37 #include "libavcodec/av1.h"
40 // enable tracing of packet data
41 //#define RTPDEC_AV1_VERBOSE_TRACE
44 * RTP/AV1 specific private data.
/**
 * Per-stream state of the AV1 RTP depacketizer.
 *
 * profile, level_idx and tier are filled in from the SDP fmtp attributes;
 * the remaining members track reassembly of OBUs across RTP packets.
 */
struct PayloadContext {
    uint32_t     timestamp;         ///< last received timestamp for frame
    uint8_t      profile;           ///< profile (main/high/professional)
    uint8_t      level_idx;         ///< level (0-31)
    uint8_t      tier;              ///< main tier or high tier
    uint16_t     prev_seq;          ///< sequence number of previous packet
    unsigned int frag_obu_size;     ///< current total size of fragmented OBU
    unsigned int frag_pkt_leb_pos;  ///< offset in buffer where OBU LEB starts
    unsigned int frag_lebs_res;     ///< number of bytes reserved for LEB
    unsigned int frag_header_size;  ///< size of OBU header (1 or 2)
    int          needs_td;          ///< indicates that a TD should be output
    int          drop_fragment;     ///< drop all fragments until next frame
    int          keyframe_seen;     ///< keyframe was seen
    int          wait_for_keyframe; ///< message about waiting for keyframe has been issued
};
62 static int sdp_parse_fmtp_config_av1(AVFormatContext
*s
,
64 PayloadContext
*av1_data
,
65 const char *attr
, const char *value
) {
66 if (!strcmp(attr
, "profile")) {
67 av1_data
->profile
= atoi(value
);
68 av_log(s
, AV_LOG_DEBUG
, "RTP AV1 profile: %u\n", av1_data
->profile
);
69 } else if (!strcmp(attr
, "level-idx")) {
70 av1_data
->level_idx
= atoi(value
);
71 av_log(s
, AV_LOG_DEBUG
, "RTP AV1 level: %u\n", av1_data
->profile
);
72 } else if (!strcmp(attr
, "tier")) {
73 av1_data
->tier
= atoi(value
);
74 av_log(s
, AV_LOG_DEBUG
, "RTP AV1 tier: %u\n", av1_data
->tier
);
// Depacketizer entry point, registered as .parse_packet in
// ff_av1_dynamic_handler. It reads the one-byte aggregation header
// (Z, Y, W and N bits), then walks the OBU elements found in buf and
// appends them to pkt. While doing so it restores OBU size fields and,
// when data->needs_td is set, writes a two-byte temporal delimiter
// (0x12 0x00) in front of the first OBU.
// Error paths visible in this block return AVERROR_INVALIDDATA or
// AVERROR_PATCHWELCOME.
79 // return 0 on complete packet, -1 on partial packet
80 static int av1_handle_packet(AVFormatContext
*ctx
, PayloadContext
*data
,
81 AVStream
*st
, AVPacket
*pkt
, uint32_t *timestamp
,
82 const uint8_t *buf
, int len
, uint16_t seq
,
87 int is_last_fragmented
;
89 unsigned int num_obus
;
90 unsigned int obu_cnt
= 1;
91 unsigned int rem_pkt_size
= len
;
93 const uint8_t *buf_ptr
= buf
;
// Both values are 16 bits wide, so the difference stays correct when the
// RTP sequence number wraps from 65535 to 0.
94 uint16_t expected_seq
= data
->prev_seq
+ 1;
95 int16_t seq_diff
= seq
- expected_seq
;
100 av_log(ctx
, AV_LOG_ERROR
, "Empty AV1 RTP packet\n");
101 return AVERROR_INVALIDDATA
;
104 av_log(ctx
, AV_LOG_ERROR
, "AV1 RTP packet too short\n");
105 return AVERROR_INVALIDDATA
;
108 /* The payload structure is supposed to be straight-forward, but there are a
109 * couple of edge cases which need to be tackled and make things a bit more
111 * These are mainly due to:
112 * - To reconstruct the OBU size for fragmented packets and place it the OBU
113 * header, the final size will not be known until the last fragment has
114 * been parsed. However, the number LEBs in the header is variable
115 * depending on the length of the payload.
116 * - We are increasing the out-packet size while we are getting fragmented
117 * OBUs. If an RTP packet gets dropped, we would create corrupted OBUs.
118 * In this case we decide to drop the whole frame.
121 #ifdef RTPDEC_AV1_VERBOSE_TRACE
122 av_log(ctx
, AV_LOG_TRACE
, "RTP Packet %d in (%x), len=%d:\n",
124 av_hex_dump_log(ctx
, AV_LOG_TRACE
, buf
, FFMIN(len
, 64));
125 av_log(ctx
, AV_LOG_TRACE
, "... end at offset %x:\n", FFMAX(len
- 64, 0));
// NOTE(review): FFMIN(len - 64, 64) is negative whenever len is below 64,
// so a negative size is passed to the hex dump. The output-packet trace
// further down uses FFMIN(pkt->size, 64) instead; confirm which is intended.
126 av_hex_dump_log(ctx
, AV_LOG_TRACE
, buf
+ FFMAX(len
- 64, 0), FFMIN(len
- 64, 64));
129 /* 8 bit aggregate header: Z Y W W N - - - */
130 aggr_hdr
= *buf_ptr
++;
133 /* Z: MUST be set to 1 if the first OBU element is an OBU fragment that is a
134 * continuation of an OBU fragment from the previous packet, and MUST be set
136 is_frag_cont
= (aggr_hdr
>> AV1B_AGGR_HDR_FRAG_CONT
) & 1;
138 /* Y: MUST be set to 1 if the last OBU element is an OBU fragment that will
139 * continue in the next packet, and MUST be set to 0 otherwise */
140 is_last_fragmented
= (aggr_hdr
>> AV1B_AGGR_HDR_LAST_FRAG
) & 1;
142 /* W: two bit field that describes the number of OBU elements in the packet.
143 * This field MUST be set equal to 0 or equal to the number of OBU elements
144 * contained in the packet.
145 * If set to 0, each OBU element MUST be preceded by a length field.
146 * If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be
147 * preceded by a length field (it's derived from RTP packet size minus other
149 num_obus
= (aggr_hdr
>> AV1S_AGGR_HDR_NUM_OBUS
) & AV1M_AGGR_HDR_NUM_OBUS
;
151 /* N: MUST be set to 1 if the packet is the first packet of a coded video
152 * sequence, and MUST be set to 0 otherwise.*/
153 is_first_pkt
= (aggr_hdr
>> AV1B_AGGR_HDR_FIRST_PKT
) & 1;
156 if (data
->drop_fragment
) {
157 return AVERROR_INVALIDDATA
;
160 av_log(ctx
, AV_LOG_ERROR
, "Illegal aggregation header in first AV1 RTP packet\n");
161 return AVERROR_INVALIDDATA
;
164 av_log(ctx
, AV_LOG_WARNING
, "AV1 RTP frag packet sequence mismatch (%d != %d), dropping temporal unit\n",
168 if (!pkt
->size
|| !data
->frag_obu_size
) {
169 av_log(ctx
, AV_LOG_WARNING
, "Unexpected fragment continuation in AV1 RTP packet\n");
170 goto drop_fragment
; // avoid repeated output for the same fragment
173 if (!is_first_pkt
&& !data
->keyframe_seen
) {
174 if (!data
->wait_for_keyframe
) {
175 data
->wait_for_keyframe
= 1;
176 av_log(ctx
, AV_LOG_WARNING
, "AV1 RTP packet before keyframe, dropping and waiting for next keyframe\n");
180 if (seq_diff
&& !is_first_pkt
) {
181 av_log(ctx
, AV_LOG_WARNING
, "AV1 RTP unfrag packet sequence mismatch (%d != %d), dropping temporal unit\n",
185 data
->drop_fragment
= 0;
186 if (!data
->needs_td
&& ((data
->timestamp
!= *timestamp
) || is_first_pkt
)) {
187 av_log(ctx
, AV_LOG_TRACE
, "Timestamp changed to %u (or first pkt %d), forcing TD\n", *timestamp
, is_first_pkt
);
189 data
->frag_obu_size
= 0; // new temporal unit might have been caused by dropped packets
191 if (data
->frag_obu_size
) {
192 data
->frag_obu_size
= 0; // make sure we recover
193 av_log(ctx
, AV_LOG_ERROR
, "Missing fragment continuation in AV1 RTP packet\n");
194 return AVERROR_INVALIDDATA
;
196 // update the timestamp in the frame packet with the one from the RTP packet
197 data
->timestamp
= *timestamp
;
201 #ifdef RTPDEC_AV1_VERBOSE_TRACE
202 av_log(ctx
, AV_LOG_TRACE
, "Input buffer size %d, aggr head 0x%02x fc %d, lf %d, no %d, fp %d\n",
203 len
, aggr_hdr
, is_frag_cont
, is_last_fragmented
, num_obus
, is_first_pkt
);
207 pkt
->flags
|= AV_PKT_FLAG_KEY
;
208 data
->keyframe_seen
= 1;
209 data
->wait_for_keyframe
= 0;
212 // loop over OBU elements
213 while (rem_pkt_size
) {
216 int needs_size_field
;
218 unsigned int obu_payload_size
;
221 obu_size
= rem_pkt_size
;
222 if (!num_obus
|| obu_cnt
< num_obus
) {
223 // read out explicit OBU element size (which almost corresponds to the original OBU size)
224 num_lebs
= parse_leb(ctx
, buf_ptr
, rem_pkt_size
, &obu_size
);
226 return AVERROR_INVALIDDATA
;
228 rem_pkt_size
-= num_lebs
;
231 // read first byte (which is the header byte only for non-fragmented elements)
233 if (obu_size
> rem_pkt_size
) {
234 av_log(ctx
, AV_LOG_ERROR
, "AV1 OBU size %u larger than remaining pkt size %d\n", obu_size
, rem_pkt_size
);
235 return AVERROR_INVALIDDATA
;
239 av_log(ctx
, AV_LOG_ERROR
, "Unreasonable AV1 OBU size %u\n", obu_size
);
240 return AVERROR_INVALIDDATA
;
244 uint8_t obu_type
= (obu_hdr
>> AV1S_OBU_TYPE
) & AV1M_OBU_TYPE
;
245 if (obu_hdr
& AV1F_OBU_FORBIDDEN
) {
246 av_log(ctx
, AV_LOG_ERROR
, "Forbidden bit set in AV1 OBU header (0x%02x)\n", obu_hdr
);
247 return AVERROR_INVALIDDATA
;
249 // ignore and remove OBUs according to spec
250 if ((obu_type
== AV1_OBU_TEMPORAL_DELIMITER
) ||
251 (obu_type
== AV1_OBU_TILE_LIST
)) {
253 rem_pkt_size
-= obu_size
;
254 // TODO: This probably breaks if the OBU_TILE_LIST is fragmented
255 // into the next RTP packet, so at least check and fail here
256 if (rem_pkt_size
== 0 && is_last_fragmented
) {
257 avpriv_report_missing_feature(ctx
, "AV1 OBU_TILE_LIST (should not be there!) to be ignored but is fragmented\n");
258 return AVERROR_PATCHWELCOME
;
265 // If we need to add a size field, out size will be different
266 output_size
= obu_size
;
267 // Spec says the OBUs should have their size fields removed,
268 // but this is not mandatory
269 if (is_frag_cont
|| (obu_hdr
& AV1F_OBU_HAS_SIZE_FIELD
)) {
270 needs_size_field
= 0;
272 needs_size_field
= 1;
273 // (re)calculate number of LEB bytes needed (if it was implicit, there were no LEBs)
274 output_size
+= calc_leb_size(obu_size
- (1 + ((obu_hdr
& AV1F_OBU_EXTENSION_FLAG
) ? 1 : 0)));
277 if (!is_frag_cont
&& (obu_cnt
== 1)) {
278 if (data
->needs_td
) {
279 output_size
+= 2; // for Temporal Delimiter (TD)
282 if ((result
= av_grow_packet(pkt
, output_size
)) < 0)
// NOTE(review): unlike the av_grow_packet() checks in this function, the
// comparison below sits inside the assignment, so result receives the 0/1
// outcome of the comparison rather than the value returned by
// av_new_packet(). If result is handed back to the caller, the actual
// error code is lost. Expected form: (result = av_new_packet(...)) < 0.
285 if ((result
= av_new_packet(pkt
, output_size
) < 0))
289 if (data
->needs_td
) {
291 pkt
->data
[pktpos
++] = 0x12;
292 pkt
->data
[pktpos
++] = 0x00;
296 if ((result
= av_grow_packet(pkt
, output_size
)) < 0)
300 obu_payload_size
= obu_size
;
301 // do we need to restore the OBU size field?
302 if (needs_size_field
) {
303 // set obu_has_size_field in header byte
304 pkt
->data
[pktpos
++] = *buf_ptr
++ | AV1F_OBU_HAS_SIZE_FIELD
;
305 data
->frag_header_size
= 1;
308 // copy extension byte, if available
309 if (obu_hdr
& AV1F_OBU_EXTENSION_FLAG
) {
310 /* TODO we cannot handle the edge case where last element is a
311 * fragment of exactly one byte AND the header has the extension
312 * flag set. Note that it would be more efficient to not send a
313 * fragment of one byte and instead drop the size field of the
315 if (!obu_payload_size
) {
316 av_log(ctx
, AV_LOG_ERROR
, "AV1 OBU too short for extension byte (0x%02x)\n",
318 return AVERROR_INVALIDDATA
;
320 pkt
->data
[pktpos
++] = *buf_ptr
++;
321 data
->frag_header_size
= 2;
325 // remember start position of LEB for possibly fragmented packet to
326 // fixup OBU size later
327 data
->frag_pkt_leb_pos
= pktpos
;
328 // write intermediate OBU size field
329 num_lebs
= write_leb(pkt
->data
+ pktpos
, obu_payload_size
);
330 data
->frag_lebs_res
= num_lebs
;
332 } else if (!is_frag_cont
) {
333 data
->frag_lebs_res
= 0;
335 // copy verbatim or without above header size patch
336 memcpy(pkt
->data
+ pktpos
, buf_ptr
, obu_payload_size
);
337 pktpos
+= obu_payload_size
;
338 buf_ptr
+= obu_payload_size
;
339 rem_pkt_size
-= obu_size
;
341 // if we were handling a fragmented packet and this was the last
342 // fragment, correct OBU size field
// The fragment counts as finished when further elements follow in this
// packet or the Y bit is clear.
343 if (data
->frag_obu_size
&& (rem_pkt_size
|| !is_last_fragmented
)) {
344 if (data
->frag_lebs_res
) {
345 uint32_t final_obu_size
= data
->frag_obu_size
+ obu_size
- data
->frag_header_size
;
346 uint8_t *lebptr
= pkt
->data
+ data
->frag_pkt_leb_pos
;
347 num_lebs
= calc_leb_size(final_obu_size
);
349 // check if we had allocated enough LEB bytes in header,
350 // otherwise make some extra space
351 if (num_lebs
> data
->frag_lebs_res
) {
352 int extra_bytes
= num_lebs
- data
->frag_lebs_res
;
353 if ((result
= av_grow_packet(pkt
, extra_bytes
)) < 0)
355 // update pointer in case buffer address changed
356 lebptr
= pkt
->data
+ data
->frag_pkt_leb_pos
;
357 // move existing data for OBU back a bit
358 memmove(lebptr
+ extra_bytes
, lebptr
,
359 pkt
->size
- extra_bytes
- data
->frag_pkt_leb_pos
);
360 // move pktpos further down for following OBUs in same packet.
361 pktpos
+= extra_bytes
;
364 // update OBU size field
365 write_leb(lebptr
, final_obu_size
);
367 data
->frag_obu_size
= 0; // signal end of fragment
// Trailing fragment that continues in the next packet: accumulate its size
// so the final OBU size can be computed once the last piece arrives.
368 } else if (is_last_fragmented
&& !rem_pkt_size
) {
369 data
->frag_obu_size
+= obu_size
;
370 // fragment not yet finished!
375 if (!rem_pkt_size
&& num_obus
&& (num_obus
!= obu_cnt
)) {
376 av_log(ctx
, AV_LOG_WARNING
, "AV1 aggregation header indicated %u OBU elements, was %u\n",
382 if (flags
& RTP_FLAG_MARKER
) {
383 av_log(ctx
, AV_LOG_TRACE
, "TD on next packet due to marker\n");
386 // fragment may be complete, but temporal unit is not yet finished
390 if (!is_last_fragmented
) {
391 data
->frag_obu_size
= 0;
392 data
->frag_pkt_leb_pos
= 0;
395 #ifdef RTPDEC_AV1_VERBOSE_TRACE
397 av_log(ctx
, AV_LOG_TRACE
, "AV1 out pkt-size: %d\n", pkt
->size
);
398 av_hex_dump_log(ctx
, AV_LOG_TRACE
, pkt
->data
, FFMIN(pkt
->size
, 64));
399 av_log(ctx
, AV_LOG_TRACE
, "... end at offset %x:\n", FFMAX(pkt
->size
- 64, 0));
400 av_hex_dump_log(ctx
, AV_LOG_TRACE
, pkt
->data
+ FFMAX(pkt
->size
- 64, 0), FFMIN(pkt
->size
, 64));
403 pkt
->stream_index
= st
->index
;
// Drop path: forget the keyframe, mark following fragments to be dropped,
// reset the fragment size and empty the partially assembled packet before
// reporting invalid data.
408 data
->keyframe_seen
= 0;
409 data
->drop_fragment
= 1;
410 data
->frag_obu_size
= 0;
413 av_log(ctx
, AV_LOG_TRACE
, "Dumping current AV1 frame packet\n");
414 // we can't seem to deallocate the fragmented packet, but we can shrink it to 0
415 av_shrink_packet(pkt
, 0);
417 return AVERROR_INVALIDDATA
;
420 static void av1_close_context(PayloadContext
*data
) {
423 static int av1_need_keyframe(PayloadContext
*data
)
425 return !data
->keyframe_seen
;
// SDP attribute-line callback (registered as .parse_sdp_a_line).
// When the line starts with "fmtp:", the remainder is handed to
// ff_parse_fmtp() with sdp_parse_fmtp_config_av1 as the per-attribute
// callback; the profile, level and tier then held in av1_data are logged
// at debug level.
428 static int parse_av1_sdp_line(AVFormatContext
*s
, int st_index
,
429 PayloadContext
*av1_data
, const char *line
) {
431 const char *p
= line
;
// st_index is used directly as an index into s->streams.
437 stream
= s
->streams
[st_index
];
439 /* Optional parameters are profile, level-idx, and tier.
440 * See Section 7.2.1 of https://aomediacodec.github.io/av1-rtp-spec/ */
// av_strstart() advances p past the "fmtp:" prefix on a match.
441 if (av_strstart(p
, "fmtp:", &p
)) {
442 result
= ff_parse_fmtp(s
, stream
, av1_data
, p
, sdp_parse_fmtp_config_av1
);
// The summary below is logged regardless of the value stored in result.
443 av_log(s
, AV_LOG_DEBUG
, "RTP AV1 Profile: %u, Level: %u, Tier: %u\n",
444 av1_data
->profile
, av1_data
->level_idx
, av1_data
->tier
);
// Handler descriptor for the AV1 RTP depacketizer. It declares a video
// stream with codec id AV1, requests full parsing of the output, sizes the
// private data as one PayloadContext, and wires up the SDP-line, close,
// packet and keyframe callbacks defined in this file.
// NOTE(review): no .enc_name initializer is visible among these designated
// initializers; confirm the handler carries an encoding name if it is
// expected to be looked up by the SDP rtpmap name.
450 const RTPDynamicProtocolHandler ff_av1_dynamic_handler
= {
452 .codec_type
= AVMEDIA_TYPE_VIDEO
,
453 .codec_id
= AV_CODEC_ID_AV1
,
454 .need_parsing
= AVSTREAM_PARSE_FULL
,
455 .priv_data_size
= sizeof(PayloadContext
),
456 .parse_sdp_a_line
= parse_av1_sdp_line
,
457 .close
= av1_close_context
,
458 .parse_packet
= av1_handle_packet
,
459 .need_keyframe
= av1_need_keyframe
,