2 * WebM DASH Manifest XML muxer
3 * Copyright (c) 2014 Vignesh Venkatasubramanian
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * WebM DASH Specification:
24 * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
25 * ISO DASH Specification:
26 * http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
38 #include "libavutil/avstring.h"
39 #include "libavutil/dict.h"
40 #include "libavutil/mem.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/time_internal.h"
44 #include "libavcodec/codec_desc.h"
/*
 * One DASH Adaptation Set, filled in by parse_adaptation_sets().
 * The member declarations are not visible in this view; other code in this
 * file accesses `id` (printed with %s), `streams` (indices into the format
 * context's stream array) and `nb_streams` (number of entries in `streams`).
 */
46 typedef struct AdaptationSet
{
/*
 * Private data of the WebM DASH manifest muxer. Only part of the member list
 * is visible in this view; the AVOption table in this file maps user options
 * onto these fields via OFFSET().
 */
52 typedef struct WebMDashMuxContext
{
/* Raw "adaptation_sets" option string; parse_adaptation_sets() walks it. */
54 char *adaptation_sets
;
/* Counter printed with "%d" and post-incremented in write_adaptation_set()
 * when a numeric Representation id is generated. */
57 int representation_id
;
/* "chunk_start_index" option; written as the SegmentTemplate startNumber. */
59 int chunk_start_index
;
/* "time_shift_buffer_depth" option, in seconds; written as timeShiftBufferDepth. */
62 double time_shift_buffer_depth
;
/* "minimum_update_period" option, in seconds; written as minimumUpdatePeriod. */
63 int minimum_update_period
;
66 static const char *get_codec_name(int codec_id
)
68 return avcodec_descriptor_get(codec_id
)->name
;
/*
 * Scans the metadata of every stream in `s` for a duration entry and tracks
 * the largest value found. Each value is parsed with atof(); streams without
 * the entry, or with a negative value, are skipped.
 * NOTE(review): the metadata key, the initial value of `max` and the final
 * return expression are not visible in this view — confirm the unit of the
 * returned value against the complete file.
 */
71 static double get_duration(AVFormatContext
*s
)
75 for (i
= 0; i
< s
->nb_streams
; i
++) {
76 AVDictionaryEntry
*duration
= av_dict_get(s
->streams
[i
]->metadata
,
/* Ignore streams with no duration entry or a negative one. */
78 if (!duration
|| atof(duration
->value
) < 0) continue;
/* Keep the running maximum; the same string is parsed again on update. */
79 if (atof(duration
->value
) > max
) max
= atof(duration
->value
);
/*
 * Writes the XML declaration and the opening <MPD> element to s->pb.
 *
 * The "type" and "profiles" attributes depend on w->is_live. The code also
 * emits mediaPresentationDuration, minBufferTime (fixed at 1.0) and the
 * attributes availabilityStartTime, timeShiftBufferDepth and
 * minimumUpdatePeriod, plus a <UTCTiming> element when utc_timing_url is set.
 * NOTE(review): the conditions guarding the duration attribute and the
 * section that starts with the time() call are not visible in this view —
 * presumably non-live and live respectively; confirm.
 *
 * Returns AVERROR_UNKNOWN if strftime() fails to format the start time.
 */
84 static int write_header(AVFormatContext
*s
)
86 WebMDashMuxContext
*w
= s
->priv_data
;
87 AVIOContext
*pb
= s
->pb
;
88 double min_buffer_time
= 1.0;
89 avio_printf(pb
, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
90 avio_printf(pb
, "<MPD\n");
91 avio_printf(pb
, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
92 avio_printf(pb
, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
93 avio_printf(pb
, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
94 avio_printf(pb
, " type=\"%s\"\n", w
->is_live
? "dynamic" : "static");
96 avio_printf(pb
, " mediaPresentationDuration=\"PT%gS\"\n",
99 avio_printf(pb
, " minBufferTime=\"PT%gS\"\n", min_buffer_time
);
/* Non-live manifests close the <MPD> start tag here (">\n"); live ones only
 * end the line so that further attributes can follow. */
100 avio_printf(pb
, " profiles=\"%s\"%s",
101 w
->is_live
? "urn:mpeg:dash:profile:isoff-live:2011" : "urn:mpeg:dash:profile:webm-on-demand:2012",
102 w
->is_live
? "\n" : ">\n");
104 time_t local_time
= time(NULL
);
105 struct tm gmt_buffer
;
106 struct tm
*gmt
= gmtime_r(&local_time
, &gmt_buffer
);
/* 21 bytes: "YYYY-MM-DDTHH:MM:SSZ" is 20 characters plus the terminating NUL. */
108 if (!strftime(gmt_iso
, 21, "%Y-%m-%dT%H:%M:%SZ", gmt
)) {
109 return AVERROR_UNKNOWN
;
/* With AVFMT_FLAG_BITEXACT set, the timestamp is replaced by an empty string. */
111 if (s
->flags
& AVFMT_FLAG_BITEXACT
) {
112 av_strlcpy(gmt_iso
, "", 1);
114 avio_printf(pb
, " availabilityStartTime=\"%s\"\n", gmt_iso
);
115 avio_printf(pb
, " timeShiftBufferDepth=\"PT%gS\"\n", w
->time_shift_buffer_depth
);
116 avio_printf(pb
, " minimumUpdatePeriod=\"PT%dS\"", w
->minimum_update_period
);
117 avio_printf(pb
, ">\n");
/* Optional clock source for clients, only written when the option is set. */
118 if (w
->utc_timing_url
) {
119 avio_printf(pb
, "<UTCTiming\n");
120 avio_printf(pb
, " schemeIdUri=\"urn:mpeg:dash:utc:http-iso:2014\"\n");
121 avio_printf(pb
, " value=\"%s\"/>\n", w
->utc_timing_url
);
127 static void write_footer(AVFormatContext
*s
)
129 avio_printf(s
->pb
, "</MPD>\n");
/*
 * Compares the CUE_TIMESTAMPS metadata of every stream in the Adaptation Set
 * against that of the set's first stream ("gold").
 *
 * Returns 0 as soon as a stream has no CUE_TIMESTAMPS entry or its value does
 * not start with the gold value (av_strstart() prefix test).
 * NOTE(review): no NULL check on `gold` is visible before gold->value is
 * read, and the final return is not visible in this view — confirm both
 * against the complete file.
 */
132 static int subsegment_alignment(AVFormatContext
*s
, const AdaptationSet
*as
)
135 AVDictionaryEntry
*gold
= av_dict_get(s
->streams
[as
->streams
[0]]->metadata
,
136 CUE_TIMESTAMPS
, NULL
, 0);
/* Stream 0 is the reference, so the comparison starts at index 1. */
138 for (i
= 1; i
< as
->nb_streams
; i
++) {
139 AVDictionaryEntry
*ts
= av_dict_get(s
->streams
[as
->streams
[i
]]->metadata
,
140 CUE_TIMESTAMPS
, NULL
, 0);
141 if (!ts
|| !av_strstart(ts
->value
, gold
->value
, NULL
)) return 0;
/*
 * Compares every stream of the Adaptation Set with the set's first stream
 * ("gold"): TRACK_NUMBER metadata (av_strstart() prefix test), codec_id,
 * extradata_size and, when extradata is present, the extradata bytes.
 *
 * Returns 0 immediately when the first stream has no TRACK_NUMBER entry.
 * NOTE(review): the start of the comparison condition (any NULL check on
 * `track_num`) and the values returned after the comparison are not visible
 * in this view — confirm against the complete file.
 */
146 static int bitstream_switching(AVFormatContext
*s
, const AdaptationSet
*as
)
149 const AVStream
*gold_st
= s
->streams
[as
->streams
[0]];
150 AVDictionaryEntry
*gold_track_num
= av_dict_get(gold_st
->metadata
,
151 TRACK_NUMBER
, NULL
, 0);
152 AVCodecParameters
*gold_par
= gold_st
->codecpar
;
153 if (!gold_track_num
) return 0;
/* Stream 0 is the reference, so the comparison starts at index 1. */
154 for (i
= 1; i
< as
->nb_streams
; i
++) {
155 const AVStream
*st
= s
->streams
[as
->streams
[i
]];
156 AVDictionaryEntry
*track_num
= av_dict_get(st
->metadata
,
157 TRACK_NUMBER
, NULL
, 0);
158 AVCodecParameters
*par
= st
->codecpar
;
160 !av_strstart(track_num
->value
, gold_track_num
->value
, NULL
) ||
161 gold_par
->codec_id
!= par
->codec_id
||
162 gold_par
->extradata_size
!= par
->extradata_size
||
/* memcmp() is only reached when there are extradata bytes to compare. */
163 (par
->extradata_size
> 0 &&
164 memcmp(gold_par
->extradata
, par
->extradata
, par
->extradata_size
))) {
172 * Writes a Representation within an Adaptation Set. Returns 0 on success and < 0 on failure.
/*
 * Writes one <Representation> element for stream `st` to s->pb.
 *
 * id                  text for the Representation "id" attribute
 * output_width        non-zero: emit "width" for video streams
 * output_height       non-zero: emit "height" for video streams
 * output_sample_rate  non-zero: emit "audioSamplingRate" for audio streams
 *
 * The bandwidth attribute comes from the stream's BANDWIDTH metadata; for
 * live manifests a default of 128000 (audio) or 1000000 (otherwise) is used.
 * Visible failure paths return AVERROR(EINVAL), including when any of the
 * INITIALIZATION_RANGE, CUES_START, CUES_END or FILENAME entries is missing.
 * NOTE(review): the conditions selecting the live and non-live sections are
 * not visible in this view — confirm against the complete file.
 */
175 static int write_representation(AVFormatContext
*s
, AVStream
*st
, char *id
,
176 int output_width
, int output_height
,
177 int output_sample_rate
)
179 WebMDashMuxContext
*w
= s
->priv_data
;
180 AVIOContext
*pb
= s
->pb
;
181 const AVCodecParameters
*par
= st
->codecpar
;
182 AVDictionaryEntry
*bandwidth
= av_dict_get(st
->metadata
, BANDWIDTH
, NULL
, 0);
183 const char *bandwidth_str
;
184 avio_printf(pb
, "<Representation id=\"%s\"", id
);
186 bandwidth_str
= bandwidth
->value
;
187 } else if (w
->is_live
) {
188 // if bandwidth for live was not provided, use a default
189 bandwidth_str
= (par
->codec_type
== AVMEDIA_TYPE_AUDIO
) ? "128000" : "1000000";
191 return AVERROR(EINVAL
);
193 avio_printf(pb
, " bandwidth=\"%s\"", bandwidth_str
);
/* Each dimension is written here only when the caller asks for it. */
194 if (par
->codec_type
== AVMEDIA_TYPE_VIDEO
&& output_width
)
195 avio_printf(pb
, " width=\"%d\"", par
->width
);
196 if (par
->codec_type
== AVMEDIA_TYPE_VIDEO
&& output_height
)
197 avio_printf(pb
, " height=\"%d\"", par
->height
);
198 if (par
->codec_type
== AVMEDIA_TYPE_AUDIO
&& output_sample_rate
)
199 avio_printf(pb
, " audioSamplingRate=\"%d\"", par
->sample_rate
);
201 // For live streams, Codec and Mime Type always go in the Representation tag.
202 avio_printf(pb
, " codecs=\"%s\"", get_codec_name(par
->codec_id
));
203 avio_printf(pb
, " mimeType=\"%s/webm\"",
204 par
->codec_type
== AVMEDIA_TYPE_VIDEO
? "video" : "audio");
205 // For live streams, subsegments always start with key frames. So this
// attribute is hard-coded to "1" in the call below.
207 avio_printf(pb
, " startsWithSAP=\"1\"");
208 avio_printf(pb
, ">");
/* Metadata needed for the BaseURL / SegmentBase form of the element. */
210 AVDictionaryEntry
*irange
= av_dict_get(st
->metadata
, INITIALIZATION_RANGE
, NULL
, 0);
211 AVDictionaryEntry
*cues_start
= av_dict_get(st
->metadata
, CUES_START
, NULL
, 0);
212 AVDictionaryEntry
*cues_end
= av_dict_get(st
->metadata
, CUES_END
, NULL
, 0);
213 AVDictionaryEntry
*filename
= av_dict_get(st
->metadata
, FILENAME
, NULL
, 0);
214 if (!irange
|| !cues_start
|| !cues_end
|| !filename
)
215 return AVERROR(EINVAL
);
217 avio_printf(pb
, ">\n");
218 avio_printf(pb
, "<BaseURL>%s</BaseURL>\n", filename
->value
);
219 avio_printf(pb
, "<SegmentBase\n");
220 avio_printf(pb
, " indexRange=\"%s-%s\">\n", cues_start
->value
, cues_end
->value
);
221 avio_printf(pb
, "<Initialization\n");
222 avio_printf(pb
, " range=\"0-%s\" />\n", irange
->value
);
223 avio_printf(pb
, "</SegmentBase>\n");
225 avio_printf(pb
, "</Representation>\n");
230 * Checks if width of all streams are the same. Returns 1 if true, 0 otherwise.
232 static int check_matching_width(AVFormatContext
*s
, const AdaptationSet
*as
)
235 if (as
->nb_streams
< 2) return 1;
236 first_width
= s
->streams
[as
->streams
[0]]->codecpar
->width
;
237 for (i
= 1; i
< as
->nb_streams
; i
++)
238 if (first_width
!= s
->streams
[as
->streams
[i
]]->codecpar
->width
)
244 * Checks if height of all streams are the same. Returns 1 if true, 0 otherwise.
246 static int check_matching_height(AVFormatContext
*s
, const AdaptationSet
*as
)
249 if (as
->nb_streams
< 2) return 1;
250 first_height
= s
->streams
[as
->streams
[0]]->codecpar
->height
;
251 for (i
= 1; i
< as
->nb_streams
; i
++)
252 if (first_height
!= s
->streams
[as
->streams
[i
]]->codecpar
->height
)
258 * Checks if sample rate of all streams are the same. Returns 1 if true, 0 otherwise.
260 static int check_matching_sample_rate(AVFormatContext
*s
, const AdaptationSet
*as
)
262 int first_sample_rate
, i
;
263 if (as
->nb_streams
< 2) return 1;
264 first_sample_rate
= s
->streams
[as
->streams
[0]]->codecpar
->sample_rate
;
265 for (i
= 1; i
< as
->nb_streams
; i
++)
266 if (first_sample_rate
!= s
->streams
[as
->streams
[i
]]->codecpar
->sample_rate
)
/*
 * Releases the per-set stream index arrays of every parsed Adaptation Set
 * (w->as[i].streams) with av_freep().
 * NOTE(review): whatever follows the loop (e.g. releasing w->as itself) is
 * not visible in this view — confirm against the complete file.
 */
271 static void free_adaptation_sets(AVFormatContext
*s
)
273 WebMDashMuxContext
*w
= s
->priv_data
;
275 for (i
= 0; i
< w
->nb_as
; i
++) {
276 av_freep(&w
->as
[i
].streams
);
/**
 * Locate the delimiters of a live header filename.
 *
 * Header files are expected to be named
 *   <file_description>_<representation_id>.hdr
 * where <file_description> can be anything, and chunks
 *   <file_description>_<representation_id>_<chunk_number>.chk
 *
 * @param filename        name to scan
 * @param underscore_pos  receives the position of the last '_' (NULL if none)
 * @param period_pos      receives the position of the first '.' at or after
 *                        that '_'; left untouched when no '_' was found
 * @return 0 on success, AVERROR(EINVAL) if either delimiter is missing
 */
static int split_filename(char *filename, char **underscore_pos,
                          char **period_pos)
{
    char *sep = strrchr(filename, '_');
    char *dot;

    *underscore_pos = sep;
    if (!sep)
        return AVERROR(EINVAL);

    dot = strchr(sep, '.');
    *period_pos = dot;
    return dot ? 0 : AVERROR(EINVAL);
}
304 * Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
/*
 * Writes one <AdaptationSet> element, including its Representations, for
 * w->as[as_index]. Mime type, codec and language are taken from the first
 * stream of the set.
 *
 * Visible failure paths return AVERROR(EINVAL) after the FILENAME metadata
 * lookups; the results of split_filename() and write_representation() are
 * stored in `ret`.
 * NOTE(review): several guarding conditions (around the language attribute,
 * the SegmentTemplate section and the choice of representation id) and the
 * checks on `ret` are not visible in this view — confirm against the
 * complete file.
 */
306 static int write_adaptation_set(AVFormatContext
*s
, int as_index
)
308 WebMDashMuxContext
*w
= s
->priv_data
;
309 AdaptationSet
*as
= &w
->as
[as_index
];
310 const AVStream
*st
= s
->streams
[as
->streams
[0]];
311 AVCodecParameters
*par
= st
->codecpar
;
312 AVDictionaryEntry
*lang
;
313 AVIOContext
*pb
= s
->pb
;
/* Indexed with a 0/1 result to print "false" or "true". */
315 static const char boolean
[2][6] = { "false", "true" };
316 int subsegmentStartsWithSAP
= 1;
318 // Width, Height and Sample Rate will go in the AdaptationSet tag if they
319 // are the same for all contained Representations. otherwise, they will go
320 // on their respective Representation tag. For live streams, they always go
321 // in the Representation tag.
322 int width_in_as
= 1, height_in_as
= 1, sample_rate_in_as
= 1;
323 if (par
->codec_type
== AVMEDIA_TYPE_VIDEO
) {
324 width_in_as
= !w
->is_live
&& check_matching_width (s
, as
);
325 height_in_as
= !w
->is_live
&& check_matching_height(s
, as
);
327 sample_rate_in_as
= !w
->is_live
&& check_matching_sample_rate(s
, as
);
330 avio_printf(pb
, "<AdaptationSet id=\"%s\"", as
->id
);
331 avio_printf(pb
, " mimeType=\"%s/webm\"",
332 par
->codec_type
== AVMEDIA_TYPE_VIDEO
? "video" : "audio");
333 avio_printf(pb
, " codecs=\"%s\"", get_codec_name(par
->codec_id
));
335 lang
= av_dict_get(st
->metadata
, "language", NULL
, 0);
337 avio_printf(pb
, " lang=\"%s\"", lang
->value
);
339 if (par
->codec_type
== AVMEDIA_TYPE_VIDEO
&& width_in_as
)
340 avio_printf(pb
, " width=\"%d\"", par
->width
);
341 if (par
->codec_type
== AVMEDIA_TYPE_VIDEO
&& height_in_as
)
342 avio_printf(pb
, " height=\"%d\"", par
->height
);
343 if (par
->codec_type
== AVMEDIA_TYPE_AUDIO
&& sample_rate_in_as
)
344 avio_printf(pb
, " audioSamplingRate=\"%d\"", par
->sample_rate
);
346 avio_printf(pb
, " bitstreamSwitching=\"%s\"",
347 boolean
[bitstream_switching(s
, as
)]);
/* Live manifests always report aligned subsegments. */
348 avio_printf(pb
, " subsegmentAlignment=\"%s\"",
349 boolean
[w
->is_live
|| subsegment_alignment(s
, as
)]);
/* In non-live mode, any stream lacking CLUSTER_KEYFRAME metadata, or whose
 * value starts with "0", clears subsegmentStartsWithSAP. */
351 for (i
= 0; i
< as
->nb_streams
; i
++) {
352 AVDictionaryEntry
*kf
= av_dict_get(s
->streams
[as
->streams
[i
]]->metadata
,
353 CLUSTER_KEYFRAME
, NULL
, 0);
354 if (!w
->is_live
&& (!kf
|| !strncmp(kf
->value
, "0", 1))) subsegmentStartsWithSAP
= 0;
356 avio_printf(pb
, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP
);
357 avio_printf(pb
, ">\n");
360 AVDictionaryEntry
*filename
=
361 av_dict_get(st
->metadata
, FILENAME
, NULL
, 0);
362 char *underscore_pos
, *period_pos
;
365 return AVERROR(EINVAL
);
366 ret
= split_filename(filename
->value
, &underscore_pos
, &period_pos
);
/* Cut the name at the '_' so that "%s" prints only the part before it; the
 * '_' is written back further down. */
368 *underscore_pos
= '\0';
369 avio_printf(pb
, "<ContentComponent id=\"1\" type=\"%s\"/>\n",
370 par
->codec_type
== AVMEDIA_TYPE_VIDEO
? "video" : "audio");
371 avio_printf(pb
, "<SegmentTemplate");
372 avio_printf(pb
, " timescale=\"1000\"");
373 avio_printf(pb
, " duration=\"%d\"", w
->chunk_duration
);
374 avio_printf(pb
, " media=\"%s_$RepresentationID$_$Number$.chk\"",
376 avio_printf(pb
, " startNumber=\"%d\"", w
->chunk_start_index
);
377 avio_printf(pb
, " initialization=\"%s_$RepresentationID$.hdr\"",
379 avio_printf(pb
, "/>\n");
/* Restore the filename that was truncated above. */
380 *underscore_pos
= '_';
/* One <Representation> per stream of the set. */
383 for (i
= 0; i
< as
->nb_streams
; i
++) {
384 char buf
[25], *representation_id
= buf
, *underscore_pos
, *period_pos
;
385 AVStream
*st
= s
->streams
[as
->streams
[i
]];
388 AVDictionaryEntry
*filename
=
389 av_dict_get(st
->metadata
, FILENAME
, NULL
, 0);
391 return AVERROR(EINVAL
);
392 ret
= split_filename(filename
->value
, &underscore_pos
, &period_pos
);
/* The id is the text that follows the last '_' of the filename. */
395 representation_id
= underscore_pos
+ 1;
/* buf receives the decimal text of the running representation counter. */
398 snprintf(buf
, sizeof(buf
), "%d", w
->representation_id
++);
/* Attributes not hoisted into the AdaptationSet tag go on the Representation. */
400 ret
= write_representation(s
, st
, representation_id
, !width_in_as
,
401 !height_in_as
, !sample_rate_in_as
);
406 avio_printf(s
->pb
, "</AdaptationSet>\n");
/*
 * Parses the "adaptation_sets" option string into w->as / w->nb_as.
 *
 * Expected syntax: id=0,streams=0,1,2 id=1,streams=3,4 ...
 * A state variable (new_set, parsed_id, parsing_streams) tracks what the
 * cursor `p` is expected to see next: "id=" grows w->as by one entry and
 * copies the id text up to the next ',', "streams=" starts the stream list,
 * and every stream number is appended to the current set after being
 * range-checked against s->nb_streams.
 *
 * Returns AVERROR(EINVAL) when the option is unset or malformed and
 * AVERROR(ENOMEM) on the visible allocation-failure path.
 * NOTE(review): the initial state, the loop header, several declarations
 * (q, comma, num) and some cursor updates are not visible in this view —
 * confirm against the complete file.
 */
410 static int parse_adaptation_sets(AVFormatContext
*s
)
412 WebMDashMuxContext
*w
= s
->priv_data
;
413 char *p
= w
->adaptation_sets
;
415 enum { new_set
, parsed_id
, parsing_streams
} state
;
416 if (!w
->adaptation_sets
) {
417 av_log(s
, AV_LOG_ERROR
, "The 'adaptation_sets' option must be set.\n");
418 return AVERROR(EINVAL
);
420 // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
424 if (state
== new_set
)
427 return AVERROR(EINVAL
);
428 } else if (state
== new_set
&& *p
== ' ') {
431 } else if (state
== new_set
&& !strncmp(p
, "id=", 3)) {
/* Grow the set array by one; the result goes through `mem` rather than
 * straight into w->as. */
432 void *mem
= av_realloc(w
->as
, sizeof(*w
->as
) * (w
->nb_as
+ 1));
435 return AVERROR(ENOMEM
);
438 w
->as
[w
->nb_as
- 1].nb_streams
= 0;
439 w
->as
[w
->nb_as
- 1].streams
= NULL
;
440 p
+= 3; // consume "id="
441 q
= w
->as
[w
->nb_as
- 1].id
;
442 comma
= strchr(p
, ',');
/* The id must be followed by ',' and be shorter than the id buffer, which
 * leaves at least one spare byte in it. */
443 if (!comma
|| comma
- p
>= sizeof(w
->as
[w
->nb_as
- 1].id
)) {
444 av_log(s
, AV_LOG_ERROR
, "'id' in 'adaptation_sets' is malformed.\n");
445 return AVERROR(EINVAL
);
/* Safe to copy without a length check: the bound was validated above. */
447 while (*p
!= ',') *q
++ = *p
++;
451 } else if (state
== parsed_id
&& !strncmp(p
, "streams=", 8)) {
452 p
+= 8; // consume "streams="
453 state
= parsing_streams
;
454 } else if (state
== parsing_streams
) {
455 struct AdaptationSet
*as
= &w
->as
[w
->nb_as
- 1];
/* nb_streams is incremented before the array is resized to that count. */
457 int ret
= av_reallocp_array(&as
->streams
, ++as
->nb_streams
,
458 sizeof(*as
->streams
));
461 num
= strtoll(p
, &q
, 10);
/* Require a leading digit, a valid delimiter after the number and an index
 * within [0, s->nb_streams). */
462 if (!av_isdigit(*p
) || (*q
!= ' ' && *q
!= '\0' && *q
!= ',') ||
463 num
< 0 || num
>= s
->nb_streams
) {
/* NOTE(review): the message below spells the option name "adapation_sets". */
464 av_log(s
, AV_LOG_ERROR
, "Invalid value for 'streams' in adapation_sets.\n");
465 return AVERROR(EINVAL
);
467 as
->streams
[as
->nb_streams
- 1] = num
;
/* End of string finishes parsing; a space starts the next set. */
468 if (*q
== '\0') break;
469 if (*q
== ' ') state
= new_set
;
/*
 * Muxer write_header callback that produces the manifest: rejects streams
 * whose codec is not VP8, VP9, AV1, Vorbis or Opus, parses the adaptation
 * sets, writes the MPD header, one <Period> containing every Adaptation Set,
 * and finally frees the parsed sets.
 *
 * Returns `ret` when it is negative and 0 otherwise.
 * NOTE(review): the checks that follow each `ret = ...` assignment, the
 * origin of `start` and the condition around the duration attribute are not
 * visible in this view — confirm against the complete file.
 */
478 static int webm_dash_manifest_write_header(AVFormatContext
*s
)
483 WebMDashMuxContext
*w
= s
->priv_data
;
/* Only the codecs listed below are accepted; anything else is EINVAL. */
485 for (unsigned i
= 0; i
< s
->nb_streams
; i
++) {
486 enum AVCodecID codec_id
= s
->streams
[i
]->codecpar
->codec_id
;
487 if (codec_id
!= AV_CODEC_ID_VP8
&& codec_id
!= AV_CODEC_ID_VP9
&&
488 codec_id
!= AV_CODEC_ID_AV1
&& codec_id
!= AV_CODEC_ID_VORBIS
&&
489 codec_id
!= AV_CODEC_ID_OPUS
)
490 return AVERROR(EINVAL
);
493 ret
= parse_adaptation_sets(s
);
497 ret
= write_header(s
);
501 avio_printf(s
->pb
, "<Period id=\"0\"");
502 avio_printf(s
->pb
, " start=\"PT%gS\"", start
);
504 avio_printf(s
->pb
, " duration=\"PT%gS\"", get_duration(s
));
506 avio_printf(s
->pb
, " >\n");
508 for (i
= 0; i
< w
->nb_as
; i
++) {
509 ret
= write_adaptation_set(s
, i
);
515 avio_printf(s
->pb
, "</Period>\n");
/* The parsed sets are released before returning. */
518 free_adaptation_sets(s
);
519 return ret
< 0 ? ret
: 0;
/*
 * write_packet callback registered in ff_webm_dash_manifest_muxer.
 * NOTE(review): the body is not visible in this view, so what it does with
 * `pkt` and what it returns must be confirmed against the complete file.
 */
522 static int webm_dash_manifest_write_packet(AVFormatContext
*s
, AVPacket
*pkt
)
/* Byte offset of member x inside the muxer's private context. */
527 #define OFFSET(x) offsetof(WebMDashMuxContext, x)
/*
 * User-settable muxer options. Every entry maps an option name onto a
 * WebMDashMuxContext member through OFFSET() and carries its help text,
 * type, default, allowed range and the encoding-parameter flag.
 */
528 static const AVOption options
[] = {
/* String option without a default; parse_adaptation_sets() rejects an unset value. */
529 { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets
), AV_OPT_TYPE_STRING
, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM
},
/* Boolean, default 0. */
530 { "live", "create a live stream manifest", OFFSET(is_live
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM
},
/* Integer in [0, INT_MAX], default 0. */
531 { "chunk_start_index", "start index of the chunk", OFFSET(chunk_start_index
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, AV_OPT_FLAG_ENCODING_PARAM
},
/* Integer in [0, INT_MAX], default 1000; stored in the chunk_duration member. */
532 { "chunk_duration_ms", "duration of each chunk (in milliseconds)", OFFSET(chunk_duration
), AV_OPT_TYPE_INT
, {.i64
= 1000}, 0, INT_MAX
, AV_OPT_FLAG_ENCODING_PARAM
},
/* String option without a default. */
533 { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url
), AV_OPT_TYPE_STRING
, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM
},
/* Double in [1.0, DBL_MAX], default 60.0. */
534 { "time_shift_buffer_depth", "Smallest time (in seconds) shifting buffer for which any Representation is guaranteed to be available.", OFFSET(time_shift_buffer_depth
), AV_OPT_TYPE_DOUBLE
, { .dbl
= 60.0 }, 1.0, DBL_MAX
, AV_OPT_FLAG_ENCODING_PARAM
},
/* Integer in [0, INT_MAX], default 0. */
535 { "minimum_update_period", "Minimum Update Period (in seconds) of the manifest.", OFFSET(minimum_update_period
), AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, INT_MAX
, AV_OPT_FLAG_ENCODING_PARAM
},
/*
 * AVClass describing the muxer's private context; it is referenced through
 * .p.priv_class in ff_webm_dash_manifest_muxer.
 * NOTE(review): only part of the initializer is visible in this view.
 */
539 static const AVClass webm_dash_class
= {
540 .class_name
= "WebM DASH Manifest muxer",
541 .item_name
= av_default_item_name
,
543 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * Muxer definition: format name "webm_dash_manifest", mime type
 * "application/xml", extension "xml". It wires the private context size, the
 * write_header / write_packet callbacks and the option class defined in this
 * file.
 * NOTE(review): the end of the initializer is not visible in this view.
 */
546 const FFOutputFormat ff_webm_dash_manifest_muxer
= {
547 .p
.name
= "webm_dash_manifest",
548 .p
.long_name
= NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
549 .p
.mime_type
= "application/xml",
550 .p
.extensions
= "xml",
551 .priv_data_size
= sizeof(WebMDashMuxContext
),
552 .write_header
= webm_dash_manifest_write_header
,
553 .write_packet
= webm_dash_manifest_write_packet
,
554 .p
.priv_class
= &webm_dash_class
,