/*
 * TED Talks captions format decoder
 * Copyright (c) 2012 Nicolas George
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavutil/bprint.h"
23 #include "libavutil/log.h"
24 #include "libavutil/opt.h"
27 #include "subtitles.h"
32 FFDemuxSubtitlesQueue subs
;
35 static const AVOption tedcaptions_options
[] = {
36 { "start_time", "set the start time (offset) of the subtitles, in ms",
37 offsetof(TEDCaptionsDemuxer
, start_time
), AV_OPT_TYPE_INT64
,
38 { .i64
= 15000 }, INT64_MIN
, INT64_MAX
,
39 AV_OPT_FLAG_SUBTITLE_PARAM
| AV_OPT_FLAG_DECODING_PARAM
},
43 static const AVClass tedcaptions_demuxer_class
= {
44 .class_name
= "tedcaptions_demuxer",
45 .item_name
= av_default_item_name
,
46 .option
= tedcaptions_options
,
47 .version
= LIBAVUTIL_VERSION_INT
,
/* True when amin <= a <= amax. The single unsigned comparison also rejects
 * values below amin, including negative values. */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))

/* True when c is one of 0-9, a-f or A-F. OR-ing with 32 folds ASCII
 * upper-case letters onto lower case, so only 'a'..'f' has to be checked;
 * letters past 'f' are not hexadecimal digits and must be rejected. */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
/* Numeric value of a character already accepted by HEX_DIGIT_TEST(). */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
/* Keep a negative value as the error code; map anything else to
 * AVERROR_INVALIDDATA. */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
56 static void av_bprint_utf8(AVBPrint
*bp
, unsigned c
)
61 av_bprint_chars(bp
, c
, 1);
64 bytes
= (av_log2(c
) - 2) / 5;
65 av_bprint_chars(bp
, (c
>> (bytes
* 6)) | ((0xFF80 >> bytes
) & 0xFF), 1);
66 for (i
= bytes
- 1; i
>= 0; i
--)
67 av_bprint_chars(bp
, ((c
>> (i
* 6)) & 0x3F) | 0x80, 1);
70 static void next_byte(AVIOContext
*pb
, int *cur_byte
)
73 int ret
= avio_read(pb
, &b
, 1);
74 *cur_byte
= ret
> 0 ? b
: ret
== 0 ? AVERROR_EOF
: ret
;
77 static void skip_spaces(AVIOContext
*pb
, int *cur_byte
)
79 while (*cur_byte
== ' ' || *cur_byte
== '\t' ||
80 *cur_byte
== '\n' || *cur_byte
== '\r')
81 next_byte(pb
, cur_byte
);
84 static int expect_byte(AVIOContext
*pb
, int *cur_byte
, uint8_t c
)
86 skip_spaces(pb
, cur_byte
);
88 return ERR_CODE(*cur_byte
);
89 next_byte(pb
, cur_byte
);
93 static int parse_string(AVIOContext
*pb
, int *cur_byte
, AVBPrint
*bp
, int full
)
97 av_bprint_init(bp
, 0, full
? -1 : 1);
98 ret
= expect_byte(pb
, cur_byte
, '"');
101 while (*cur_byte
> 0 && *cur_byte
!= '"') {
102 if (*cur_byte
== '\\') {
103 next_byte(pb
, cur_byte
);
105 ret
= AVERROR_INVALIDDATA
;
108 if ((*cur_byte
| 32) == 'u') {
110 for (i
= 0; i
< 4; i
++) {
111 next_byte(pb
, cur_byte
);
112 if (!HEX_DIGIT_TEST(*cur_byte
)) {
113 ret
= ERR_CODE(*cur_byte
);
116 chr
= chr
* 16 + HEX_DIGIT_VAL(*cur_byte
);
118 av_bprint_utf8(bp
, chr
);
120 av_bprint_chars(bp
, *cur_byte
, 1);
123 av_bprint_chars(bp
, *cur_byte
, 1);
125 next_byte(pb
, cur_byte
);
127 ret
= expect_byte(pb
, cur_byte
, '"');
130 if (full
&& !av_bprint_is_complete(bp
)) {
131 ret
= AVERROR(ENOMEM
);
137 av_bprint_finalize(bp
, NULL
);
141 static int parse_label(AVIOContext
*pb
, int *cur_byte
, AVBPrint
*bp
)
145 ret
= parse_string(pb
, cur_byte
, bp
, 0);
148 ret
= expect_byte(pb
, cur_byte
, ':');
154 static int parse_boolean(AVIOContext
*pb
, int *cur_byte
, int *result
)
156 static const char * const text
[] = { "false", "true" };
160 skip_spaces(pb
, cur_byte
);
161 for (i
= 0; i
< 2; i
++) {
165 for (; *p
; p
++, next_byte(pb
, cur_byte
))
167 return AVERROR_INVALIDDATA
;
168 if (BETWEEN(*cur_byte
| 32, 'a', 'z'))
169 return AVERROR_INVALIDDATA
;
173 return AVERROR_INVALIDDATA
;
176 static int parse_int(AVIOContext
*pb
, int *cur_byte
, int64_t *result
)
180 skip_spaces(pb
, cur_byte
);
181 if ((unsigned)*cur_byte
- '0' > 9)
182 return AVERROR_INVALIDDATA
;
183 while (BETWEEN(*cur_byte
, '0', '9')) {
184 if (val
> INT_MAX
/10 - (*cur_byte
- '0'))
185 return AVERROR_INVALIDDATA
;
186 val
= val
* 10 + (*cur_byte
- '0');
187 next_byte(pb
, cur_byte
);
193 static int parse_file(AVIOContext
*pb
, FFDemuxSubtitlesQueue
*subs
)
195 int ret
, cur_byte
, start_of_par
;
196 AVBPrint label
, content
;
197 int64_t pos
, start
, duration
;
200 next_byte(pb
, &cur_byte
);
201 ret
= expect_byte(pb
, &cur_byte
, '{');
203 return AVERROR_INVALIDDATA
;
204 ret
= parse_label(pb
, &cur_byte
, &label
);
205 if (ret
< 0 || strcmp(label
.str
, "captions"))
206 return AVERROR_INVALIDDATA
;
207 ret
= expect_byte(pb
, &cur_byte
, '[');
209 return AVERROR_INVALIDDATA
;
212 start
= duration
= AV_NOPTS_VALUE
;
213 ret
= expect_byte(pb
, &cur_byte
, '{');
216 pos
= avio_tell(pb
) - 1;
218 ret
= parse_label(pb
, &cur_byte
, &label
);
221 if (!strcmp(label
.str
, "startOfParagraph")) {
222 ret
= parse_boolean(pb
, &cur_byte
, &start_of_par
);
225 } else if (!strcmp(label
.str
, "content")) {
226 ret
= parse_string(pb
, &cur_byte
, &content
, 1);
229 } else if (!strcmp(label
.str
, "startTime")) {
230 ret
= parse_int(pb
, &cur_byte
, &start
);
233 } else if (!strcmp(label
.str
, "duration")) {
234 ret
= parse_int(pb
, &cur_byte
, &duration
);
238 return AVERROR_INVALIDDATA
;
240 skip_spaces(pb
, &cur_byte
);
243 next_byte(pb
, &cur_byte
);
245 ret
= expect_byte(pb
, &cur_byte
, '}');
249 if (!content
.size
|| start
== AV_NOPTS_VALUE
||
250 duration
== AV_NOPTS_VALUE
)
251 return AVERROR_INVALIDDATA
;
252 pkt
= ff_subtitles_queue_insert(subs
, content
.str
, content
.len
, 0);
254 return AVERROR(ENOMEM
);
257 pkt
->duration
= duration
;
258 av_bprint_finalize(&content
, NULL
);
260 skip_spaces(pb
, &cur_byte
);
263 next_byte(pb
, &cur_byte
);
265 ret
= expect_byte(pb
, &cur_byte
, ']');
268 ret
= expect_byte(pb
, &cur_byte
, '}');
271 skip_spaces(pb
, &cur_byte
);
272 if (cur_byte
!= AVERROR_EOF
)
273 return ERR_CODE(cur_byte
);
277 static av_cold
int tedcaptions_read_header(AVFormatContext
*avf
)
279 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
280 AVStream
*st
= avformat_new_stream(avf
, NULL
);
285 return AVERROR(ENOMEM
);
287 ret
= parse_file(avf
->pb
, &tc
->subs
);
289 if (ret
== AVERROR_INVALIDDATA
)
290 av_log(avf
, AV_LOG_ERROR
, "Syntax error near offset %"PRId64
".\n",
292 ff_subtitles_queue_clean(&tc
->subs
);
295 ff_subtitles_queue_finalize(avf
, &tc
->subs
);
296 for (i
= 0; i
< tc
->subs
.nb_subs
; i
++)
297 tc
->subs
.subs
[i
].pts
+= tc
->start_time
;
299 last
= &tc
->subs
.subs
[tc
->subs
.nb_subs
- 1];
300 st
->codecpar
->codec_type
= AVMEDIA_TYPE_SUBTITLE
;
301 st
->codecpar
->codec_id
= AV_CODEC_ID_TEXT
;
302 avpriv_set_pts_info(st
, 64, 1, 1000);
303 st
->probe_packets
= 0;
305 st
->duration
= last
->pts
+ last
->duration
;
311 static int tedcaptions_read_packet(AVFormatContext
*avf
, AVPacket
*packet
)
313 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
315 return ff_subtitles_queue_read_packet(&tc
->subs
, packet
);
318 static int tedcaptions_read_close(AVFormatContext
*avf
)
320 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
322 ff_subtitles_queue_clean(&tc
->subs
);
326 static av_cold
int tedcaptions_read_probe(AVProbeData
*p
)
328 static const char *const tags
[] = {
329 "\"captions\"", "\"duration\"", "\"content\"",
330 "\"startOfParagraph\"", "\"startTime\"",
332 unsigned i
, count
= 0;
335 if (p
->buf
[strspn(p
->buf
, " \t\r\n")] != '{')
337 for (i
= 0; i
< FF_ARRAY_ELEMS(tags
); i
++) {
338 if (!(t
= strstr(p
->buf
, tags
[i
])))
340 t
+= strlen(tags
[i
]);
341 t
+= strspn(t
, " \t\r\n");
345 return count
== FF_ARRAY_ELEMS(tags
) ? AVPROBE_SCORE_MAX
:
346 count
? AVPROBE_SCORE_EXTENSION
: 0;
349 static int tedcaptions_read_seek(AVFormatContext
*avf
, int stream_index
,
350 int64_t min_ts
, int64_t ts
, int64_t max_ts
,
353 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
354 return ff_subtitles_queue_seek(&tc
->subs
, avf
, stream_index
,
355 min_ts
, ts
, max_ts
, flags
);
358 AVInputFormat ff_tedcaptions_demuxer
= {
359 .name
= "tedcaptions",
360 .long_name
= NULL_IF_CONFIG_SMALL("TED Talks captions"),
361 .priv_data_size
= sizeof(TEDCaptionsDemuxer
),
362 .priv_class
= &tedcaptions_demuxer_class
,
363 .read_header
= tedcaptions_read_header
,
364 .read_packet
= tedcaptions_read_packet
,
365 .read_close
= tedcaptions_read_close
,
366 .read_probe
= tedcaptions_read_probe
,
367 .read_seek2
= tedcaptions_read_seek
,