tests: lib: Add subtitle framework
[libquvi-scripts.git] / share / subtitle / export / subrip.lua
blob2338b3a127916a59a29011fab009dfe31fc6f773
1 -- libquvi-scripts
2 -- Copyright (C) 2013 Toni Gundogdu <legatvs@gmail.com>
3 --
4 -- This file is part of libquvi-scripts <http://quvi.sourceforge.net/>.
5 --
6 -- This program is free software: you can redistribute it and/or
7 -- modify it under the terms of the GNU Affero General Public
8 -- License as published by the Free Software Foundation, either
9 -- version 3 of the License, or (at your option) any later version.
11 -- This program is distributed in the hope that it will be useful,
12 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -- GNU Affero General Public License for more details.
16 -- You should have received a copy of the GNU Affero General
17 -- Public License along with this program. If not, see
18 -- <http://www.gnu.org/licenses/>.
21 --[[
22 Notes
23 * http://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format
24 * Uses comma (,) for a decimal separator
25 * Uses CRLF (\r\n) line terminators
26 ]]--
-- Namespace for the utility functions unique to this script; `format`
-- is the export format name this script answers to.
local SubRip = {format='subrip'}

-- Identify the script.
--
-- Reads qargs.to_format and returns a table with:
--   can_export_data  true when qargs.to_format equals SubRip.format
--   export_format    the format name this script produces ('subrip')
function ident(qargs)
  return {
    can_export_data = (qargs.to_format == SubRip.format),
    export_format = SubRip.format
  }
end
-- Export data.
--
-- Dispatches on qargs.from_format: timed-text input (C.sif_tt) is handed
-- to SubRip.from_tt and its result is returned; any other source format
-- raises an error naming the format value in hexadecimal.
function export(qargs)
  local const = require 'quvi/const'
  local source_format = qargs.from_format

  -- Guard clause: reject everything that is not timed-text.
  if source_format ~= const.sif_tt then
    local msg = string.format('unsupported subtitle format: 0x%x',
                              source_format)
    error(msg)
  end

  return SubRip.from_tt(qargs)
end
50 -- Utility functions
-- timed-text (tt) - YouTube uses this for both CCs and TTSes.
--
-- Fetches the document at qargs.input_url, parses it with lxp.lom and
-- turns every top-level <text> element into one SubRip cue:
--
--   <n>CRLF
--   HH:MM:SS,mmm --> HH:MM:SS,mmm CRLF
--   <text>CRLF
--   CRLF
--
-- The concatenated cues are stored in qargs.data and qargs is returned.
-- Raises an error when the fetched document cannot be parsed.
function SubRip.from_tt(qargs)
  local E = require 'quvi/entity'
  local U = require 'quvi/util'
  local L = require 'lxp.lom'

  -- Render seconds as a SubRip timecode. Only the timecode has its
  -- decimal point replaced with a comma, so numbers inside the
  -- subtitle text (e.g. "3.14") are left untouched.
  local function format_timecode(sec)
    local tc = SubRip.to_timecode(sec)
    -- %d rejects non-integral floats on Lua 5.3+, hence math.floor.
    local s = string.format('%02d:%02d:%06.3f',
                            math.floor(tc.hh), math.floor(tc.mm), tc.ss)
    return (s:gsub('%.', ','))
  end

  local x = quvi.fetch(qargs.input_url)
  local t, err = L.parse(x)
  if not t then
    error(string.format('failed to parse timed-text data: %s',
                        tostring(err)))
  end

  -- NOTE: Building up a large string by concatenation will create a lot
  --       of temporary strings burdening the Lua garbage collector. The
  --       Lua way is to put the strings into a table.
  local r = {}
  local last_start = 0

  for i=1, #t do
    local node = t[i]
    -- Character data between elements shows up as plain strings; only
    -- <text> element tables become cues.
    if type(node) == 'table' and node.tag == 'text' then
      local attr = node.attr or {}
      local start = tonumber(attr['start']) or 0
      -- Without a 'dur' attribute, fall back to the distance from the
      -- previous cue's start time.
      local dur = tonumber(attr['dur']) or (start - last_start)

      -- An empty <text/> element has no first child.
      -- NOTE(review): assumes E.convert_html and U.trim accept an empty
      -- string -- confirm.
      local text = U.trim( E.convert_html(node[1] or '') )

      -- Number cues sequentially from 1, independent of the position of
      -- the element among its siblings.
      local n = #r + 1
      r[n] = string.format('%d\r\n%s --> %s\r\n%s\r\n\r\n', n,
                           format_timecode(start),
                           format_timecode(start + dur), text)
      last_start = start
    end
  end

  qargs.data = table.concat(r, '')
  return qargs
end
-- Split a time offset given in seconds into timecode components.
--
-- Returns a table {hh=, mm=, ss=} where hh and mm are whole numbers and
-- ss is the remaining seconds including the fractional part.
--
-- The offset is rounded to whole milliseconds first, so ss never exceeds
-- 59.999 and cannot be printed as "60.000" by a '%06.3f' format. Hours
-- are not wrapped, so offsets of 60 hours or more keep their hour count.
function SubRip.to_timecode(s)
  local ms = math.floor(s * 1000 + 0.5)
  return {
    hh = math.floor(ms / 3600000),
    mm = math.floor(ms / 60000) % 60,
    ss = (ms % 60000) / 1000
  }
end
100 -- vim: set ts=2 sw=2 tw=72 expandtab: