FIX: media/spiegel.lua: title pattern (BACKPORTpt4)
[libquvi-scripts.git] / share / subtitle / youtube.lua
blobe529de7f030d4981a33edfeca9274ab60210e31c
1 -- libquvi-scripts
2 -- Copyright (C) 2013 Toni Gundogdu <legatvs@gmail.com>
3 --
4 -- This file is part of libquvi-scripts <http://quvi.sourceforge.net/>.
5 --
6 -- This program is free software: you can redistribute it and/or
7 -- modify it under the terms of the GNU Affero General Public
8 -- License as published by the Free Software Foundation, either
9 -- version 3 of the License, or (at your option) any later version.
11 -- This program is distributed in the hope that it will be useful,
12 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -- GNU Affero General Public License for more details.
16 -- You should have received a copy of the GNU Affero General
17 -- Public License along with this program. If not, see
18 -- <http://www.gnu.org/licenses/>.
21 local YouTube = {} -- Table holding the helper functions private to this script (tts_get, cc_get)
-- Identify the script.
-- Hands `qargs` to the `ident` function of the 'quvi/youtube' module and
-- returns whatever that function returns.
function ident(qargs)
  return require('quvi/youtube').ident(qargs)
end
-- Parse subtitle properties.
--
-- Normalizes `qargs.input_url` with the 'quvi/youtube' module, extracts
-- the media ID from its `v=` parameter, resets `qargs.subtitles` to an
-- empty table and lets YouTube.tts_get and YouTube.cc_get fill it.
--
-- Raises 'no match: media ID' when the normalized URL carries no `v=`
-- parameter. Returns `qargs`.
function parse(qargs)
  local Y = require 'quvi/youtube'
  local C = require 'quvi/const'

  local u = Y.normalize(qargs.input_url)

  -- The hyphen is escaped and placed last: inside a set, the interaction
  -- between a class (%w) and the range operator (-) is not defined.
  local v = u:match('v=([%w_%-]+)')
              or error('no match: media ID')

  qargs.subtitles = {}

  YouTube.tts_get(qargs, v, u, C)
  YouTube.cc_get(qargs, v, C)

  return qargs
end
47 -- Utility functions
-- Extract the TTS (text-to-speech, or transcript).
--
-- Fetches the page at `u`, looks for the TTS_URL value embedded in it and
-- appends one C.st_tts entry to `qargs.subtitles`. The entry holds one
-- language record per code listed in the URL's `asr_langs` parameter.
-- Returns without touching `qargs` when the page has no TTS_URL or the
-- URL has no `asr_langs` parameter. `v` is accepted but not used.
function YouTube.tts_get(qargs, v, u, C)
  -- NOTE(review): "type=trackformat=1," looks like it may be missing a
  -- separator; kept unchanged, confirm against the service.
  local u_fmt = "%s&tlang=%s&type=trackformat=1,&lang=en&kind=asr"
  local p = quvi.http.fetch(u).data

  local tts_url = p:match('[\'"]TTS_URL[\'"]:%s+[\'"](.-)[\'"]')
  if not tts_url then return end

  p = nil -- Drop the reference to the page data, it is not needed anymore.

  -- Undo the escaping applied to the URL inside the page.
  tts_url = tts_url:gsub('\\u0026','&')
  local U = require 'quvi/util'
  tts_url = U.slash_unescape(tts_url)
  tts_url = U.unescape(tts_url)

  -- Capture up to the next '&' or the end of the URL, so that the
  -- parameter is also found when it is the last one.
  local langs = tts_url:match('asr_langs=([^&]*)')
  if not langs then return end

  local r = {}
  for c in langs:gmatch('(%w+)') do
    r[#r+1] = {
      url = string.format(u_fmt, tts_url, c),
      translated = '',
      id = 'tts_'..c,
      original = '',
      code = c
    }
  end

  table.insert(qargs.subtitles, {format=C.sif_tt, type=C.st_tts, lang=r})
end
-- Extract the CC (closed-captions) data.
--
-- Fetches the track list for the media ID `v`, parses it with lxp.lom and
-- appends one C.st_cc entry to `qargs.subtitles`. The entry holds one
-- language record per child of the parsed root whose tag is 'track' and
-- which has a `lang_code` attribute.
--
-- Raises an error when the track list cannot be parsed.
function YouTube.cc_get(qargs, v, C)
  local u_fmt =
    "http://youtube.com/api/timedtext?hl=en&type=track&v=%s&name=%s&lang=%s"

  local u = 'http://video.google.com/timedtext?hl=en&type=list&v=' ..v
  local x = quvi.http.fetch(u).data
  local L = require 'lxp.lom'

  -- The parser yields nil plus a message for malformed (or empty) data;
  -- report that instead of failing on `#t` below.
  local t, err = L.parse(x)
  if not t then
    error('failed to parse the closed-captions list: ' .. tostring(err))
  end

  local r = {}

  for i=1, #t do
    if t[i].tag == 'track' then
      local attr = t[i].attr
      local lang = attr['lang_code']
      if lang then
        -- NOTE(review): `name` and `lang` go into the URL as they are;
        -- confirm whether they need escaping.
        local name = attr['name'] or ''
        r[#r+1] = {
          translated = attr['lang_translated'] or '',
          original = attr['lang_original'] or '',
          url = string.format(u_fmt, v, name, lang),
          id = 'cc_'..lang,
          code = lang
        }
      end
    end
  end

  table.insert(qargs.subtitles, {format=C.sif_tt, type=C.st_cc, lang=r})
end
112 -- vim: set ts=2 sw=2 tw=72 expandtab: