subtitle/youtube.lua: Use HTTPS if input URL uses it
[libquvi-scripts.git] / share / subtitle / youtube.lua
blob7ab1e692cd3f4335533a2320e2d424af89e0c975
1 -- libquvi-scripts
2 -- Copyright (C) 2013 Toni Gundogdu <legatvs@gmail.com>
3 --
4 -- This file is part of libquvi-scripts <http://quvi.sourceforge.net/>.
5 --
6 -- This program is free software: you can redistribute it and/or
7 -- modify it under the terms of the GNU Affero General Public
8 -- License as published by the Free Software Foundation, either
9 -- version 3 of the License, or (at your option) any later version.
11 -- This program is distributed in the hope that it will be useful,
12 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -- GNU Affero General Public License for more details.
16 -- You should have received a copy of the GNU Affero General
17 -- Public License along with this program. If not, see
18 -- <http://www.gnu.org/licenses/>.
21 local YouTube = {} -- Table holding the helper functions private to this script
--- Identify the script.
-- Hands the query arguments to the `ident` function of the
-- `quvi/youtube` module and returns whatever that function returns.
-- @param qargs query arguments table supplied by the caller
-- @return the result of calling `ident(qargs)` in `quvi/youtube`
function ident(qargs)
  return require('quvi/youtube').ident(qargs)
end
--- Parse subtitle properties.
-- Normalizes the input URL, extracts the media ID from its `v=`
-- parameter and then runs the TTS and CC extractors, which add their
-- results to `qargs.subtitles`.
-- Raises an error ('no match: media ID') when the normalized URL has
-- no `v=` media ID.
-- @param qargs query arguments table; `qargs.input_url` is read and
--   `qargs.subtitles` is reset to an empty table before being filled
-- @return the same `qargs` table
function parse(qargs)
  local Y = require 'quvi/youtube'
  local C = require 'quvi/const'

  local u = Y.normalize(qargs.input_url)

  -- '-' is escaped and placed last in the set: the Lua manual leaves the
  -- interaction between classes and ranges (as in '[%w-_]') undefined.
  local v = u:match('v=([%w_%-]+)') or error('no match: media ID')

  qargs.subtitles = {}

  YouTube.tts_get(qargs, v, u, C)
  YouTube.cc_get(qargs, v, C)

  return qargs
end
46 -- Utility functions
--- Extract the TTS (text-to-speech, or transcript) tracks.
-- Fetches the page at `u`, looks for the TTS_URL value in it and, for
-- every language code listed in that URL's `asr_langs` parameter,
-- builds one language entry. The entries are appended to
-- `qargs.subtitles` as a single item. Returns without adding anything
-- when the page has no TTS_URL or the URL has no `asr_langs` parameter.
-- @param qargs query arguments table; `qargs.subtitles` is appended to
-- @param v media ID (not used by this function)
-- @param u URL of the page to fetch
-- @param C constants table providing `sif_tt` and `st_tts`
function YouTube.tts_get(qargs, v, u, C)
  -- NOTE(review): 'type=trackformat=1,' looks like it may be missing a
  -- separator; kept unchanged here -- confirm against the service.
  local u_fmt = "%s&tlang=%s&type=trackformat=1,&lang=en&kind=asr"
  local p = quvi.http.fetch(u).data

  local tts_url = p:match('[\'"]TTS_URL[\'"]:%s+[\'"](.-)[\'"]')
  if not tts_url then return end

  p = nil -- the page is not used past this point; drop the reference

  tts_url = tts_url:gsub('\\u0026','&')
  local U = require 'quvi/util'
  tts_url = U.slash_unescape(tts_url)
  tts_url = U.unescape(tts_url)

  -- Capture up to the next '&' or the end of the string, so that the
  -- parameter is also found when it is the last one in the URL.
  local langs = tts_url:match('asr_langs=([^&]*)')
  if not langs then return end

  local r = {}
  for c in langs:gmatch('%w+') do
    r[#r + 1] = {
      url = string.format(u_fmt, tts_url, c),
      translated = '',
      id = 'tts_'..c,
      original = '',
      code = c
    }
  end

  table.insert(qargs.subtitles, {format=C.sif_tt, type=C.st_tts, lang=r})
end
--- Return a new timed-text track URL.
-- Joins the scheme, the fixed youtube.com timed-text endpoint and the
-- `v`, `name` and `lang` query parameters into one string.
-- NOTE(review): the values are inserted verbatim (no URL escaping is
-- done here) -- confirm that callers pass URL-safe values.
-- @param scheme URL scheme, without the '://' part
-- @param v media ID
-- @param name track name
-- @param lang track language code
-- @return the assembled URL string
function YouTube.tt_track_new(scheme, v, name, lang)
  return table.concat({
    scheme, '://youtube.com/api/timedtext?hl=en&type=track',
    '&v=', v, '&name=', name, '&lang=', lang
  })
end
--- Return a new timed-text list URL.
-- Joins the scheme, the fixed video.google.com timed-text list endpoint
-- and the media ID into one string.
-- @param scheme URL scheme, without the '://' part
-- @param v media ID
-- @return the assembled URL string
function YouTube.tt_list_new(scheme, v)
  return table.concat({
    scheme, '://video.google.com/timedtext?hl=en&type=list&v=', v
  })
end
--- Extract the CC (closed-captions) data.
-- Fetches the timed-text track list for the media, parses it as XML and
-- builds one language entry for every top-level <track> element that
-- has a `lang_code` attribute. The entries are appended to
-- `qargs.subtitles` as a single item.
-- Raises an error when the fetched track list cannot be parsed.
-- @param qargs query arguments table; `qargs.input_url` is read (its
--   scheme is reused for the generated URLs) and `qargs.subtitles` is
--   appended to
-- @param v media ID
-- @param C constants table providing `sif_tt` and `st_cc`
function YouTube.cc_get(qargs, v, C)
  local U = require 'socket.url'
  local u = U.parse(qargs.input_url)

  local l = YouTube.tt_list_new(u.scheme, v)
  local x = quvi.http.fetch(l).data

  local L = require 'lxp.lom'

  -- The parser returns nil plus a message on malformed input; report
  -- that message instead of failing below on the length of a nil value.
  local t, err = L.parse(x)
  if not t then
    error('failed to parse the timed-text list: ' .. tostring(err))
  end

  local r = {}
  for i=1, #t do
    local node = t[i]
    if node.tag == 'track' then
      local attr = node.attr
      local lang = attr['lang_code']
      if lang then
        local name = attr['name'] or ''
        r[#r + 1] = {
          translated = attr['lang_translated'] or '',
          url = YouTube.tt_track_new(u.scheme, v, name, lang),
          original = attr['lang_original'] or '',
          id = table.concat({'cc_', lang}, ''),
          code = lang
        }
      end
    end
  end

  table.insert(qargs.subtitles, {format=C.sif_tt, type=C.st_cc, lang=r})
end
126 -- vim: set ts=2 sw=2 tw=72 expandtab: