quvi/youtube.lua: can_parse_url: Improve patterns
[libquvi-scripts.git] / share / common / quvi / youtube.lua
blobf5c100566c851b01848baba3b4443e806340278c
1 -- libquvi-scripts
2 -- Copyright (C) 2012-2013 Toni Gundogdu <legatvs@gmail.com>
3 --
4 -- This file is part of libquvi-scripts <http://quvi.sourceforge.net/>.
5 --
6 -- This program is free software: you can redistribute it and/or
7 -- modify it under the terms of the GNU Affero General Public
8 -- License as published by the Free Software Foundation, either
9 -- version 3 of the License, or (at your option) any later version.
11 -- This program is distributed in the hope that it will be useful,
12 -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -- GNU Affero General Public License for more details.
16 -- You should have received a copy of the GNU Affero General
17 -- Public License along with this program. If not, see
18 -- <http://www.gnu.org/licenses/>.
21 local M = {}
--[[
Return the `ident' data for the {media,subtitle} scripts.
The input URL is normalized first, so that short/embed style URLs are
checked in their `/watch?v=' form.
Parameters:
  qargs .. quvi args (reads qargs.input_url)
Returns:
  A table containing the values expected by the library:
    domains       .. comma-separated list of handled domains
    can_parse_url .. boolean, result of M.can_parse_url
]]--
function M.ident(qargs)
  local u = M.normalize(qargs.input_url)
  return {
    domains       = table.concat({'youtube.com'}, ','),
    can_parse_url = M.can_parse_url(u)
  }
end
--[[
Check if script can parse the URL.
The URL is accepted only when all of the following hold
(case-insensitively):
  - the scheme is http or https
  - the host is youtube.com or a subdomain of it
  - the path is exactly /watch
  - the query begins with v=<id>
Parameters:
  url .. URL to check
Returns:
  A boolean value.
]]--
function M.can_parse_url(url)
  local U = require 'socket.url'
  local t = U.parse(url)

  -- Every component below is required; bail out if any is missing.
  if not t or not t.scheme or not t.host or not t.query or not t.path then
    return false
  end

  if not t.scheme:lower():match('^https?$') then
    return false
  end

  -- Require a label boundary in front of "youtube.com" so that look-alike
  -- hosts (e.g. "notyoutube.com") are rejected.
  local host = t.host:lower()
  if host ~= 'youtube.com' and not host:match('%.youtube%.com$') then
    return false
  end

  -- '-' is placed first in the set: mixing a class (%w) with '-' in the
  -- middle of a set is not defined by the Lua manual.
  if not t.query:lower():match('^v=[-_%w]+') then
    return false
  end

  return t.path:lower():match('^/watch$') ~= nil
end
--[[
"Normalize" URL to YouTube media URL. See the test URLs for examples.
The host is rewritten (youtu.be -> youtube.com, "-nocookie" removed) and
a path of the form /embed/<id>, /<char>/<id> or /<id> is turned into
/watch?v=<id>, replacing any existing query.
Parameters:
  url .. URL to normalize
Returns:
  Normalized URL. The input is returned unchanged when it is nil, when
  it cannot be parsed, or when it has no host.
]]--
function M.normalize(url)
  if not url then return url end

  local U = require 'socket.url'
  local t = U.parse(url)

  -- U.parse can yield nil (LuaSocket does so for an empty string), so
  -- check the table itself before indexing it.
  if not t or not t.host then return url end

  -- gsub also returns the substitution count; the parentheses keep only
  -- the resulting string. '-' is a magic pattern character, so escape it.
  t.host = (t.host:gsub('youtu%.be', 'youtube.com'))
  t.host = (t.host:gsub('%-nocookie', ''))

  if t.path then
    local patterns = {'/embed/([-_%w]+)', '/%w/([-_%w]+)', '/([-_%w]+)'}
    -- ipairs: the patterns are tried in the listed order.
    for _, pattern in ipairs(patterns) do
      local id = t.path:match(pattern)
      -- Only an 11-character capture is treated as a video ID.
      if id and #id == 11 then
        t.query = 'v=' .. id
        t.path  = '/watch'
        break -- The path is rewritten; nothing left to match.
      end
    end
  end

  return U.build(t)
end
--[[
Append URL to qargs.media_url if it is unique by comparing video IDs.
The URL is normalized before the comparison and the normalized form is
what gets appended. Nothing is appended when the URL is nil, cannot be
parsed, or lacks a host or a query.
Parameters:
  qargs .. quvi args (qargs.media_url is read and appended to)
  url   .. URL to append
]]--
function M.append_if_unique(qargs, url)
  if not url then return end

  url = M.normalize(url)

  local U = require 'socket.url'
  local t = U.parse(url)

  -- U.parse can yield nil (LuaSocket does so for an empty string).
  if not t or not t.host or not t.query then return end

  -- '-' is placed first in the set: mixing a class (%w) with '-' in the
  -- middle of a set is not defined by the Lua manual.
  local p = 'v=([-_%w]+)'
  local v = t.query:match(p)

  -- NOTE(review): when the query carries no v=<id>, `v' is nil and will
  -- compare equal to any stored URL whose query also lacks an ID. This
  -- mirrors the previous behaviour -- confirm whether it is intended.
  for _, u in pairs(qargs.media_url) do
    local tt = U.parse(u)
    if tt and tt.query and v == tt.query:match(p) then
      return -- Found duplicate. Ignore URL.
    end
  end

  table.insert(qargs.media_url, url)
end
-- Uncomment to test.
--[[
package.path = package.path .. ';../?.lua'
local a = {
  {u='http://youtu.be/3WSQH__H1XE', -- u=page url
   e='http://youtube.com/watch?v=3WSQH__H1XE'}, -- e=expected url
  {u='http://youtu.be/v/3WSQH__H1XE?hl=en',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtu.be/watch?v=3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtu.be/embed/3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtu.be/v/3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtu.be/e/3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtube.com/watch?v=3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtube.com/embed/3WSQH__H1XE',
   e='http://youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://jp.youtube.com/watch?v=3WSQH__H1XE',
   e='http://jp.youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://jp.youtube-nocookie.com/e/3WSQH__H1XE',
   e='http://jp.youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://jp.youtube.com/embed/3WSQH__H1XE',
   e='http://jp.youtube.com/watch?v=3WSQH__H1XE'},
  {u='http://youtube.com/3WSQH__H1XE', -- invalid page url
   e='http://youtube.com/watch?v=3WSQH__H1XE'}
}
local e = 0
for i,v in pairs(a) do
  local s = M.normalize(v.u)
  if s ~= v.e then
    print('\n input: ' .. v.u .. " (#" .. i .. ")")
    print('expected: ' .. v.e)
    print(' got: ' .. s)
    e = e + 1
  end
end
print((e == 0) and 'Tests OK' or ('\nerrors: ' .. e))
]]--
160 return M
162 -- vim: set ts=2 sw=2 tw=72 expandtab: