fix bug with the on_at() filtering method
[nstv.git] / nstv.py
blob87e6214578ae380c172a1b5484e697753d6cf950
1 # Copyright (C) 2008 Robert Vally
3 # This file is part of nstv (Now Showing TV).
5 # nstv is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # nstv is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with nstv. If not, see <http://www.gnu.org/licenses/>.
18 import re
19 import urllib
20 import string
21 import sys
22 import getopt
23 import datetime, time
24 from nstv_config import Config
# CONSTANTS
# URL the listing HTML is downloaded from
TVNZ_LISTINGS = 'http://tvnz.co.nz/content/listings_data/tvnz_listings_all_skin'

# Regular expression capturing a channel name from its logo image file name
CHANNEL_NAME_PATTERN = r'<td class="epg_logo_spacer">.*/tvnz_epg_(.*)_logo_sm\.jpg'

# Regular expression matching the whole listing table of a single channel.
# The "\n" sequences below are real newline characters inside the pattern.
CHANNEL_LISTING_PATTERN = (
    '<table class="epg_border" cellpadding="0" cellspacing="0" border="0">\n'
    '<tr height="55">\n'
    '((.*)\n)*?'
    '</tr>\n'
    '</table>'
)

# Regular expression capturing (title, start time) of one programme
PROGRAMME_LISTING_PATTERN = 'title="(.+?)".*\n<br>([0-9]*:[0-9]*)'

# The file in which the raw html will be placed once
# collected from the listing source (TVNZ_LISTINGS)
RAW_LISTING_FILE = 'raw'
# END CONSTANTS
# collects all the listing data (HTML) from the specified source
# and returns the result
def fetch_listing(source):
    """Download the listing HTML from ``source`` and return it.

    source -- URL (or local path) handed to ``urllib.urlopen``.

    Returns everything read from the opened resource.

    If the resource cannot be opened or read, an error naming ``source``
    is printed and the program exits with status 1.
    """
    try:
        f = urllib.urlopen(source)
        try:
            data = f.read()
        finally:
            # release the connection even when the read fails
            f.close()
    except Exception:
        # "Exception" rather than a bare except, so Ctrl-C and
        # sys.exit() are not reported as a failed download
        print('Failed to fetch listing from source "%s"' % (source,))
        sys.exit(1)
    return data
# saves the supplied data (HTML) into the specified target location (local)
def save_listing(target, data):
    """Write ``data`` to the file ``target`` inside the directory ``sys.path[0]``.

    target -- file name, relative to ``sys.path[0]``.
    data   -- the text to store.

    If the file cannot be written, an error naming ``target`` is printed
    and the program exits with status 1.
    """
    # Build the path the same way load_listing() does, so an empty
    # sys.path[0] means "current directory" rather than "/<target>".
    path = ''
    if len(sys.path[0]) > 0:
        path = sys.path[0] + '/'

    try:
        s = open(path + target, 'w')
        try:
            s.write(data)
        finally:
            # close the handle even when the write fails
            s.close()
    except (IOError, OSError):
        print('Failed to save listing to local cache file "%s"' % (target,))
        print('Things will not work without it')
        sys.exit(1)
def load_listing(source):
    """Read and return the contents of the file ``source``.

    source -- file name, looked up inside the directory ``sys.path[0]``
              (or relative to the current directory when that entry is
              empty).

    If the file cannot be opened or read, an error naming ``source`` is
    printed and the program exits with status 1.
    """
    path = ''
    if len(sys.path[0]) > 0:
        path = sys.path[0] + '/'

    try:
        f = open(path + source, 'r')
        try:
            return f.read()
        finally:
            # close the handle even when the read fails
            f.close()
    except IOError:
        print('Failed to open listing file "%s".' % (source,))
        sys.exit(1)
# returns the channel list and all the progs
def parse_listing(raw):
    """Extract channel names and programme entries from the listing HTML.

    raw -- the listing HTML as one string.

    Returns ``(channels, progs)``.  ``channels`` is the list of names
    captured by CHANNEL_NAME_PATTERN.  ``progs`` is a list of
    ``[index, {'time': ..., 'title': ...}]`` entries, where ``index`` is
    the position of the CHANNEL_LISTING_PATTERN match the programme was
    found in.
    """
    channels = re.compile(CHANNEL_NAME_PATTERN).findall(raw)

    # one match per channel table, then one match per programme inside it
    listing_re = re.compile(CHANNEL_LISTING_PATTERN)
    programme_re = re.compile(PROGRAMME_LISTING_PATTERN)

    progs = []
    for index, table in enumerate(listing_re.finditer(raw)):
        for found in programme_re.findall(table.group(0)):
            # found is (title, start time), in pattern group order
            progs.append([index, {'time': found[1], 'title': found[0]}])

    return channels, progs
def print_out(li, chan_filter, fmt):
    """Print the programmes in ``li`` using the format string ``fmt``.

    li          -- list of dicts with the keys 'channel', 'title', 'time'.
    chan_filter -- None to print everything, otherwise a container; only
                   programmes whose 'channel' is ``in`` it are printed.
    fmt         -- output format.  ``$c``, ``$t`` and ``$st`` are replaced
                   by the channel, title and start time.  Examples:

                       $c\\t- $t @ $st
                       ($c)\\t- $t @ $st

                   A leading parenthesised field, as in the second
                   example, turns on grouping: the field's value is
                   printed on a line of its own whenever it changes, and
                   the text after the closing parenthesis is used for each
                   programme line.

    Placeholders are expanded in a single pass, so a title that itself
    contains e.g. "$st" is printed unchanged.  Parentheses that do not
    name one of the three fields are treated as ordinary text.
    """
    field_names = {'$c': 'channel', '$t': 'title', '$st': 'time'}
    # "$st" is listed first so that it is tried before the shorter tokens
    token = re.compile(r'\$(?:st|c|t)')

    def expand(template, prog):
        # substitute every placeholder with the matching value of prog
        return token.sub(lambda m: prog[field_names[m.group(0)]], template)

    # check for a grouping in the format
    group_by = None
    row_fmt = fmt
    grouping = re.match(r".*\((.*)\)(.*)", fmt)
    if grouping is not None:
        key = token.sub(lambda m: field_names[m.group(0)], grouping.group(1))
        if key in field_names.values():
            group_by = key
            row_fmt = grouping.group(2)

    previous = None
    for prog in li:
        if chan_filter is not None and prog['channel'] not in chan_filter:
            continue

        if group_by is not None:
            # print the group heading each time its value changes
            heading = prog[group_by]
            if previous is None or previous != heading:
                print(heading)
            previous = heading

        print(expand(row_fmt, prog))
def chan_filter(progs, chan):
    """Return the entries of ``progs`` whose first item equals ``chan``.

    progs -- list of ``[channel index, details]`` entries.
    chan  -- the channel index to keep.

    The result is a new list; the order of ``progs`` is preserved.
    """
    selected = []
    for entry in progs:
        if entry[0] == chan:
            selected.append(entry)
    return selected
def _prog_start(entry):
    # start time of a [channel index, {'time': 'hh:mm', ...}] entry
    parts = entry[1]['time'].split(":")
    return datetime.time(int(parts[0]), int(parts[1]))


def prog_filter(progs, start_time, end_time):
    """Cut ``progs`` down to the window given by the two times.

    progs      -- list of ``[channel index, {'time': 'hh:mm', ...}]``.
    start_time -- ``datetime.time`` or None.  When given, the result
                  starts at the first programme beginning exactly then;
                  failing that, at the last programme in the list that
                  begins at or before it (the one showing at that time).
                  If no programme begins at or before it, nothing is
                  dropped.
    end_time   -- ``datetime.time`` or None.  When given, only programmes
                  beginning strictly before it are kept.

    Returns the filtered list (``progs`` itself when both times are None).
    """
    remaining = progs

    if start_time is not None:
        anchor = None

        # something starting exactly at the start time wins
        exact = [entry for entry in remaining
                 if _prog_start(entry) == start_time]
        if exact:
            anchor = exact[0]
        else:
            # otherwise take the last entry that began at or before it
            earlier = [entry for entry in remaining
                       if _prog_start(entry) <= start_time]
            if earlier:
                anchor = earlier[-1]

        if anchor is None:
            remaining = remaining[:]
        else:
            remaining = remaining[remaining.index(anchor):]

    if end_time is not None:
        remaining = [entry for entry in remaining
                     if _prog_start(entry) < end_time]

    return remaining
# picks out which programmes are showing at the specified time across all channels
def on_at(channels, progs, cur_time, end_time, limit):
    """Collect, channel by channel, the programmes inside a time window.

    channels -- list of channel names; the position in this list is the
                channel index used in ``progs``.
    progs    -- list of ``[channel index, {'time': ..., 'title': ...}]``.
    cur_time -- window start, passed on to prog_filter().
    end_time -- window end, passed on to prog_filter().
    limit    -- None for no limit, otherwise the maximum number of
                programmes kept per channel (converted with ``int()``).

    Returns a list of ``{'channel', 'title', 'time'}`` dicts.
    """
    showing = []

    for index, channel in enumerate(channels):
        matches = prog_filter(chan_filter(progs, index), cur_time, end_time)

        if matches and limit is not None:
            matches = matches[:int(limit)]

        for entry in matches:
            details = entry[1]
            showing.append({'channel': channel,
                            'title': details['title'],
                            'time': details['time']})

    return showing
# picks out programmes with specific titles
def on_when(channels, progs, title):
    """Find the programmes whose title contains ``title``, ignoring case.

    channels -- list of channel names; the position in this list is the
                channel index used in ``progs``.
    progs    -- list of ``[channel index, {'time': ..., 'title': ...}]``.
    title    -- the text to search for.

    Returns a list of ``{'channel', 'title', 'time'}`` dicts, ordered by
    channel and then by position in ``progs``.
    """
    found = []

    for index, channel in enumerate(channels):
        on_channel = [entry for entry in progs if entry[0] == index]
        for entry in on_channel:
            details = entry[1]
            if title.upper() in details['title'].upper():
                found.append({'channel': channel,
                              'title': details['title'],
                              'time': details['time']})

    return found
# picks out all programmes scheduled today
def on_today(channels, progs):
    """List every programme of every channel in ``channels``.

    channels -- list of channel names; the position in this list is the
                channel index used in ``progs``.
    progs    -- list of ``[channel index, {'time': ..., 'title': ...}]``.

    Returns a list of ``{'channel', 'title', 'time'}`` dicts, ordered by
    channel and then by position in ``progs``.
    """
    schedule = []

    for index, channel in enumerate(channels):
        on_channel = [entry[1] for entry in progs if entry[0] == index]
        for details in on_channel:
            schedule.append({'channel': channel,
                             'title': details['title'],
                             'time': details['time']})

    return schedule
def usage():
    """Print the command line help text to standard output."""
    print('NSTV - Now Showing TV')
    print('A TV listing parser and reporting utility designed for use on the command line.')
    print('\nOptions:')
    print(' -f, --fetch fetch (otherwise use from cached)')
    print(' -r, --raw print raw HTML data (debugging)')
    print('\nFilter(s):')
    print(' -p, --programme only show programmes with the specified name')
    # double-quoted so the apostrophe is printed; 'what''s' is two adjacent
    # literals and comes out as "whats"
    print(" -n, --now [0-9] what's on now specifying the number "
          'of programmes to show per channel')
    print(" -t, --today what's on today")
    print(' -a, --at [hh:mm] filter out entries starting before')
    print(' -e, --ends [hh:mm] filter out entries starting after')
    print(' -c, --channel comma delimited list of channels to filter by')
    print(' -h, --help help')
def _usage_error(message):
    # report a command line problem, show the help text and exit (status 2)
    print('ERROR: ' + message + '\n')
    usage()
    sys.exit(2)


def _parse_clock(text):
    # turn "hh:mm" into a datetime.time; returns None when text is not valid
    parts = text.split(":")
    if len(parts) < 2:
        return None
    try:
        return datetime.time(int(parts[0]), int(parts[1]))
    except ValueError:
        return None


def _minute_after(moment):
    # the time one minute after moment, clamped to the end of the day
    if moment.minute < 59:
        return datetime.time(moment.hour, moment.minute + 1)
    if moment.hour < 23:
        return datetime.time(moment.hour + 1, 0)
    return datetime.time.max


def main(argv):
    """Run nstv with the command line arguments ``argv``.

    argv -- the argument list without the program name
            (``sys.argv[1:]``).

    The listing is read from the local cache file (after refreshing it
    when -f/--fetch is given) and printed according to the first of the
    -n, -t, -a, -p options that was supplied.  Invalid options or option
    values print an error plus the help text and exit with status 2.
    """
    # by default we will always work from cached info
    fetch = False
    raw = False
    channel_names = None
    output_type = {}

    try:
        # Long options that take a value need the trailing "=", otherwise
        # getopt hands them an empty argument.  "--title" is kept as an
        # alias of "--programme" because earlier versions accepted it.
        opts, args = getopt.getopt(argv, "p:ha:e:n:tfrc:o:", [
            "programme=",
            "title=",
            "help",
            "at=",
            "ends=",
            "now=",
            "today",
            "fetch",
            "raw",
            "channel=",
            "override="])
    except getopt.GetoptError:
        # sys.exc_info() instead of "except ..., err" / "except ... as err"
        # so the statement parses on every Python version
        err = sys.exc_info()[1]
        # if problem with parsing command line
        # show usage and exit
        print('ERROR: ' + str(err) + '\n')
        usage()
        sys.exit(2)

    # cycle through all the command line arguments
    # opt stores the actual command, and arg the values these
    # may or may not be assigned
    for opt, arg in opts:
        # print usage and shutdown
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-f", "--fetch"):
            fetch = True
        elif opt in ("-r", "--raw"):
            raw = True
        elif opt in ("-n", "--now"):
            output_type['n'] = arg
        elif opt in ("-t", "--today"):
            output_type['t'] = arg
        elif opt in ("-a", "--at"):
            output_type['a'] = arg
        elif opt in ("-e", "--ends"):
            output_type['e'] = arg
        elif opt in ("-c", "--channel"):
            output_type['c'] = arg
        elif opt in ("-o", "--override"):
            output_type['o'] = arg
        elif opt in ("-p", "--programme", "--title"):
            output_type['p'] = arg

    # fetch new listings if we've been told to
    if fetch:
        data = fetch_listing(TVNZ_LISTINGS)
        save_listing(RAW_LISTING_FILE, data)

    # load data from the cached file
    data = load_listing(RAW_LISTING_FILE)

    # parse data into channel and programme lists
    channels, progs = parse_listing(data)

    # set the channel filter: split the comma delimited value into names
    # so that channels are matched by name and not by substring
    if 'c' in output_type:
        channel_names = [name.strip()
                         for name in output_type['c'].split(',')
                         if name.strip()]

    # an end time may accompany -t and -a
    end_time = None
    if 'e' in output_type:
        end_time = _parse_clock(output_type['e'])
        if end_time is None:
            _usage_error('invalid end time "%s", expected hh:mm'
                         % (output_type['e'],))

    if 'n' in output_type: #now
        # read the clock once so hour and minute belong to the same instant
        now = datetime.datetime.now()
        cur_time = datetime.time(now.hour, now.minute)

        try:
            limit_num = int(output_type['n'])
        except ValueError:
            limit_num = -1
        if limit_num < 0:
            _usage_error('invalid number of programmes "%s"'
                         % (output_type['n'],))

        # print out the requested number of programmes from each channel;
        # -e does not apply to this mode
        print_out(on_at(channels, progs, cur_time, None, limit_num),
                  channel_names, "$c\t- $t @ $st")
    elif 't' in output_type: #today
        cur_time = datetime.time(0, 0)

        print_out(on_at(channels, progs, cur_time, end_time, None),
                  channel_names, "($c)\t- $t @ $st")
    elif 'a' in output_type: #at
        # set the start time
        cur_time = _parse_clock(output_type['a'])
        if cur_time is None:
            _usage_error('invalid start time "%s", expected hh:mm'
                         % (output_type['a'],))

        if end_time is None:
            # if no end time was given... use one minute past the start
            # time (computed without overflowing minute 59)
            end_time = _minute_after(cur_time)

        print_out(on_at(channels, progs, cur_time, end_time, None),
                  channel_names, "($c)\t- $t @ $st")
    elif 'p' in output_type: #programme
        print_out(on_when(channels, progs, output_type['p']),
                  channel_names, "$c\t- $t @ $st")

    # useful for debugging
    if raw:
        print(channels)
        print(progs)


if __name__ == "__main__":
    main(sys.argv[1:])