Version bump.
[radcan.git] / radcan.py
blob7ec98bd384cb6e16f8b2d35232dec777df9afee1
1 """
2 Client to Radio-Canada's Video-on-Demand web service. It can be reused
3 as a module, or invoked as a command line script.
4 """
5 #-------------------------------------------------------------------------------
6 # Legalese
8 __copyright__ = \
9 '''Copyright(C), 2007, Sylvain Fourmanoit <syfou@users.sourceforge.net>, 2007.
11 Permission is hereby granted, free of charge, to any person obtaining a copy
12 of this software and associated documentation files (the "Software"), to
13 deal in the Software without restriction, including without limitation the
14 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15 sell copies of the Software, and to permit persons to whom the Software is
16 furnished to do so, subject to the following conditions:
18 The above copyright notice and this permission notice shall be included in
19 all copies of the Software and its documentation and acknowledgment shall be
20 given in the documentation and software packages that this Software was
21 used.
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
27 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 '''
30 __version__ = '0.9.2'
32 #-------------------------------------------------------------------------------
34 import httplib, sys, os, optparse, pprint
36 try:
37 import xml.etree.ElementTree as etree
38 except ImportError:
39 import elementtree.ElementTree as etree
41 #-------------------------------------------------------------------------------
42 # Utility class
class Missing(dict):
    """Dictionary that substitutes a default for absent keys.

    Emulates the ``__missing__`` hook for python 2.4: looking up an
    unknown key yields ``default % key`` when that formatting works, or
    ``default`` untouched when the formatting raises ``TypeError``.
    """

    def __init__(self, iterable=None, default=''):
        # A falsy iterable (None, empty mapping, ...) leaves the dict empty.
        if iterable:
            dict.__init__(self, iterable)
        self.default = default

    def __getitem__(self, k):
        try:
            found = dict.__getitem__(self, k)
        except KeyError:
            fallback = self.default
            try:
                fallback = fallback % k
            except TypeError:
                # The default cannot be formatted with this key:
                # hand it back as is.
                pass
            return fallback
        return found
57 #-------------------------------------------------------------------------------
58 # XML answer processing
class XML2Py:
    """
    Convert an arbitrary XML file to a pythonic structure.

    Calling an instance with a file-like object parses it and returns
    the structure built by _myiter() from the root element: child
    elements become dictionary entries keyed by tag, repeated tags are
    gathered in a list, and a dictionary holding a single entry is
    replaced by that entry's value.

    Right now, it is instantiated from the Console.SOAPQuery instances
    during calls to console: this could be modularised (we could
    basically just return a file-like object, and let the caller deal
    with the xml content) but we didn't see a real need: set
    Console.debug to true if ever you need to debug.

    Be aware that dynamically changing XML2Py will likely break
    Console.GetList2Alt().
    """
    class TreeBuilder(etree.XMLTreeBuilder):
        """Get rid of all the name space stuff"""
        # Keep only what follows the last '}' of the key.
        def _fixname(self, key): return key.split('}')[-1]

    def __call__(self, f):
        # Parse f with the namespace-stripping builder, then convert the
        # tree starting at its root element.
        return self._myiter(
            etree.parse(f, self.TreeBuilder()).getroot())

    def _myiter(self, root):
        # Recursively convert the children of root; see the class
        # docstring for the shape of the result.
        def cond(subtree, text):
            # Choose the value stored for one child element: its
            # non-empty converted subtree if any, otherwise its text
            # converted to None / int / bool when it looks like one.
            try:
                if len(subtree)>0:
                    return subtree
                elif text is None:
                    return None
                elif text.isdigit():
                    return int(text)
                elif text.lower() == 'true':
                    return True
                elif text.lower() == 'false':
                    return False
            # Deliberately broad: anything raised above (for instance
            # len() applied to a subtree already collapsed to an int or
            # a bool) falls through to returning the raw text.
            except: pass
            return text
        tree = {}
        for e in root:
            subtree = self._myiter(e)
            if e.tag in tree:
                # Tag already seen: accumulate the values in a list.
                if type(tree[e.tag]) is list:
                    tree[e.tag].append(cond(subtree, e.text))
                else:
                    tree[e.tag]= [tree[e.tag], cond(subtree, e.text)]
            else:
                tree[e.tag] = cond(subtree, e.text)
        # A single entry is collapsed to its value, dropping the wrapper.
        if len(tree) == 1:
            tree = tree.values()[0]
        return tree
110 #-------------------------------------------------------------------------------
111 # SOAP interface access
class Console:
    """
    Radio-Canada querying engine. Just use something like:

        console.GetList2(MotsCle='politique')

    and you are done!

    Any attribute that does not start with two underscores is turned
    into a callable SOAPQuery for the port of that name; keyword
    arguments given to the call become the fields of the request.
    """
    # Out-of-the-box field values for a couple of ports. The special
    # value 'del' drops the field from the request unless the caller
    # supplies it; None produces an empty element.
    queries = {
        'GetEmissions': {
            'fields': {'IDRegion': 1}
            },
        'GetListSuggere': {
            'fields': {'IdMedia': -1,
                       'IdRegion': -1}
            },
        'GetListAujourdhui': {
            'fields': {'IdRegion': -1}
            },
        'GetList2': {
            'fields': {'IDEmission': 'del',
                       'Chaine': None,
                       'Video': -1,
                       'IDTypeContenu': 0,
                       'MotsCle': None,
                       'StartItem': 1,
                       'NbrItem': 10,
                       'DateOffset': 7,
                       'NbJours': 0,
                       'Tri': 'date:D',
                       'StrIDTypeContenu': None,
                       'StrIDCategorie': None,
                       'StrIDGenre': None,
                       'IDReseau': 0,
                       'IDRegion': -1,
                       'Integrale': -1},
            'prebody': '<requete>',
            'postbody':'</requete>'}
        }

    class SOAPQuery:
        """
        Query a given port of Radio-Canada SOAP interface.

        Returned by Console.__getattr__().
        """
        request = '''<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<%(port)s xmlns="http://tempuri.org/">
%(prebody)s
%(body)s
%(postbody)s
</%(port)s>
</soap:Body>
</soap:Envelope>'''

        class Attrs(dict):
            def __missing__(self, name): return ''

        class Tee:
            '''Automated echo of file-like objects on read'''
            def __init__(self, f, stream=None, tee=True):
                self.f = f
                if stream is not None:
                    self.stream = stream
                else:
                    self.stream = sys.stdout
                self.tee = tee

            def read(self, size=-1):
                s = self.f.read(size)
                if self.tee:
                    self.stream.write(s)
                return s

        def __init__(self, **kw):
            # Missing makes absent settings (prebody, postbody, ...)
            # read as an empty string.
            self.kw = Missing(kw)

        def splitproxy(self, http_proxy):
            """
            Split a 'http://host:port' proxy string into [host, port]
            (both strings). One trailing slash or more is tolerated.

            Raise RuntimeError when the string has another shape.
            """
            if 'http://' == http_proxy[:7]:
                parts = http_proxy[7:].rstrip('/').split(':')
                if len(parts) == 2 and parts[0] and parts[1].isdigit():
                    return parts
            raise RuntimeError('http proxy is not in the expected ' +
                               'http://host:port format')

        @staticmethod
        def _tag(k, v):
            """Render one request field as an XML element."""
            if v is None:
                return '<%s/>' % k
            # Local import: only this helper needs it. Escaping keeps
            # the envelope well-formed when a value holds &, < or >.
            from xml.sax.saxutils import escape
            return '<%s>%s</%s>' % (k, escape('%s' % (v,)), k)

        def __call__(self, **fields):
            """
            Send the request, with the given fields overriding the
            defaults of the port, and return the answer as converted
            by XML2Py.
            """
            # Prepare the request

            merged = dict(self.kw['fields'])
            merged.update(fields)
            self.kw['body'] = '\n'.join([self._tag(k, v)
                                         for k, v in merged.items()
                                         if v != 'del'])
            request = self.request % self.kw
            headers = {'Content-Type': 'text/xml',
                       'SOAPAction': 'http://tempuri.org/%(port)s' % self.kw }

            # Set up the connection parameters, either directly or through
            # a proxy (None or an empty string both mean: no proxy)
            proxy = self.kw['proxy']
            if proxy:
                host, port = self.splitproxy(proxy)
                uri = 'http://www1.radio-canada.ca/aspx/WSConsole/console.asmx'
            else:
                host, port = ('www1.radio-canada.ca', 80)
                uri = '/aspx/WSConsole/console.asmx'

            # Finally, time to connect

            conn = httplib.HTTPConnection(host, port)
            conn.set_debuglevel((0,2)[self.kw['debug'] is True])
            try:
                conn.connect()
                conn.request('POST', uri, request, headers)
                return XML2Py()(self.Tee(conn.getresponse(),
                                         tee=self.kw['debug']))
            finally:
                # The answer is entirely parsed by now: release the socket.
                conn.close()

    def __init__(self, debug=False, proxy=None):
        """
        Initialize the console, setting the debug flag and http proxy

        proxy is expected to be a string, of the usual http://host:port
        format, following the unix-style http_proxy environment variable.
        """
        self.debug = debug
        self.proxy = proxy

    def __getattr__(self, port):
        """
        Call a SOAP port, as defined in Radio-Canada's spec:

        http://www1.radio-canada.ca/aspx/WSConsole/console.asmx?WSDL
        """
        # We do support calls to arbitrary ports, but we also
        # include out-of-the-box values for a couple of them:
        # see self.queries.
        if port[:2] == '__':
            raise AttributeError('no attribute %s' % port)
        else:
            return self.SOAPQuery(port = port, debug = self.debug,
                                  proxy = self.proxy,
                                  **self.queries.get(port,{'fields':{}}))

    def GetList2Alt(self, **kw):
        """
        Special wrapper to GetList2 port: for the sake of uniformity,
        it is significantly easier to deal with some medias iterable
        (as with other ports such as GetListAujourdhui or
        GetListSuggere): this is what this method supplies, by calling
        the port by chunks of ten items, then spoonfeeding the result
        to the caller.

        Any NbrItem or StartItem keyword is ignored; a server error is
        reported on sys.stderr.
        """
        for k in ('NbrItem', 'StartItem'):
            kw.pop(k, None)

        # Probe with a single item to learn the total number of results.
        r = self.GetList2(NbrItem=1, **kw)
        if 'Erreur' in r and r['Erreur'] is not None:
            sys.stderr.write('server error: %s\n' % (r['Erreur'],))
        if 'NbResultats' not in r:
            return
        total = r['NbResultats']
        if not total:
            return
        # StartItem counts from 1, so the last valid start is `total`
        # itself: the upper bound has to be total + 1.
        for i in range(1, total + 1, 10):
            r = self.GetList2(StartItem=i, NbrItem=10, **kw)
            medias = r.get('Medias')
            if medias is None:
                continue
            if isinstance(medias, dict):
                # XML2Py collapses a single-entry mapping to its value,
                # so a chunk holding one clip presumably comes back as
                # the clip itself rather than a list -- TODO confirm
                # against a live answer.
                medias = [medias]
            for media in medias:
                yield media
282 #-------------------------------------------------------------------------------
283 # Generic formatting
# Here are a couple of templates used by cli()...
# Output templates, applied with the % operator to one mapping per item
# (see apply_template()). Every placeholder uses %s, except Pos which
# apply_template() always sets to an integer: a field missing from the
# server answer is replaced by a placeholder *string*, which %d would
# reject with a TypeError.
templates = {'verboseMedia' : '''CLip %(Pos)d (Media ID %(IDMedia)s)
Diffusion: %(HeureDiffusionStr)s %(DateDiffusionStr)s
Duration: %(Duree)s seconds
Broadcast: %(NomEmission)s (Broadcast ID %(IDEmission)s)
Description: [%(ExtraitTitre)s]
Network: %(NomReseau)s
Integral: %(IsIntegral)s
Video: %(Video)s
URI: %(LienASX)s
''' + '='*80,
             'linkMedia': '%(LienASX)s',
             'broadcasts': '%(Nom)s (ID %(IDEmission)s)'
             }
301 # ... And there is the templating "engine"
def apply_template(iterable, template=None, missing = '<%s: N/A>',
                   items=None, encoding='utf-8', stream=None):
    """
    Output some query results obtained from Console on file-like
    stream (or sys.stdout if none given), after formatting them; the
    items yielded by the iterable are expected to support being mapped
    on a dictionary.

    template is a %-style format string applied to each item, the key
    'Pos' giving the position of the item in the iterable; any other
    key absent from the item is rendered using missing. Without a
    template, items are pretty-printed as is. items, when given, lists
    the only positions to output. encoding is applied to templated
    output only.

    The items themselves are left untouched.

    See cli() for an example of how it can be used.
    """
    if stream is None: stream = sys.stdout

    for i, item in enumerate(iterable):
        if items is not None and i not in items:
            continue
        if template is None:
            stream.write(('--- Clip %d ' % i) + '-'*60)
            stream.write('\n')
            pprint.pprint(item, stream=stream)
        else:
            # Work on a copy, so that the caller's item does not get
            # an extra 'Pos' entry.
            values = Missing(item, missing)
            values['Pos'] = i
            stream.write((template % values).encode(encoding))
            stream.write('\n')
325 #-------------------------------------------------------------------------------
326 # Now, specify a simple CLI interface
def cli():
    """
    Simple CLI interface covering the most useful/common cases (looks
    at sys.argv to decide what to do).

    Exactly one kind of request is performed, the first that applies
    among --broadcasts, --suggested, --today and --query/--search; the
    result is sent to sys.stdout through apply_template(). Usage errors
    (no request, malformed --items, malformed proxy) are reported via
    the option parser, which terminates the script.
    """
    # Generate and parse the command line

    p = optparse.OptionParser(
        usage="""%prog [options]
Radio-Canada's Video on demand non-interactive command line interface.""",
        version='''%%prog %s
Copyright (C) 2007 Sylvain Fourmanoit <syfou@users.sourceforge.net>.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
'''.strip() % __version__)
    p.add_option('-b', '--broadcasts',
                 dest='list_broadcasts', action="store_true", default=False,
                 help='list all available broadcasts')
    p.add_option('-s', '--suggested',
                 dest='list_suggested', action="store_true", default=False,
                 help='list suggested clips')
    p.add_option('-t', '--today',
                 dest='list_today', action='store_true', default=False,
                 help='list today\'s clips')
    p.add_option('-q', '--query',
                 dest='broadcast_id', default=None,
                 help='''query clips for a given broadcast (use --broadcasts
                 for a list of all possible broadcast IDs)''')
    p.add_option('-k', '--search',
                 dest='keywords', default=None,
                 help='''query clips based on keyword search''')
    p.add_option('-d', '--date-offset',
                 dest='days', default=7,
                 help='''specify how far in time to look back (default is 7,
                 i.e. looking one week back): this only applies to
                 --query and --search requests''')
    p.add_option('-i', '--items',
                 dest='items', default=None,
                 help='''limit output to a list of comma-separated items,
                 based on their indexed position in the server
                 response (for instance, specifying "0, 2" will
                 make the script printout only the templated output
                 for the first and third item''')
    p.add_option('--template',
                 dest='template', default=None,
                 help='''manually specify output template to replace default
                 (advanced usage: read the code if ever you need
                 this)''')
    p.add_option('--encoding',
                 dest='encoding', default='utf-8',
                 help='specify output console encoding (default: utf-8)')
    p.add_option('-r', '--raw',
                 dest='raw', action="store_true", default=False,
                 help='''Force brute, detailed output (ignore any template):
                 work for all requests''')
    p.add_option('-a', '--asx',
                 dest='asx', action="store_true", default=False,
                 help='''Force output of ASX links only (ignore any template):
                 make sense for all but --broadcasts requests''')
    p.add_option('-p', '--proxy',
                 dest='http_proxy', default=None,
                 help='''Specify what http proxy to use, as a string of
                 the form "http://host:port". By default,
                 the content of environment variable http_proxy
                 is used, if set. If neither this option nor
                 the http_proxy variable is specified, the script
                 connects to Radio-Canada directly''')
    p.add_option('--debug',
                 dest='debug', action="store_true", default=False,
                 help='''Send complete trace of client<->server transactions
                 on stdout (data sent and data received,
                 including headers)''')

    opts, args = p.parse_args()

    # Initialize the console

    if opts.http_proxy is None: opts.http_proxy = os.getenv('http_proxy')
    console = Console(debug=opts.debug, proxy=opts.http_proxy)

    # Set the various parameters based on mode

    # Default keywords and templates...
    kw = {}
    template = templates['verboseMedia']

    # Then, make adjustments based on invocation

    if opts.list_broadcasts:
        template = templates['broadcasts']
        port = console.GetEmissions
    elif opts.list_suggested:
        port = console.GetListSuggere
    elif opts.list_today:
        port = console.GetListAujourdhui
    elif opts.broadcast_id is not None or opts.keywords is not None:
        if opts.broadcast_id is not None: kw['IDEmission'] = opts.broadcast_id
        if opts.keywords is not None: kw['MotsCle'] = opts.keywords
        kw['DateOffset'] = opts.days
        port = console.GetList2Alt
    else:
        p.error(' '.join(
            ['no request specified (one of --broadcasts, --suggested,',
             '--today, --query or --search), bailing out. See --help',
             'for details.']))

    # Make last minute adjustments to the template: --asx wins over
    # --raw, which wins over --template

    if opts.template is not None: template = opts.template
    if opts.raw: template=None
    if opts.asx: template=templates['linkMedia']

    # And check for items output limitations

    if opts.items is not None:
        try:
            opts.items = [int(i) for i in opts.items.split(',')]
        except ValueError:
            # A usage mistake deserves a usage message, not a traceback.
            p.error('--items expects a comma-separated list of integers')

    # Fire in the hole!

    try:
        apply_template(port(**kw), items=opts.items,
                       template=template, encoding=opts.encoding)
    except RuntimeError:
        # sys.exc_info() rather than a bound name: this spelling is
        # accepted by every python version.
        p.error(str(sys.exc_info()[1]))
    except:
        # Announce the failure, then let the traceback through.
        sys.stderr.write('An error occured while processing the request:\n')
        raise
458 #-------------------------------------------------------------------------------
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    cli()