Update to the versioning system
[radcan.git] / radcan.py
blobb046355f965d241902cbe7eeffa1734d7cf9fb5f
1 """
2 Client to Radio-Canada's Video-on-Demand web service. It can be reused
3 as a module, or invoked as a command line script.
4 """
5 #-------------------------------------------------------------------------------
6 # Legalese
8 __copyright__ = \
9 '''Copyright(C), 2007, Sylvain Fourmanoit <syfou@users.sourceforge.net>, 2007.
11 Permission is hereby granted, free of charge, to any person obtaining a copy
12 of this software and associated documentation files (the "Software"), to
13 deal in the Software without restriction, including without limitation the
14 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15 sell copies of the Software, and to permit persons to whom the Software is
16 furnished to do so, subject to the following conditions:
18 The above copyright notice and this permission notice shall be included in
19 all copies of the Software and its documentation and acknowledgment shall be
20 given in the documentation and software packages that this Software was
21 used.
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
27 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 '''
30 __version__ = '0.9.0'
32 #-------------------------------------------------------------------------------
34 import httplib, xml.etree.ElementTree
35 import sys, os, optparse, pprint
37 #-------------------------------------------------------------------------------
38 # XML answer processing
class XML2Py:
    """
    Convert an arbitrary XML file to a pythonic structure.

    Calling an instance with a file-like object parses it and returns
    what was found under the root element, name spaces stripped:

    - an element without children becomes None (no text), an int (text
      made of digits only), True or False (case-insensitive 'true' or
      'false'), or its raw text otherwise;
    - an element with children becomes a dictionary keyed by child tag;
      sibling elements sharing the same tag are gathered in a list, in
      document order;
    - a dictionary that would hold a single key is replaced by the
      value of that key, so that wrapper elements vanish from the
      result.

    Element attributes, and the text of elements having children, are
    ignored. A root element without children yields an empty dictionary.

    Right now, it is instantiated from the Console.SOAPQuery instances
    during calls to console: this could be modularised (we could
    basically just return a file-like object, and let the caller deal
    with the xml content) but we didn't see a real need: set
    Console.debug to true if ever you need to debug.

    Be aware that changing the structure returned by XML2Py will likely
    break Console.GetList2Alt().
    """
    # XMLTreeBuilder is the historical name of the parser class; newer
    # ElementTree versions only provide XMLParser, so take whichever
    # one is available.
    class TreeBuilder(getattr(xml.etree.ElementTree, 'XMLParser', None)
                      or xml.etree.ElementTree.XMLTreeBuilder):
        """Get rid of all the name space stuff"""
        # This is a private hook of the pure-python parser: accelerated
        # parsers may never call it, which is why _myiter() strips the
        # name spaces again on its own.
        def _fixname(self, key):
            return key.split('}')[-1]

    def __call__(self, f):
        """Parse the file-like object f and return its pythonic form."""
        root = xml.etree.ElementTree.parse(f, self.TreeBuilder()).getroot()
        return self._myiter(root)

    def _scalar(self, text):
        """Convert the text of a childless element to None/int/bool/str."""
        if text is None:
            return None
        if text.isdigit():
            try:
                return int(text)
            except ValueError:
                # isdigit() also accepts characters int() refuses
                # (superscript digits, for instance): keep the raw text.
                return text
        lowered = text.lower()
        if lowered == 'true':
            return True
        if lowered == 'false':
            return False
        return text

    def _myiter(self, root):
        """Recursively convert the children of element root."""
        tree = {}
        # Tags already seen more than once, hence stored as a list of
        # values: tracked explicitly because a single value may itself
        # be a list.
        repeated = set()
        for e in root:
            tag = e.tag.split('}')[-1]
            # Decide on the element itself, not on the converted value:
            # a lone child holding an int, a boolean or nothing must
            # not be discarded in favor of the parent's text.
            if len(e) > 0:
                value = self._myiter(e)
            else:
                value = self._scalar(e.text)
            if tag not in tree:
                tree[tag] = value
            elif tag in repeated:
                tree[tag].append(value)
            else:
                tree[tag] = [tree[tag], value]
                repeated.add(tag)
        if len(tree) == 1:
            # Collapse single-key levels (list() keeps this working
            # when values() is not a list).
            return list(tree.values())[0]
        return tree
90 #-------------------------------------------------------------------------------
91 # SOAP interface access
class Console:
    """
    Radio-Canada querying engine. Just use something like:

    console.GetList2(MotsCle='politique')

    and you are done!
    """
    # Out-of-the-box settings of a couple of ports. For each port,
    # 'fields' gives the default value of the request fields: None is
    # sent as an empty element, while the special value 'del' means
    # the field is left out of the request (unless the caller supplies
    # another value). Optional 'prebody' and 'postbody' strings are
    # inserted around the fields.
    queries = {
        'GetEmissions': {
            'fields': {'IDRegion': 1}
            },
        'GetListSuggere': {
            'fields': {'IdMedia': -1,
                       'IdRegion': -1}
            },
        'GetListAujourdhui': {
            'fields': {'IdRegion': -1}
            },
        'GetList2': {
            'fields': {'IDEmission': 'del',
                       'Chaine': None,
                       'Video': -1,
                       'IDTypeContenu': 0,
                       'MotsCle': None,
                       'StartItem': 1,
                       'NbrItem': 10,
                       'DateOffset': 7,
                       'NbJours': 0,
                       'Tri': 'date:D',
                       'StrIDTypeContenu': None,
                       'StrIDCategorie': None,
                       'StrIDGenre': None,
                       'IDReseau': 0,
                       'IDRegion': -1,
                       'Integrale': -1},
            'prebody': '<requete>',
            'postbody':'</requete>'}
        }

    class SOAPQuery:
        """
        Query a given port of Radio-Canada SOAP interface.

        Returned by Console.__getattr__().
        """
        request = '''<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<%(port)s xmlns="http://tempuri.org/">
%(prebody)s
%(body)s
%(postbody)s
</%(port)s>
</soap:Body>
</soap:Envelope>'''

        class Attrs(dict):
            """Dictionary giving an empty string for any missing key."""
            def __missing__(self, name): return ''

        class Tee:
            '''Automated echo of file-like objects on read'''
            def __init__(self, f, stream=None, tee=True):
                self.f = f
                self.stream = stream if stream is not None else sys.stdout
                self.tee = tee

            def read(self, size=-1):
                s = self.f.read(size)
                if self.tee:
                    self.stream.write(s)
                return s

        def __init__(self, **kw):
            """
            Keep the query settings: 'port', 'fields', 'debug', 'proxy'
            and the optional 'prebody' and 'postbody' are the ones used.
            """
            self.kw = self.Attrs(kw)

        def splitproxy(self, http_proxy):
            """
            Split a 'http://host:port' string into a [host, port] list
            (both being strings). A trailing slash is tolerated.

            Raise RuntimeError for anything else.
            """
            prefix = 'http://'
            if http_proxy[:len(prefix)] == prefix:
                parts = http_proxy[len(prefix):].rstrip('/').split(':')
                if len(parts) == 2 and parts[0] and parts[1].isdigit():
                    return parts
            raise RuntimeError('http proxy is not in the expected ' +
                               'http://host:port format')

        def _build_body(self, fields):
            """
            Return the XML fragment describing the request fields: the
            defaults of the port, overridden by the given fields.
            """
            # Local import: the module header does not import it
            from xml.sax.saxutils import escape

            merged = dict(list((self.kw['fields'] or {}).items()) +
                          list(fields.items()))
            lines = []
            for k, v in merged.items():
                if v == 'del':
                    # Field explicitly left out of the request
                    continue
                if v is None:
                    lines.append('<%s/>' % k)
                else:
                    # Escape the value: it may come straight from the
                    # user (keywords), and a raw '&' or '<' would make
                    # the request ill-formed.
                    lines.append('<%s>%s</%s>' % (k, escape('%s' % (v,)), k))
            return '\n'.join(lines)

        def __call__(self, **fields):
            """
            Send the query, and return the answer of the server once
            processed by XML2Py. Keyword arguments are request fields,
            overriding the defaults of the port.

            Raise RuntimeError if the proxy setting is not usable.
            """
            # Prepare the request
            #
            self.kw['body'] = self._build_body(fields)
            request = self.request % self.kw
            headers = {'Content-Type': 'text/xml',
                       'SOAPAction': 'http://tempuri.org/%(port)s' % self.kw }

            # Set up the connection parameters, either directly or through
            # a proxy (an unset or empty proxy means a direct connection)
            if self.kw['proxy']:
                host, port = self.splitproxy(self.kw['proxy'])
                port = int(port)
                uri = 'http://www1.radio-canada.ca/aspx/WSConsole/console.asmx'
            else:
                host, port = ('www1.radio-canada.ca', 80)
                uri = '/aspx/WSConsole/console.asmx'

            # Finally, time to connect
            #
            conn = httplib.HTTPConnection(host, port)
            try:
                conn.set_debuglevel(0 if not self.kw['debug'] else 2)
                conn.connect()
                conn.request('POST', uri, request, headers)
                return XML2Py()(self.Tee(conn.getresponse(),
                                         tee=self.kw['debug']))
            finally:
                # The answer is parsed at this point (or we failed):
                # either way, do not leave the connection open.
                conn.close()

    def __init__(self, debug=False, proxy=None):
        """
        Initialize the console, setting the debug flag and http proxy

        proxy is expected to be a string, of the usual http://host:port
        format, following the unix-style http_proxy environment variable.
        """
        self.debug = debug
        self.proxy = proxy

    def __getattr__(self, port):
        """
        Call a SOAP port, as defined in Radio-Canada's spec:

        http://www1.radio-canada.ca/aspx/WSConsole/console.asmx?WSDL

        Return a SOAPQuery instance bound to the port. Raise
        AttributeError for names starting with two underscores.
        """
        # We do support calls to arbitrary ports, but we also
        # includes out-of-the-box values for a couple of them:
        # see self.queries.
        if port[:2] == '__':
            raise AttributeError('no attribute %s' % port)
        else:
            return self.SOAPQuery(port = port, debug = self.debug,
                                  proxy = self.proxy,
                                  **self.queries.get(port,{'fields':{}}))

    def GetList2Alt(self, **kw):
        """
        Special wrapper to GetList2 port: for the sake of uniformity,
        it is significantly easier to deal with some medias iterable
        (as with other ports such as GetListAujourdhui or
        GetListSuggere): this is what this method supplies, by calling
        the port by chunks of ten items, then spoonfeeding the result
        to the caller.

        Any 'NbrItem' or 'StartItem' keyword is ignored. Errors reported
        by the server are written on sys.stderr.
        """
        for k in ('NbrItem', 'StartItem'):
            kw.pop(k, None)

        # First query: only there to learn the number of results
        r = self.GetList2(NbrItem=1, **kw)
        if not isinstance(r, dict):
            return
        if r.get('Erreur') is not None:
            sys.stderr.write('server error: %s\n' % (r['Erreur'],))
        if 'NbResultats' not in r:
            return
        # Positions count from 1 (see the StartItem default in queries),
        # so the upper bound has to include NbResultats itself: the
        # last page was otherwise lost when it held a single item.
        for i in range(1, r['NbResultats'] + 1, 10):
            page = self.GetList2(StartItem=i, NbrItem=10, **kw)
            medias = page.get('Medias') if isinstance(page, dict) else None
            if medias is None:
                continue
            if not isinstance(medias, list):
                # XML2Py hands over a lone media as is, not in a list
                medias = [medias]
            for media in medias:
                yield media
255 #-------------------------------------------------------------------------------
256 # Generic formatting
258 # Here is a couple of templates used by cli()...
# Output templates, applied by apply_template() to one mapping per item
# by means of the % operator:
#
# - 'verboseMedia': multi-line description of a clip;
# - 'linkMedia': ASX link of a clip, and nothing else;
# - 'broadcasts': name and identifier of a broadcast.
#
# Only Pos is formatted as an integer (%d): every other field uses %s,
# so that a textual placeholder can stand for a missing value.
templates = {'verboseMedia' : '''Clip %(Pos)d (Media ID %(IDMedia)s)
Diffusion: %(HeureDiffusionStr)s %(DateDiffusionStr)s
Duration: %(Duree)s seconds
Broadcast: %(NomEmission)s (Broadcast ID %(IDEmission)s)
Description: [%(ExtraitTitre)s]
Network: %(NomReseau)s
Integral: %(IsIntegral)s
Video: %(Video)s
URI: %(LienASX)s
''' + '='*80,
             'linkMedia': '%(LienASX)s',
             'broadcasts': '%(Nom)s (ID %(IDEmission)s)'
             }
274 # ... And there is the templating "engine"
def apply_template(iterable, template=None, missing = '<%s: N/A>',
                   items=None, encoding='utf-8', stream=None):
    """
    Output some query results obtained from Console on file-like
    stream (or sys.stdout if none given), after formatting them; the
    items yielded by the iterable are expected to support being mapped
    on a dictionary.

    - template: format string applied to each item with the % operator;
      the position of the item is available as 'Pos'. When None, items
      are pretty-printed as is, below a separator line.
    - missing: format string, applied to the name of any field required
      by the template but absent from the item.
    - items: container of the positions (counted from zero) to output;
      None means all of them.
    - encoding: used to encode formatted unicode text before writing;
      text that is already a native string is written unchanged.

    The items themselves are never modified.

    See cli() for example on how it can be used.
    """
    class Attrs(dict):
        """Copy of an item, giving a placeholder for missing fields."""
        def __init__(self, obj, missing):
            dict.__init__(self, obj)
            self.missing = missing
        def __missing__(self, name):
            return self.missing % name

    if stream is None: stream = sys.stdout

    for i, item in enumerate(iterable):
        if items is not None and i not in items:
            continue
        if template is None:
            stream.write(('--- Clip %d ' % i) + '-'*60 + '\n')
            pprint.pprint(item, stream=stream)
        else:
            # Set the position on our own copy: the item belongs to
            # the caller.
            attrs = Attrs(item, missing)
            attrs['Pos'] = i
            text = template % attrs
            if not isinstance(text, str):
                # Unicode object (python 2): encode it for the stream
                text = text.encode(encoding)
            stream.write(text + '\n')
305 #-------------------------------------------------------------------------------
306 # Now, specify a simple CLI interface
def _build_option_parser():
    """Return the optparse.OptionParser describing the options of cli()."""
    p = optparse.OptionParser(
        usage="""%prog [options]

Radio-Canada's Video on demand non-interactive command line interface.""",
        version='''%%prog %s
Copyright (C) 2007 Sylvain Fourmanoit <syfou@users.sourceforge.net>.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
'''.strip() % __version__)
    p.add_option('-b', '--broadcasts',
                 dest='list_broadcasts', action="store_true", default=False,
                 help='list all available broadcasts')
    p.add_option('-s', '--suggested',
                 dest='list_suggested', action="store_true", default=False,
                 help='list suggested clips')
    p.add_option('-t', '--today',
                 dest='list_today', action='store_true', default=False,
                 help='list today\'s clips')
    p.add_option('-q', '--query',
                 dest='broadcast_id', default=None,
                 help='''query clips for a given broadcast (use --broadcasts
                 for a list of all possible broadcast IDs)''')
    p.add_option('-k', '--search',
                 dest='keywords', default=None,
                 help='''query clips based on keyword search''')
    # type='int' makes optparse reject anything but a number
    p.add_option('-d', '--date-offset',
                 dest='days', type='int', default=7,
                 help='''specify how far in time to look back (default is 7,
                 i.e. looking one week back): this only applies to
                 --query and --search requests''')
    p.add_option('-i', '--items',
                 dest='items', default=None,
                 help='''limit output to a list of comma-separated items,
                 based on their indexed position in the server
                 response (for instance, specifying "0, 2" will
                 make the script printout only the templated output
                 for the first and third item)''')
    p.add_option('--template',
                 dest='template', default=None,
                 help='''manually specify output template to replace default
                 (advanced usage: read the code if ever you need
                 this)''')
    p.add_option('--encoding',
                 dest='encoding', default='utf-8',
                 help='specify output console encoding (default: utf-8)')
    p.add_option('-r', '--raw',
                 dest='raw', action="store_true", default=False,
                 help='''Force brute, detailed output (ignore any template):
                 work for all requests''')
    p.add_option('-a', '--asx',
                 dest='asx', action="store_true", default=False,
                 help='''Force output of ASX links only (ignore any template):
                 make sense for all but --broadcasts requests''')
    p.add_option('-p', '--proxy',
                 dest='http_proxy', default=None,
                 help='''Specify what http proxy to use, as a string of
                 the form "http://host:port". By default,
                 the content of environment variable http_proxy
                 is used, if set. If neither this option nor
                 the http_proxy variable is specified, the script
                 connects to Radio-Canada directly''')
    p.add_option('--debug',
                 dest='debug', action="store_true", default=False,
                 help='''Send complete trace of client<->server transactions
                 on stdout (data sent and data received,
                 including headers)''')
    return p

def cli():
    """
    Simple CLI interface covering the most useful/common cases (looks
    at sys.argv to decide what to do).

    Exactly one kind of request is performed, the first that applies
    among --broadcasts, --suggested, --today and --query/--search; the
    result is printed on standard output by apply_template(). Usage
    errors, as well as RuntimeError raised while processing the
    request, are reported by means of the option parser's error()
    method.
    """
    # Generate and parse the command line
    #
    p = _build_option_parser()
    opts, args = p.parse_args()

    # Initialize the console: the --proxy option takes precedence over
    # the http_proxy environment variable
    #
    proxy = opts.http_proxy
    if proxy is None:
        proxy = os.getenv('http_proxy')
    console = Console(debug=opts.debug, proxy=proxy)

    # Set the various parameters based on mode
    #
    # Default keywords and templates...
    kw = {}
    template = templates['verboseMedia']

    # Then, make adjustments based on invocation
    #
    if opts.list_broadcasts:
        template = templates['broadcasts']
        port = console.GetEmissions
    elif opts.list_suggested:
        port = console.GetListSuggere
    elif opts.list_today:
        port = console.GetListAujourdhui
    elif opts.broadcast_id is not None or opts.keywords is not None:
        if opts.broadcast_id is not None: kw['IDEmission'] = opts.broadcast_id
        if opts.keywords is not None: kw['MotsCle'] = opts.keywords
        kw['DateOffset'] = opts.days
        port = console.GetList2Alt
    else:
        p.error(' '.join(
                ['no request specified (one of --broadcasts, --suggested,',
                 '--today, --query or --search), bailing out. See --help',
                 'for details.']))

    # Make last minutes adjustments to the template (the last one that
    # applies wins)
    #
    if opts.template is not None: template = opts.template
    if opts.raw: template=None
    if opts.asx: template=templates['linkMedia']

    # And check for items output limitations
    #
    if opts.items is not None:
        try:
            opts.items = [int(i) for i in opts.items.split(',')]
        except ValueError:
            p.error('--items expects a comma-separated list of integers')

    # Fire in the hole!
    #
    try:
        apply_template(port(**kw), items=opts.items,
                       template=template, encoding=opts.encoding)
    except RuntimeError:
        # The exception is fetched through sys.exc_info() so that this
        # clause parses whatever the python version.
        p.error(str(sys.exc_info()[1]))
    except Exception:
        # Announce the traceback about to be displayed. Interruptions
        # and exit requests are not errors, and go through untouched.
        sys.stderr.write('An error occurred while processing the request:\n')
        raise
439 #-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Invoked as a script (as opposed to being imported as a module):
    # hand over to the command line interface.
    cli()