2 Client to Radio-Canada's Video-on-Demand web service. It can be reused
3 as a module, or invoked as a command line script.
5 #-------------------------------------------------------------------------------
9 '''Copyright(C), 2007, Sylvain Fourmanoit <syfou@users.sourceforge.net>, 2007.
11 Permission is hereby granted, free of charge, to any person obtaining a copy
12 of this software and associated documentation files (the "Software"), to
13 deal in the Software without restriction, including without limitation the
14 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15 sell copies of the Software, and to permit persons to whom the Software is
16 furnished to do so, subject to the following conditions:
18 The above copyright notice and this permission notice shall be included in
19 all copies of the Software and its documentation and acknowledgment shall be
20 given in the documentation and software packages that this Software was
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
27 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 #-------------------------------------------------------------------------------
34 import httplib
, sys
, os
, optparse
, pprint
37 import xml
.etree
.ElementTree
as etree
39 import elementtree
.ElementTree
as etree
41 #-------------------------------------------------------------------------------
class Missing(dict):
    """__missing__ keyword emulating container for python 2.4

    A dict whose item lookups never raise KeyError: when a key is
    absent, a placeholder built from the ``default`` template is
    returned instead. This makes instances usable as the mapping
    operand of ``'%(name)s' % mapping`` formatting even when some
    names are not set.
    """

    def __init__(self, iterable=None, default=''):
        """
        Initialize the container.

        iterable -- optional initial content, anything dict() accepts
                    (ignored when empty or None).
        default  -- value returned for missing keys; when it is a
                    format string holding one ``%s``-like placeholder,
                    the missing key is substituted into it.
        """
        self.default = default
        if iterable:
            dict.__init__(self, iterable)

    def __getitem__(self, k):
        """
        Return the value stored under k, or the default placeholder
        when k is not in the container.
        """
        try:
            return dict.__getitem__(self, k)
        except KeyError:
            pass
        try:
            # Wrap the key in a 1-tuple so that tuple keys are formatted
            # as one value instead of being unpacked as format arguments.
            return self.default % (k,)
        except TypeError:
            # The default carries no usable placeholder (this is the case
            # of the stock default, ''), or is not a string at all: hand
            # it back untouched rather than failing the lookup.
            return self.default
57 #-------------------------------------------------------------------------------
58 # XML answer processing
62 Convert an arbitrary XML file to a pythonic structure.
64 Right now, it is instantiated from the Console.SOAPQuery instances
65 during calls to console: this could be modularised (we could
66 basically just return a file-like object, and let the caller deal
67 with the xml content) but we didn't see a real need: set
68 Console.debug to true if ever you need to debug.
70 Be aware that dynamically changing XML2Py will likely break
71 Console.GetList2Alt().
class TreeBuilder(etree.XMLTreeBuilder):
    """Tree builder that gets rid of all the name space stuff."""

    def _fixname(self, key):
        """
        Return key stripped from everything up to, and including, its
        last '}' character; a key holding no '}' is returned as is.
        """
        # NOTE(review): presumably overrides the name expansion hook of
        # the parent builder, which spells qualified names '{uri}local'
        # -- confirm against the ElementTree version in use.
        return key.rsplit('}', 1)[-1]
def __call__(self, f):
    """
    Parse the XML document read from the file-like object f, and
    return what _myiter() makes of its root element.
    """
    # NOTE(review): the statement wrapping the parse call was not
    # visible when this was rewritten; a call to self._myiter() is
    # assumed from its (self, root) signature -- confirm.
    builder = self.TreeBuilder()
    root = etree.parse(f, builder).getroot()
    return self._myiter(root)
81 def _myiter(self
, root
):
82 def cond(subtree
, text
):
90 elif text
.lower() == 'true':
92 elif text
.lower() == 'false':
98 subtree
= self
._myiter
(e
)
100 if type(tree
[e
.tag
]) is list:
101 tree
[e
.tag
].append(cond(subtree
, e
.text
))
103 tree
[e
.tag
]= [tree
[e
.tag
], cond(subtree
, e
.text
)]
105 tree
[e
.tag
] = cond(subtree
, e
.text
)
107 tree
= tree
.values()[0]
110 #-------------------------------------------------------------------------------
111 # SOAP interface access
115 Radio-Canada querying engine. Just use something like:
117 console.GetLists2(MotsCle='politique')
123 'fields': {'IDRegion': 1}
126 'fields': {'IdMedia': -1,
129 'GetListAujourdhui': {
130 'fields': {'IdRegion': -1}
133 'fields': {'IDEmission': 'del',
143 'StrIDTypeContenu': None,
144 'StrIDCategorie': None,
149 'prebody': '<requete>',
150 'postbody':'</requete>'}
155 Query a given port of Radio-Canada SOAP interface.
157 Returned by Console.__getattr__().
159 request
= '''<?xml version="1.0" encoding="utf-8"?>
160 <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
161 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
162 xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
164 <%(port)s xmlns="http://tempuri.org/">
172 def __missing__(self
, name
): return ''
174 '''Automated echo of file-like objects on read'''
175 def __init__(self
, f
, stream
=None, tee
=True):
177 if stream
is not None:
180 self
.stream
= sys
.stdout
183 def read(self
, size
=-1):
184 s
= self
.f
.read(size
)
def __init__(self, **kw):
    """
    Keep the query settings for later use.

    All keyword arguments are stored, wrapped in a Missing container,
    as self.kw; the rest of the class reads entries such as 'port',
    'fields', 'proxy' and 'debug' from it.
    """
    settings = Missing(kw)
    self.kw = settings
def splitproxy(self, http_proxy):
    """
    Split a proxy specification into its host and port.

    http_proxy -- string of the form http://host:port; one or more
                  trailing slashes are tolerated.

    Return the two-items list [host, port], both as strings.

    Raise RuntimeError if http_proxy is empty, does not start with
    'http://', or does not hold exactly one non-empty host and one
    numeric port.
    """
    prefix = 'http://'
    message = ('http proxy is not in the expected ' +
               'http://host:port format')
    if not http_proxy or not http_proxy.startswith(prefix):
        raise RuntimeError(message)
    # The http_proxy environment variable is commonly written with a
    # trailing slash: do not let it leak into the port.
    parts = http_proxy[len(prefix):].rstrip('/').split(':')
    # Callers unpack the result as "host, port": reject anything else
    # here, with the documented error, rather than letting them fail.
    if len(parts) != 2 or not parts[0] or not parts[1].isdigit():
        raise RuntimeError(message)
    return parts
199 def __call__(self
, **fields
):
204 return '<s>%s</s>' % (k
, v
, k
)
206 # Prepare the request
208 self
.kw
['body'] = '\n'.join([tag(k
, v
) for k
, v
in
209 dict(self
.kw
['fields'].items() +
210 fields
.items()).items()
212 request
= self
.request
% self
.kw
213 headers
= {'Content-Type': 'text/xml',
214 'SOAPAction': 'http://tempuri.org/%(port)s' % self
.kw
}
216 # Set up the connection parameters, either directly or through
218 if self
.kw
['proxy'] is not None:
219 host
, port
= self
.splitproxy(self
.kw
['proxy'])
220 uri
= 'http://www1.radio-canada.ca/aspx/WSConsole/console.asmx'
222 host
, port
= ('www1.radio-canada.ca', 80)
223 uri
= '/aspx/WSConsole/console.asmx'
225 # Finally, time to connect
227 conn
= httplib
.HTTPConnection(host
, port
)
228 conn
.set_debuglevel((0,2)[self
.kw
['debug'] is True])
230 conn
.request('POST', uri
, request
, headers
)
231 return XML2Py()(self
.Tee(conn
.getresponse(), tee
=self
.kw
['debug']))
233 def __init__(self
, debug
=False, proxy
=None):
235 Initialize the console, setting the debug flag and http proxy
237 proxy is expected to be a string, of the usual http://host:port
238 format, following the unix-style http_proxy environment variable.
243 def __getattr__(self
, port
):
245 Call a SOAP port, as defined in Radio-Canada's spec:
247 http://www1.radio-canada.ca/aspx/WSConsole/console.asmx?WSDL
249 # We do support calls to arbitrary ports, but we also
250 # include out-of-the-box values for a couple of them:
253 raise AttributeError('no attribute %s' % port
)
255 return self
.SOAPQuery(port
= port
, debug
= self
.debug
,
257 **self
.queries
.get(port
,{'fields':{}}))
259 def GetList2Alt(self
, **kw
):
261 Special wrapper to GetList2 port: for the sake of uniformity,
262 it is significantly easier to deal with some medias iterable
263 (as with other ports such as GetListAujourdhui or
264 GetListSuggere): this is what this method supplies, by calling
265 the port by chunks of ten items, then spoonfeeding the result
268 for k
in ('NbrItem', 'StartItem'):
272 r
= self
.GetList2(NbrItem
=1, **kw
)
273 if 'Erreur' in r
and r
['Erreur'] is not None:
274 print >> sys
.stderr
, 'server error:', r
['Erreur']
275 if 'NbResultats' not in r
:
277 for i
in range(1, r
['NbResultats'], 10):
278 r
= self
.GetList2(StartItem
=i
, NbrItem
=10, **kw
)
279 for media
in r
['Medias']:
282 #-------------------------------------------------------------------------------
285 # Here are a couple of templates used by cli()...
287 templates
= {'verboseMedia' : '''CLip %(Pos)d (Media ID %(IDMedia)s)
288 Diffusion: %(HeureDiffusionStr)s %(DateDiffusionStr)s
289 Duration: %(Duree)s seconds
290 Broadcast: %(NomEmission)s (Broadcast ID %(IDEmission)s)
291 Description: [%(ExtraitTitre)s]
292 Network: %(NomReseau)s
293 Integral: %(IsIntegral)s
297 'linkMedia': '%(LienASX)s',
298 'broadcasts': '%(Nom)s (ID %(IDEmission)d)'
301 # ... And there is the templating "engine"
303 def apply_template(iterable
, template
=None, missing
= '<%s: N/A>',
304 items
=None, encoding
='utf-8', stream
=None):
306 Output some query results obtained from Console on file-like
307 stream (or sys.stdout if none given), after formatting them; the
308 items yielded by the iterable are expected to support being mapped
311 See cli() for example on how it can be used.
313 if stream
is None: stream
= sys
.stdout
315 for i
, item
in enumerate(iterable
):
316 if items
is None or i
in items
:
318 print >> stream
, ('--- Clip %d ' % i
) + '-'*60
319 pprint
.pprint(item
, stream
=stream
)
322 print >>stream
, (template
%
323 Missing(item
, missing
)).encode(encoding
)
325 #-------------------------------------------------------------------------------
326 # Now, specify a simple CLI interface
330 Simple CLI interface covering the most useful/common cases (looks
331 at sys.argv to decide what to do).
334 # Generate and parse the command line
336 p
= optparse
.OptionParser(
337 usage
="""%prog [options]
339 Radio-Canada's Video on demand non-interactive command line interface.""",
341 Copyright (C) 2007 Sylvain Fourmanoit <syfou@users.sourceforge.net>.
342 This is free software; see the source for copying conditions. There is NO
343 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
344 '''.strip() % __version__
)
345 p
.add_option('-b', '--broadcasts',
346 dest
='list_broadcasts', action
="store_true", default
=False,
347 help='list all available broadcasts')
348 p
.add_option('-s', '--suggested',
349 dest
='list_suggested', action
="store_true", default
=False,
350 help='list suggested clips')
351 p
.add_option('-t', '--today',
352 dest
='list_today', action
='store_true', default
=False,
353 help='list today\'s clips')
354 p
.add_option('-q', '--query',
355 dest
='broadcast_id', default
=None,
356 help='''query clips for a given broadcast (use --broadcasts
357 for a list of all possible broadcast IDs)''')
358 p
.add_option('-k', '--search',
359 dest
='keywords', default
=None,
360 help='''query clips based on keyword search''')
361 p
.add_option('-d', '--date-offset',
362 dest
='days', default
=7,
363 help='''specify how far in time to look back (default is 7,
364 i.e. looking one week back): this only applies to
365 --query and --search requests''')
366 p
.add_option('-i', '--items',
367 dest
='items', default
=None,
368 help='''limit output to a list of comma-separated items,
369 based on their indexed position in the server
370 response (for instance, specifying "0, 2" will
371 make the script printout only the templated output
372 for the first and third item''')
373 p
.add_option('--template',
374 dest
='template', default
=None,
375 help='''manually specify output template to replace default
376 (advanced usage: read the code if ever you need
378 p
.add_option('--encoding',
379 dest
='encoding', default
='utf-8',
380 help='specify output console encoding (default: utf-8)')
381 p
.add_option('-r', '--raw',
382 dest
='raw', action
="store_true", default
=False,
383 help='''Force brute, detailed output (ignore any template):
384 work for all requests''')
385 p
.add_option('-a', '--asx',
386 dest
='asx', action
="store_true", default
=False,
387 help='''Force output of ASX links only (ignore any template):
388 make sense for all but --broadcasts requests''')
389 p
.add_option('-p', '--proxy',
390 dest
='http_proxy', default
=None,
391 help='''Specify what http proxy to use, as a string of
392 the form "http://host:port". By default,
393 the content of environment variable http_proxy
394 is used, if set. If neither this option nor
395 the http_proxy variable is specified, the script
396 connects to Radio-Canada directly''')
397 p
.add_option('--debug',
398 dest
='debug', action
="store_true", default
=False,
399 help='''Send complete trace of client<->server transactions
400 on stdout (data sent and data received,
401 including headers)''')
403 opts
, args
= p
.parse_args()
405 # Initialize the console
407 if opts
.http_proxy
is None: opts
.http_proxy
= os
.getenv('http_proxy')
408 console
= Console(debug
=opts
.debug
, proxy
=opts
.http_proxy
)
410 # Set the various parameters based on mode
412 # Default keywords and templates...
414 template
= templates
['verboseMedia']
416 # Then, make adjustments based on invocation
418 if opts
.list_broadcasts
:
419 template
= templates
['broadcasts']
420 port
= console
.GetEmissions
421 elif opts
.list_suggested
:
422 port
= console
.GetListSuggere
423 elif opts
.list_today
:
424 port
= console
.GetListAujourdhui
425 elif opts
.broadcast_id
is not None or opts
.keywords
is not None:
426 if opts
.broadcast_id
is not None: kw
['IDEmission'] = opts
.broadcast_id
427 if opts
.keywords
is not None: kw
['MotsCle'] = opts
.keywords
428 kw
['DateOffset'] = opts
.days
429 port
= console
.GetList2Alt
432 ['no request specified (one of --broadcasts, --suggested,',
433 '--today, --query or --search), bailing out. See --help',
436 # Make last-minute adjustments to the template
438 if opts
.template
is not None: template
= opts
.template
439 if opts
.raw
: template
=None
440 if opts
.asx
: template
=templates
['linkMedia']
442 # And check for items output limitations
444 if opts
.items
is not None:
445 opts
.items
= [int(i
) for i
in opts
.items
.split(',')]
450 apply_template(port(**kw
), items
=opts
.items
,
451 template
=template
, encoding
=opts
.encoding
)
452 except RuntimeError, e
:
455 print >> sys
.stderr
, 'An error occured while processing the request:'
458 #-------------------------------------------------------------------------------
460 if __name__
== '__main__':