2 Client to Radio-Canada's Video-on-Demand web service. It can be reused
3 as a module, or invoked as a command line script.
5 #-------------------------------------------------------------------------------
9 '''Copyright(C), 2007, Sylvain Fourmanoit <syfou@users.sourceforge.net>, 2007.
11 Permission is hereby granted, free of charge, to any person obtaining a copy
12 of this software and associated documentation files (the "Software"), to
13 deal in the Software without restriction, including without limitation the
14 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15 sell copies of the Software, and to permit persons to whom the Software is
16 furnished to do so, subject to the following conditions:
18 The above copyright notice and this permission notice shall be included in
19 all copies of the Software and its documentation and acknowledgment shall be
20 given in the documentation and software packages that this Software was
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
27 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 #-------------------------------------------------------------------------------
34 import httplib
, sys
, os
, optparse
, pprint
37 import xml
.etree
.ElementTree
as etree
39 import elementtree
.ElementTree
as etree
41 import xml
.parsers
.expat
43 #-------------------------------------------------------------------------------
class Missing(dict):
    """__missing__ keyword emulating container for python 2.4.

    Behaves like a regular dict, except that looking up an absent key
    returns a default value instead of raising KeyError.

    iterable -- optional initial content, anything dict() accepts.
    default  -- value returned for absent keys.  When it is a format
                string (for instance '<%s: N/A>'), the requested key is
                interpolated into it; otherwise it is returned as is.
    """

    def __init__(self, iterable=None, default=''):
        self.default = default
        if iterable:
            dict.__init__(self, iterable)

    def __getitem__(self, k):
        try:
            return dict.__getitem__(self, k)
        except KeyError:
            try:
                # Wrap the key in a 1-tuple, so a key that is itself a
                # tuple is formatted as one value rather than being
                # unpacked as several format arguments.
                return self.default % (k,)
            except (TypeError, ValueError):
                # The default is not a usable format string (e.g. ''
                # has no placeholder): hand it back unchanged.
                return self.default
59 #-------------------------------------------------------------------------------
60 # XML answer processing
64 Convert an arbitrary XML file to a pythonic structure.
66 Right now, it is instantiated from the Console.SOAPQuery instances
67 during calls to console: this could be modularised (we could
68 basically just return a file-like object, and let the caller deal
69 with the xml content) but we didn't see a real need: set
70 Console.debug to true if ever you need to debug.
72 Be aware that dynamically changing XML2Py will likely break
73 Console.GetList2Alt().
class TreeBuilder(etree.XMLTreeBuilder):
    """Get rid of all the name space cruft."""

    def _fixname(self, key):
        # Keep only what follows the last '}' in the name; a name with
        # no '}' is returned unchanged.
        # NOTE(review): presumably the dropped part is the
        # '{namespace-uri}' prefix the parser puts on qualified names --
        # confirm against the ElementTree version in use.
        return key.split('}')[-1]
79 def __call__(self
, f
):
81 etree
.parse(f
, self
.TreeBuilder()).getroot())
83 def _myiter(self
, root
):
84 def cond(subtree
, text
):
92 elif text
.lower() == 'true':
94 elif text
.lower() == 'false':
100 subtree
= self
._myiter
(e
)
102 if type(tree
[e
.tag
]) is list:
103 tree
[e
.tag
].append(cond(subtree
, e
.text
))
105 tree
[e
.tag
]= [tree
[e
.tag
], cond(subtree
, e
.text
)]
107 tree
[e
.tag
] = cond(subtree
, e
.text
)
109 tree
= tree
.values()[0]
112 #-------------------------------------------------------------------------------
113 # SOAP interface access
117 Radio-Canada querying engine. Just use something like:
119 console.GetLists2(MotsCle='politique')
125 'fields': {'IDRegion': 1}
128 'fields': {'IdMedia': -1,
131 'GetListAujourdhui': {
132 'fields': {'IdRegion': -1}
135 'fields': {'IDEmission': 'del',
145 'StrIDTypeContenu': None,
146 'StrIDCategorie': None,
151 'prebody': '<requete>',
152 'postbody':'</requete>'}
157 Query a given port of Radio-Canada SOAP interface.
159 Returned by Console.__getattr__().
161 request
= '''<?xml version="1.0" encoding="utf-8"?>
162 <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
163 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
164 xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
166 <%(port)s xmlns="http://tempuri.org/">
174 def __missing__(self
, name
): return ''
176 '''Automated echo of file-like objects on read'''
177 def __init__(self
, f
, stream
=None, tee
=True):
179 if stream
is not None:
182 self
.stream
= sys
.stdout
185 def read(self
, size
=-1):
186 s
= self
.f
.read(size
)
def __init__(self, **kw):
    """Remember the settings of this query.

    Every keyword argument is kept in self.kw, wrapped in a Missing
    mapping, so that looking up a setting that was not supplied does
    not raise KeyError.
    """
    self.kw = Missing(kw)
def splitproxy(self, http_proxy):
    """Split a proxy specification into its host and port.

    http_proxy -- string of the form 'http://host:port'; one or more
                  trailing slashes are tolerated.

    Return the two-item list [host, port], both as strings.

    Raise RuntimeError if the string does not start with 'http://', or
    if what follows is not exactly a non-empty host and a numeric port
    separated by a colon.
    """
    prefix = 'http://'
    if http_proxy[:len(prefix)] == prefix:
        parts = http_proxy[len(prefix):].rstrip('/').split(':')
        # Validate here, so that a malformed value is reported with the
        # RuntimeError below rather than failing later when the caller
        # unpacks the result into host and port.
        if len(parts) == 2 and parts[0] and parts[1].isdigit():
            return parts
    raise RuntimeError('http proxy is not in the expected ' +
                       'http://host:port format')
201 def __call__(self
, **fields
):
204 return '<%s>%s</%s>' % (k
, v
, k
)
208 # Prepare the request
210 self
.kw
['body'] = '\n'.join([tag(k
, v
) for k
, v
in
211 dict(self
.kw
['fields'].items() +
212 fields
.items()).items()
214 request
= self
.request
% self
.kw
215 headers
= {'Content-Type': 'text/xml',
216 'SOAPAction': 'http://tempuri.org/%(port)s' % self
.kw
}
218 # Set up the connection parameters, either directly or through
220 if self
.kw
['proxy'] is not None:
221 host
, port
= self
.splitproxy(self
.kw
['proxy'])
222 uri
= 'http://www1.radio-canada.ca/aspx/WSConsole/console.asmx'
224 host
, port
= ('www1.radio-canada.ca', 80)
225 uri
= '/aspx/WSConsole/console.asmx'
227 # Finally, time to connect
228 for i
in range(self
.kw
['retry']):
229 conn
= httplib
.HTTPConnection(host
, port
)
230 conn
.set_debuglevel((0,2)[self
.kw
['debug'] is True])
232 conn
.request('POST', uri
, request
, headers
)
234 ret
= XML2Py()(self
.Tee(conn
.getresponse(), tee
=self
.kw
['debug']))
236 except xml
.parsers
.expat
.ExpatError
:
240 def __init__(self
, debug
=False, proxy
=None, retry
= 3):
242 Initialize the console, setting the debug flag, http proxy,
243 and number of retries in case of malformed answer.
245 proxy is expected to be a string, of the usual http://host:port
246 format, following the unix-style http_proxy environment variable.
252 def __getattr__(self
, port
):
254 Call a SOAP port, as defined in Radio-Canada's spec:
256 http://www1.radio-canada.ca/aspx/WSConsole/console.asmx?WSDL
258 # We do support calls to arbitrary ports, but we also
259 # include out-of-the-box values for a couple of them:
262 raise AttributeError('no attribute %s' % port
)
264 return self
.SOAPQuery(port
= port
, debug
= self
.debug
,
265 proxy
= self
.proxy
, retry
= self
.retry
,
266 **self
.queries
.get(port
,{'fields':{}}))
268 def GetList2Alt(self
, **kw
):
270 Special wrapper to GetList2 port: for the sake of uniformity,
271 it is significantly easier to deal with some medias iterable
272 (as with other ports such as GetListAujourdhui or
273 GetListSuggere): this is what this method supplies, by calling
274 the port by chunks of ten items, then spoonfeeding the result
277 for k
in ('NbrItem', 'StartItem'):
281 r
= self
.GetList2(NbrItem
=1, **kw
)
282 if 'Erreur' in r
and r
['Erreur'] is not None:
283 print >> sys
.stderr
, 'server error:', r
['Erreur']
284 if 'NbResultats' not in r
:
286 for i
in range(1, r
['NbResultats'], 10):
287 r
= self
.GetList2(StartItem
=i
, NbrItem
=10, **kw
)
288 for media
in r
['Medias']:
291 #-------------------------------------------------------------------------------
294 # Here are a couple of templates used by cli()...
296 templates
= {'verboseMedia' : '''CLip %(Pos)d (Media ID %(IDMedia)s)
297 Diffusion: %(HeureDiffusionStr)s %(DateDiffusionStr)s
298 Duration: %(Duree)s seconds
299 Broadcast: %(NomEmission)s (Broadcast ID %(IDEmission)s)
300 Description: [%(ExtraitTitre)s]
301 Network: %(NomReseau)s
302 Integral: %(IsIntegral)s
306 'linkMedia': '%(LienASX)s',
307 'broadcasts': '%(Nom)s (ID %(IDEmission)d)'
310 # ... And there is the templating "engine"
312 def apply_template(iterable
, template
=None, missing
= '<%s: N/A>',
313 items
=None, encoding
='utf-8', stream
=None):
315 Output some query results obtained from Console on file-like
316 stream (or sys.stdout if none given), after formatting them; the
317 items yielded by the iterable are expected to support being mapped
320 See cli() for example on how it can be used.
322 if stream
is None: stream
= sys
.stdout
324 for i
, item
in enumerate(iterable
):
325 if items
is None or i
in items
:
327 print >> stream
, ('--- Clip %d ' % i
) + '-'*60
328 pprint
.pprint(item
, stream
=stream
)
331 print >>stream
, (template
%
332 Missing(item
, missing
)).encode(encoding
)
334 #-------------------------------------------------------------------------------
335 # Now, specify a simple CLI interface
339 Simple CLI interface covering the most useful/common cases (looks
340 at sys.argv to decide what to do).
343 # Generate and parse the command line
345 p
= optparse
.OptionParser(
346 usage
="""%prog [options]
348 Radio-Canada's Video on demand non-interactive command line interface.""",
350 Copyright (C) 2007 Sylvain Fourmanoit <syfou@users.sourceforge.net>.
351 This is free software; see the source for copying conditions. There is NO
352 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
353 '''.strip() % __version__
)
354 p
.add_option('-b', '--broadcasts',
355 dest
='list_broadcasts', action
="store_true", default
=False,
356 help='list all available broadcasts')
357 p
.add_option('-s', '--suggested',
358 dest
='list_suggested', action
="store_true", default
=False,
359 help='list suggested clips')
360 p
.add_option('-t', '--today',
361 dest
='list_today', action
='store_true', default
=False,
362 help='list today\'s clips')
363 p
.add_option('-q', '--query',
364 dest
='broadcast_id', default
=None,
365 help='''query clips for a given broadcast (use --broadcasts
366 for a list of all possible broadcast IDs)''')
367 p
.add_option('-k', '--search',
368 dest
='keywords', default
=None,
369 help='''query clips based on keyword search''')
370 p
.add_option('-d', '--date-offset',
371 dest
='days', default
=7,
372 help='''specify how far in time to look back (default is 7,
373 i.e. looking one week back): this only applies to
374 --query and --search requests''')
375 p
.add_option('-i', '--items',
376 dest
='items', default
=None,
377 help='''limit output to a list of comma-separated items,
378 based on their indexed position in the server
379 response (for instance, specifying "0, 2" will
380 make the script printout only the templated output
381 for the first and third item''')
382 p
.add_option('--template',
383 dest
='template', default
=None,
384 help='''manually specify output template to replace default
385 (advanced usage: read the code if ever you need
387 p
.add_option('--encoding',
388 dest
='encoding', default
='utf-8',
389 help='specify output console encoding (default: utf-8)')
390 p
.add_option('-r', '--raw',
391 dest
='raw', action
="store_true", default
=False,
392 help='''Force brute, detailed output (ignore any template):
393 work for all requests''')
394 p
.add_option('-a', '--asx',
395 dest
='asx', action
="store_true", default
=False,
396 help='''Force output of ASX links only (ignore any template):
397 make sense for all but --broadcasts requests''')
398 p
.add_option('-p', '--proxy',
399 dest
='http_proxy', default
=None,
400 help='''specify what http proxy to use, as a string of
401 the form "http://host:port". By default,
402 the content of environment variable http_proxy
403 is used, if set. If neither this option nor
404 the http_proxy variable is specified, the script
405 connects to Radio-Canada directly''')
406 p
.add_option('--retry',
407 dest
='retry', default
=3, type="int",
408 help='''specify how many times to resend a request in case of
409 a malformed XML answer. Default is 3 times.''')
410 p
.add_option('--debug',
411 dest
='debug', action
="store_true", default
=False,
412 help='''Send complete trace of client<->server transactions
413 on stdout (data sent and data received,
414 including headers)''')
416 opts
, args
= p
.parse_args()
418 # Initialize the console
420 if opts
.http_proxy
is None: opts
.http_proxy
= os
.getenv('http_proxy')
421 console
= Console(debug
=opts
.debug
, proxy
=opts
.http_proxy
, retry
=opts
.retry
)
423 # Set the various parameters based on mode
425 # Default keywords and templates...
427 template
= templates
['verboseMedia']
429 # Then, make adjustments based on invocation
431 if opts
.list_broadcasts
:
432 template
= templates
['broadcasts']
433 port
= console
.GetEmissions
434 elif opts
.list_suggested
:
435 port
= console
.GetListSuggere
436 elif opts
.list_today
:
437 port
= console
.GetListAujourdhui
438 elif opts
.broadcast_id
is not None or opts
.keywords
is not None:
439 if opts
.broadcast_id
is not None: kw
['IDEmission'] = opts
.broadcast_id
440 if opts
.keywords
is not None: kw
['MotsCle'] = opts
.keywords
441 kw
['DateOffset'] = opts
.days
442 port
= console
.GetList2Alt
445 ['no request specified (one of --broadcasts, --suggested,',
446 '--today, --query or --search), bailing out. See --help',
449 # Make last-minute adjustments to the template
451 if opts
.template
is not None: template
= opts
.template
452 if opts
.raw
: template
=None
453 if opts
.asx
: template
=templates
['linkMedia']
455 # And check for items output limitations
457 if opts
.items
is not None:
458 opts
.items
= [int(i
) for i
in opts
.items
.split(',')]
463 apply_template(port(**kw
), items
=opts
.items
,
464 template
=template
, encoding
=opts
.encoding
)
465 except RuntimeError, e
:
468 print >> sys
.stderr
, 'An error occured while processing the request:'
471 #-------------------------------------------------------------------------------
473 if __name__
== '__main__':