3 Example script to control a Scrapy server using its JSON-RPC web service.
5 It only provides a reduced functionality as its main purpose is to illustrate
6 how to write a web service client. Feel free to improve or write your own.
8 Also, keep in mind that the JSON-RPC API is not stable. The recommended way for
9 controlling a Scrapy server is through the execution queue (see the "queue"
14 import sys
, optparse
, urllib
15 from urlparse
import urljoin
17 from scrapy
.utils
.jsonrpc
import jsonrpc_client_call
, JsonRpcError
18 from scrapy
.utils
.py26
import json
25 'list-available': cmd_list_available
,
26 'list-running': cmd_list_running
,
27 'list-resources': cmd_list_resources
,
28 'get-global-stats': cmd_get_global_stats
,
29 'get-spider-stats': cmd_get_spider_stats
,
def cmd_help(args, opts):
    """help - list available commands"""
    # Command docstrings double as the CLI help text, so each one is printed
    # verbatim.  Single-argument print(...) emits exactly what the old Py2
    # print statement did, while also being valid Python 3 syntax.
    print("Available commands:")
    for _, func in sorted(get_commands().items()):
        # Three leading spaces reproduce the spacing of `print "  ", doc`
        # (two literal spaces plus the print-statement separator space).
        print("   %s" % func.__doc__)
def cmd_run(args, opts):
    """run <spider_name> - schedule spider for running"""
    # NOTE: the docstring above is user-visible help output (printed by
    # cmd_help), so it is kept verbatim.
    spider_name = args[0]
    jsonrpc_call(opts, 'crawler/queue', 'append_spider_name', spider_name)
def cmd_stop(args, opts):
    """stop <spider> - stop a running spider"""
    # Ask the engine resource to close the named spider.
    spider = args[0]
    jsonrpc_call(opts, 'crawler/engine', 'close_spider', spider)
def cmd_list_running(args, opts):
    """list-running - list running spiders"""
    # The engine resource exposes the names of currently open spiders as a
    # JSON list; print one per line.  (The loop body was lost in the garbled
    # source; reconstructed to match the sibling list-* commands.)
    for spider in json_get(opts, 'crawler/engine/open_spiders'):
        print(spider)
def cmd_list_available(args, opts):
    """list-available - list name of available spiders"""
    # Query the spider manager resource over JSON-RPC and print one spider
    # name per line.  (Loop body reconstructed; it was dropped from the
    # garbled source, matching the sibling list-* commands.)
    for spider in jsonrpc_call(opts, 'crawler/spiders', 'list'):
        print(spider)
def cmd_list_resources(args, opts):
    """list-resources - list available web service resources"""
    # The web-service root returns a JSON object with a 'resources' key;
    # print one resource path per line.  (Loop body reconstructed; it was
    # dropped from the garbled source, matching the sibling list-* commands.)
    for resource in json_get(opts, '')['resources']:
        print(resource)
def cmd_get_spider_stats(args, opts):
    """get-spider-stats <spider> - get stats of a running spider"""
    # Per-spider stats: pass the spider name as the get_stats argument.
    stats = jsonrpc_call(opts, 'stats', 'get_stats', args[0])
    for name, value in stats.items():
        # Left-align stat names in a 40-char column; single-argument
        # print(...) is identical to the old Py2 print statement here.
        print("%-40s %s" % (name, value))
def cmd_get_global_stats(args, opts):
    """get-global-stats - get global stats"""
    # Same call as get-spider-stats, but without a spider argument the
    # stats service returns the global stats.
    stats = jsonrpc_call(opts, 'stats', 'get_stats')
    for name, value in stats.items():
        # Left-align stat names in a 40-char column; single-argument
        # print(...) is identical to the old Py2 print statement here.
        print("%-40s %s" % (name, value))
def get_wsurl(opts, path):
    """Build the absolute web-service URL for *path* on the configured server.

    *opts* carries the parsed -H/-P options (``opts.host``, ``opts.port``).
    """
    base = "http://%s:%s/" % (opts.host, opts.port)
    return urljoin(base, path)
def jsonrpc_call(opts, path, method, *args, **kwargs):
    """Invoke *method* on the JSON-RPC resource mounted at *path*.

    Extra positional/keyword arguments are forwarded to the RPC method.
    """
    # Resolve the resource URL, then delegate to scrapy's JSON-RPC client.
    return jsonrpc_client_call(get_wsurl(opts, path), method, *args, **kwargs)
def json_get(opts, path):
    """Fetch the web-service resource at *path* via HTTP GET and decode JSON."""
    url = get_wsurl(opts, path)
    # Fix: the original never closed the file-like object returned by
    # urllib.urlopen(), leaking the connection; close it in a finally block.
    response = urllib.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()
85 usage
= "%prog [options] <command> [arg] ..."
86 description
= "Scrapy web service control script. Use '%prog help' " \
87 "to see the list of available commands."
88 op
= optparse
.OptionParser(usage
=usage
, description
=description
)
89 op
.add_option("-H", dest
="host", default
="localhost", \
90 help="Scrapy host to connect to")
91 op
.add_option("-P", dest
="port", type="int", default
=6080, \
92 help="Scrapy port to connect to")
93 opts
, args
= op
.parse_args()
97 cmdname
, cmdargs
, opts
= args
[0], args
[1:], opts
98 commands
= get_commands()
99 if cmdname
not in commands
:
100 sys
.stderr
.write("Unknown command: %s\n\n" % cmdname
)
103 return commands
[cmdname
], cmdargs
, opts
106 cmd
, args
, opts
= parse_opts()
111 except JsonRpcError
, e
:
114 print "Server Traceback below:"
if __name__ == '__main__':
    # Standard script entry point.  (The guarded call was lost in the
    # garbled source; main() is the function whose body appears above.)
    main()