added tests for SpiderState extension
[scrapy.git] / extras / scrapy-ws.py
blob6750947089e661a9680deaa906cb4d1ac53a9bc8
1 #!/usr/bin/env python
2 """
3 Example script to control a Scrapy server using its JSON-RPC web service.
5 It only provides a reduced functionality as its main purpose is to illustrate
6 how to write a web service client. Feel free to improve or write your own.
8 Also, keep in mind that the JSON-RPC API is not stable. The recommended way for
9 controlling a Scrapy server is through the execution queue (see the "queue"
10 command).
12 """
14 import sys, optparse, urllib
15 from urlparse import urljoin
17 from scrapy.utils.jsonrpc import jsonrpc_client_call, JsonRpcError
18 from scrapy.utils.py26 import json
def get_commands():
    """Return the mapping from CLI command names to their handler functions.

    Each handler takes (args, opts) and its docstring doubles as the help
    text printed by cmd_help.
    """
    return {
        'help': cmd_help,
        'run': cmd_run,
        'stop': cmd_stop,
        'list-available': cmd_list_available,
        'list-running': cmd_list_running,
        'list-resources': cmd_list_resources,
        'get-global-stats': cmd_get_global_stats,
        'get-spider-stats': cmd_get_spider_stats,
    }
32 def cmd_help(args, opts):
33 """help - list available commands"""
34 print "Available commands:"
35 for _, func in sorted(get_commands().items()):
36 print " ", func.__doc__
def cmd_run(args, opts):
    """run <spider_name> - schedule spider for running"""
    # Append the spider name to the server-side execution queue.
    spider_name = args[0]
    jsonrpc_call(opts, 'crawler/queue', 'append_spider_name', spider_name)
def cmd_stop(args, opts):
    """stop <spider> - stop a running spider"""
    # Ask the engine resource to close the named spider.
    spider = args[0]
    jsonrpc_call(opts, 'crawler/engine', 'close_spider', spider)
46 def cmd_list_running(args, opts):
47 """list-running - list running spiders"""
48 for x in json_get(opts, 'crawler/engine/open_spiders'):
49 print x
51 def cmd_list_available(args, opts):
52 """list-available - list name of available spiders"""
53 for x in jsonrpc_call(opts, 'crawler/spiders', 'list'):
54 print x
56 def cmd_list_resources(args, opts):
57 """list-resources - list available web service resources"""
58 for x in json_get(opts, '')['resources']:
59 print x
61 def cmd_get_spider_stats(args, opts):
62 """get-spider-stats <spider> - get stats of a running spider"""
63 stats = jsonrpc_call(opts, 'stats', 'get_stats', args[0])
64 for name, value in stats.items():
65 print "%-40s %s" % (name, value)
67 def cmd_get_global_stats(args, opts):
68 """get-global-stats - get global stats"""
69 stats = jsonrpc_call(opts, 'stats', 'get_stats')
70 for name, value in stats.items():
71 print "%-40s %s" % (name, value)
def get_wsurl(opts, path):
    """Build the absolute web-service URL for *path* on the configured host/port."""
    base = "http://%s:%s/" % (opts.host, opts.port)
    return urljoin(base, path)
def jsonrpc_call(opts, path, method, *args, **kwargs):
    # Resolve the resource path against the configured endpoint and forward
    # the call (with any positional/keyword params) to the JSON-RPC client.
    return jsonrpc_client_call(get_wsurl(opts, path), method, *args, **kwargs)
def json_get(opts, path):
    # Plain HTTP GET of a web-service resource (no JSON-RPC envelope);
    # the response body is expected to be JSON.
    response = urllib.urlopen(get_wsurl(opts, path))
    return json.loads(response.read())
def parse_opts():
    """Parse the command line and resolve the requested command.

    Returns a (command_function, command_args, options) tuple.  Exits with
    status 2 when no command is given, and with status 1 (after printing
    the help listing) for an unknown command name.
    """
    usage = "%prog [options] <command> [arg] ..."
    description = "Scrapy web service control script. Use '%prog help' " \
        "to see the list of available commands."
    op = optparse.OptionParser(usage=usage, description=description)
    op.add_option("-H", dest="host", default="localhost", \
        help="Scrapy host to connect to")
    op.add_option("-P", dest="port", type="int", default=6080, \
        help="Scrapy port to connect to")
    opts, args = op.parse_args()
    if not args:
        # no command at all: show the full usage text and bail out
        op.print_help()
        sys.exit(2)
    # first positional arg is the command; the rest are its arguments
    cmdname, cmdargs = args[0], args[1:]
    commands = get_commands()
    if cmdname not in commands:
        sys.stderr.write("Unknown command: %s\n\n" % cmdname)
        cmd_help(None, None)
        sys.exit(1)
    return commands[cmdname], cmdargs, opts
105 def main():
106 cmd, args, opts = parse_opts()
107 try:
108 cmd(args, opts)
109 except IndexError:
110 print cmd.__doc__
111 except JsonRpcError, e:
112 print str(e)
113 if e.data:
114 print "Server Traceback below:"
115 print e.data
118 if __name__ == '__main__':
119 main()