Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / tests / urllib2_file.py
blob4e762275ca3071833ba6c84b00e6bd70657c43a1
1 #!/usr/bin/env python
2 ####
3 # Version: 0.2.0
4 # - UTF-8 filenames are now allowed (Eli Golovinsky)
5 # - A real file object is no longer mandatory; the object only needs seek() and read() attributes (Eli Golovinsky)
7 # Version: 0.1.0
8 # - upload is now done with chunks (Adam Ambrose)
10 # Version: older
11 # THANKS TO:
12 # bug fix: kosh @T aesaeion.com
13 # HTTPS support : Ryan Grow <ryangrow @T yahoo.com>
15 # Copyright (C) 2004,2005,2006 Fabien SEISEN
17 # This library is free software; you can redistribute it and/or
18 # modify it under the terms of the GNU Lesser General Public
19 # License as published by the Free Software Foundation; either
20 # version 2.1 of the License, or (at your option) any later version.
22 # This library is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 # Lesser General Public License for more details.
27 # You should have received a copy of the GNU Lesser General Public
28 # License along with this library; if not, write to the Free Software
29 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 # you can contact me at: <fabien@seisen.org>
32 # http://fabien.seisen.org/python/
34 # Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in
35 # chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file
36 # don't need to be kept in memory.
38 """
39 enables uploading files using multipart/form-data
41 idea from:
42 upload files in python:
43 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
45 timeoutsocket.py: overriding Python socket API:
46 http://www.timo-tasi.org/python/timeoutsocket.py
47 http://mail.python.org/pipermail/python-announce-list/2001-December/001095.html
49 import urllib2_file
50 import urllib2
51 u = urllib2.urlopen('http://site.com/path' [, data])
53 data can be a mapping object or a sequence of two-elements tuples
54 (like in original urllib2.urlopen())
55 varname still needs to be a string and
56 value can be a string or a file object
57 e.g.:
58 ((varname, value),
59 (varname2, value))
62 or { name: value,
63 name2: value2 }
66 """
68 import os
69 import socket
70 import sys
71 import stat
72 import mimetypes
73 import mimetools
74 import httplib
75 import urllib
76 import urllib2
# Number of bytes requested from a file object per read() call while its
# contents are being streamed out by send_data() (64 KiB).
CHUNK_SIZE = 64 * 1024
def get_content_type(filename):
    """Return the MIME type guessed from the name *filename*.

    The guess comes from mimetypes.guess_type(); when it yields nothing,
    'application/octet-stream' is returned instead.
    """
    guessed_type, _encoding = mimetypes.guess_type(filename)
    if guessed_type:
        return guessed_type
    return 'application/octet-stream'
def _emit(sock, payload):
    """Send *payload* through *sock* when one is given; return len(payload)."""
    if sock:
        sock.send(payload)
    return len(payload)


def _upload_size(fd):
    """Return the number of bytes *fd* holds, measured from its start.

    A real file is measured with os.fstat().  Objects without a usable
    file descriptor (for instance StringIO) are measured by seeking to
    their end, so they must provide seek() and tell().
    """
    try:
        return os.fstat(fd.fileno())[stat.ST_SIZE]
    except (AttributeError, EnvironmentError, ValueError):
        fd.seek(0, 2)  # 2 == seek relative to the end of the stream
        return fd.tell()


def send_data(v_vars, v_files, boundary, sock=None):
    """Write a multipart/form-data body and return its length in bytes.

    v_vars   -- sequence of (name, value) pairs; each value is a string
    v_files  -- sequence of (name, file object) pairs; a file object needs
                read() and either fileno() or seek()/tell()
    boundary -- the multipart boundary, without the leading '--'
    sock     -- object with a send() method; if it is None (or otherwise
                false) nothing is sent and only the length is computed

    The length is derived from the reported file sizes, so it can be
    obtained before sending and used for the Content-Length header.
    """
    length = 0

    for (k, v) in v_vars:
        part = ''.join([
            '--%s\r\n' % boundary,
            'Content-Disposition: form-data; name="%s"\r\n' % k,
            '\r\n',
            v,
            '\r\n',
        ])
        length += _emit(sock, part)

    for (k, fd) in v_files:
        file_size = _upload_size(fd)
        # Objects without a name attribute are uploaded under the field name.
        name = os.path.basename(getattr(fd, 'name', k))
        if isinstance(name, unicode):
            name = name.encode('UTF-8')
        header = ''.join([
            '--%s\r\n' % boundary,
            'Content-Disposition: form-data; name="%s"; filename="%s"\r\n'
            % (k, name),
            'Content-Type: %s\r\n' % get_content_type(name),
            'Content-Length: %s\r\n' % file_size,
            '\r\n',
        ])
        length += _emit(sock, header)

        if sock:
            # Always upload from the beginning of the file.
            if hasattr(fd, 'seek'):
                fd.seek(0)
            while True:
                chunk = fd.read(CHUNK_SIZE)
                if not chunk:
                    break
                sock.send(chunk)
        length += file_size

        # Every part ends with CRLF; without it the next boundary line would
        # be glued to the file contents when several files are uploaded.
        length += _emit(sock, '\r\n')

    # Closing delimiter.  The CRLF that precedes it was already written as
    # the terminator of the last part.
    length += _emit(sock, '--%s--\r\n\r\n' % boundary)
    return length
# mainly a copy of HTTPHandler from urllib2
class newHTTPHandler(urllib2.BaseHandler):
    """HTTP handler that can also POST files as multipart/form-data.

    Request data may be a string (sent unchanged), or a mapping / sequence
    of (name, value) pairs.  Pairs whose value has a read() attribute are
    uploaded as files; if there are none, the pairs are URL-encoded.
    """

    def http_open(self, req):
        """Open *req* over plain HTTP."""
        return self.do_open(httplib.HTTP, req)

    def do_open(self, http_class, req):
        """Send *req* using *http_class* and return the response object.

        http_class -- httplib.HTTP-style class, called with the host string
        req        -- the urllib2.Request to send

        Raises TypeError when the request data is neither a string, a
        mapping, nor a sequence of tuples, and urllib2.URLError when no
        host is given or the connection fails.  Responses outside the 2xx
        range are passed to self.parent.error().
        """
        data = req.get_data()
        v_files = []
        v_vars = []

        if req.has_data() and not isinstance(data, str):
            if hasattr(data, 'items'):
                # mapping object (dict)
                data = data.items()
            else:
                try:
                    valid = not len(data) or isinstance(data[0], tuple)
                except TypeError:
                    valid = False
                if not valid:
                    raise TypeError(
                        "not a valid non-string sequence or mapping object")

            for (k, v) in data:
                if hasattr(v, 'read'):
                    v_files.append((k, v))
                else:
                    v_vars.append((k, v))

            if not v_files:
                # No file: send an ordinary URL-encoded body.  This also
                # covers an empty mapping, which becomes an empty string.
                data = urllib.urlencode(v_vars)
                v_vars = []

        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        h = http_class(host)  # will parse host:port
        if req.has_data():
            h.putrequest('POST', req.get_selector())
            if v_files:
                # The boundary is always generated here, so the multipart
                # headers are too; see the pass-through loop below.
                boundary = mimetools.choose_boundary()
                body_length = send_data(v_vars, v_files, boundary)
                h.putheader('Content-Type',
                            'multipart/form-data; boundary=%s' % boundary)
                h.putheader('Content-length', str(body_length))
            else:
                if 'Content-type' not in req.headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'Content-length' not in req.headers:
                    h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', req.get_selector())

        scheme, sel = urllib.splittype(req.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        h.putheader('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if name not in req.headers:
                h.putheader(name, value)
        for k, v in req.headers.items():
            # A caller-supplied Content-type/Content-length cannot describe
            # the generated multipart body, so it is not forwarded then.
            if v_files and k.lower() in ('content-type', 'content-length'):
                continue
            h.putheader(k, v)

        # httplib will attempt to connect() here. be prepared
        # to convert a socket error to a URLError.
        try:
            h.endheaders()
        except socket.error:
            raise urllib2.URLError(sys.exc_info()[1])

        if req.has_data():
            if v_files:
                send_data(v_vars, v_files, boundary, h)
            else:
                # plain string, or pairs that were URL-encoded above
                h.send(data)

        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if 200 <= code < 300:
            # Any 2xx status (201 Created, 204 No Content, ...) is success.
            resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
            resp.code = code
            resp.msg = msg
            return resp
        return self.parent.error('http', req, fp, code, msg, hdrs)
# Install the replacement handler.  The stock handler is remembered only the
# first time, so that importing this module again (or reloading it) does not
# overwrite urllib2._old_HTTPHandler with the replacement class.
if not hasattr(urllib2, '_old_HTTPHandler'):
    urllib2._old_HTTPHandler = urllib2.HTTPHandler
urllib2.HTTPHandler = newHTTPHandler
class newHTTPSHandler(newHTTPHandler):
    """HTTPS counterpart of newHTTPHandler; reuses its do_open()."""

    def https_open(self, req):
        """Open *req* through an httplib.HTTPS connection."""
        connection_class = httplib.HTTPS
        return self.do_open(connection_class, req)


# Make urllib2 use the upload-capable handler for https URLs.
urllib2.HTTPSHandler = newHTTPSHandler
if __name__ == '__main__':
    # Small command-line driver: upload one file to a URL as the form
    # field 'filename'.  The handlers defined above are already installed
    # in urllib2 at this point, so the module does not import itself.
    import getopt

    def usage(progname):
        """Print the command-line syntax for *progname*."""
        print("""
SYNTAX: %s -u url -f file [-v]
""" % progname)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hvu:f:')
    except getopt.GetoptError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause, whose syntax differs between Python versions.
        print("ERROR: %s" % sys.exc_info()[1])
        sys.exit(1)

    v_url = ''
    v_verbose = 0
    v_file = ''

    for name, value in opts:
        if name in ('-h',):
            usage(sys.argv[0])
            sys.exit(0)
        elif name in ('-v',):
            v_verbose += 1
        elif name in ('-u',):
            v_url = value
        elif name in ('-f',):
            v_file = value
        else:
            print("invalid argument: %s" % name)
            sys.exit(2)

    error = 0
    if v_url == '':
        print("need -u")
        error += 1
    if v_file == '':
        print("need -f")
        error += 1

    if error > 0:
        sys.exit(3)

    # Binary mode: the bytes sent must match the size reported by fstat(),
    # which newline translation in text mode would break.
    fd = open(v_file, 'rb')
    try:
        data = {
            'filename': fd,
        }
        req = urllib2.Request(v_url, data, {})
        try:
            u = urllib2.urlopen(req)
        except urllib2.HTTPError:
            print("HTTPError: %s" % sys.exc_info()[1].code)
        else:
            buf = u.read()
            print("OK")
    finally:
        fd.close()