nya
[archive.git] / Apkawa / for_torrents.ru / urllib2_file.py
blobf7e714b3762e054405a52f8500033bed6a53bfb5
1 #!/usr/bin/env python
2 ####
3 # Version: 0.2.0
4 # - UTF-8 filenames are now allowed (Eli Golovinsky)<br/>
5 # - A file object is no longer mandatory; the object only needs to have seek() and read() attributes (Eli Golovinsky)<br/>
7 # Version: 0.1.0
8 # - upload is now done with chunks (Adam Ambrose)
10 # Version: older
11 # THANKS TO:
12 # bug fix: kosh @T aesaeion.com
13 # HTTPS support : Ryan Grow <ryangrow @T yahoo.com>
15 # Copyright (C) 2004,2005,2006 Fabien SEISEN
17 # This library is free software; you can redistribute it and/or
18 # modify it under the terms of the GNU Lesser General Public
19 # License as published by the Free Software Foundation; either
20 # version 2.1 of the License, or (at your option) any later version.
22 # This library is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 # Lesser General Public License for more details.
27 # You should have received a copy of the GNU Lesser General Public
28 # License along with this library; if not, write to the Free Software
29 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 # you can contact me at: <fabien@seisen.org>
32 # http://fabien.seisen.org/python/
34 # Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in
35 # chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file
36 # don't need to be kept in memory.
38 """
39 Enables uploading files using multipart/form-data.
41 idea from:
42 upload files in python:
43 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
45 timeoutsocket.py: overriding Python socket API:
46 http://www.timo-tasi.org/python/timeoutsocket.py
47 http://mail.python.org/pipermail/python-announce-list/2001-December/001095.html
49 import urllib2_file
50 import urllib2
51 u = urllib2.urlopen('http://site.com/path' [, data])
53 data can be a mapping object or a sequence of two-elements tuples
54 (like in original urllib2.urlopen())
55 varname still needs to be a string and
56 value can be a string or a file object
57 eg:
58 ((varname, value),
59 (varname2, value),
62 { name: value,
63 name2: value2
66 """
68 import os
69 import socket
70 import sys
71 import stat
72 import mimetypes
73 import mimetools
74 import httplib
75 import urllib
76 import urllib2
# Number of bytes read from an uploaded file and handed to the connection
# per iteration of the send loop.  An alternative setting kept for reference:
# CHUNK_SIZE = 65536
CHUNK_SIZE = 2048

# Module-level upload progress slot, initialised to 0; send_data() computes
# a 0-100 percentage under this name while it streams a file.
PROGRESS = 0
def get_content_type(filename):
    """Return the MIME type guessed from *filename*.

    Falls back to 'application/octet-stream' when mimetypes cannot
    guess a type for the name.
    """
    guessed, _encoding = mimetypes.guess_type(filename)
    if guessed:
        return guessed
    return 'application/octet-stream'
85 # if sock is None, just return the estimated size
def send_data(v_vars, v_files, boundary, sock=None):
    """Write a multipart/form-data body to *sock* and return its length.

    v_vars   -- sequence of (name, value) pairs for plain form fields;
                each value is concatenated with a string, so it must be
                a string itself
    v_files  -- sequence of (name, file object) pairs; each file object
                must provide fileno(), a name attribute and read(), and
                is rewound with seek(0) when it has a seek attribute
    boundary -- multipart boundary, without the leading dashes
    sock     -- object with a send() method; when it is None (or
                otherwise false) nothing is read or sent and only the
                length is computed

    Returns the number of bytes in the body, using the size reported by
    os.fstat() for every file.  While a file is being sent, the module
    level PROGRESS is updated with the percentage of that file already
    written.
    """
    # Without this declaration the assignments below would only create a
    # function local and the module-level PROGRESS would never change.
    global PROGRESS

    total = 0

    for (k, v) in v_vars:
        part = ''.join([
            '--%s\r\n' % boundary,
            'Content-Disposition: form-data; name="%s"\r\n' % k,
            '\r\n',
            v + '\r\n',
        ])
        if sock:
            sock.send(part)
        total += len(part)

    for (k, fd) in v_files:
        file_size = os.fstat(fd.fileno())[stat.ST_SIZE]
        # Only the last path component is advertised to the server.
        name = os.path.basename(fd.name)
        if isinstance(name, unicode):
            name = name.encode('UTF-8')
        head = ''.join([
            '--%s\r\n' % boundary,
            'Content-Disposition: form-data; name="%s"; filename="%s"\r\n'
            % (k, name),
            'Content-Type: %s\r\n' % get_content_type(name),
            'Content-Length: %s\r\n' % file_size,
            '\r\n',
        ])

        if sock:
            sock.send(head)
            if hasattr(fd, 'seek'):
                fd.seek(0)
            sent = 0
            # An empty file is complete right away; computing a ratio
            # for it would divide by zero.
            PROGRESS = 0 if file_size else 100
            while True:
                chunk = fd.read(CHUNK_SIZE)
                if not chunk:
                    break
                sock.send(chunk)
                sent += len(chunk)
                if file_size:
                    PROGRESS = int(float(sent) / float(file_size) * 100)
            # The CRLF that precedes the next delimiter belongs after
            # every part, not only after the last one; otherwise a second
            # file's boundary is glued to the first file's content.
            sock.send('\r\n')

        total += len(head) + file_size + len('\r\n')

    closing = '--%s--\r\n' % boundary + '\r\n'
    if sock:
        sock.send(closing)
    total += len(closing)
    return total
135 # mainly a copy of HTTPHandler from urllib2
class newHTTPHandler(urllib2.BaseHandler):
    """HTTP handler that can POST file objects as multipart/form-data.

    Request data may be a plain string (sent unchanged), or a mapping /
    sequence of (name, value) pairs.  When at least one value has a
    read() attribute the body is sent as multipart/form-data through
    send_data(); otherwise the pairs are urlencoded.
    """

    def http_open(self, req):
        """Open *req* using httplib.HTTP."""
        return self.do_open(httplib.HTTP, req)

    def do_open(self, http_class, req):
        """Send *req* through an *http_class* connection.

        Returns a urllib.addinfourl response for a 200 reply and
        delegates every other status code to self.parent.error().

        Raises TypeError when the request data is neither a string, a
        mapping nor a sequence of tuples, and urllib2.URLError when no
        host is given or the connection fails with socket.error.
        """
        data = req.get_data()
        v_files = []
        v_vars = []
        # Anything that is not a plain string is treated as a mapping or
        # a sequence of (name, value) pairs.
        if req.has_data() and not isinstance(data, str):
            if hasattr(data, 'items'):
                data = data.items()
            else:
                try:
                    if len(data) and not isinstance(data[0], tuple):
                        raise TypeError
                except TypeError:
                    raise TypeError(
                        "not a valid non-string sequence or mapping object")

            for (k, v) in data:
                if hasattr(v, 'read'):
                    v_files.append((k, v))
                else:
                    v_vars.append((k, v))

            if not v_files:
                # No file involved: fall back to an ordinary urlencoded
                # body.  This also covers an empty mapping/sequence, so
                # that a string (never a list) reaches h.send() below.
                data = urllib.urlencode(v_vars)
                v_vars = []

        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        h = http_class(host)  # will parse host:port
        boundary = None
        if req.has_data():
            h.putrequest('POST', req.get_selector())
            if v_files:
                # Chosen unconditionally so the body can still be written
                # when the caller supplied a Content-type header of their
                # own (that header must then name a matching boundary).
                boundary = mimetools.choose_boundary()
                # Dry run: sock is omitted, so only the size is computed.
                body_length = send_data(v_vars, v_files, boundary)
                default_type = 'multipart/form-data; boundary=%s' % boundary
            else:
                body_length = len(data)
                default_type = 'application/x-www-form-urlencoded'
            # Caller-supplied headers win; they are emitted further down
            # together with the rest of req.headers.
            if 'Content-type' not in req.headers:
                h.putheader('Content-type', default_type)
            if 'Content-length' not in req.headers:
                h.putheader('Content-length', '%d' % body_length)
        else:
            h.putrequest('GET', req.get_selector())

        scheme, sel = urllib.splittype(req.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        h.putheader('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if name not in req.headers:
                h.putheader(name, value)
        for k, v in req.headers.items():
            h.putheader(k, v)
        # httplib will attempt to connect() here.  be prepared
        # to convert a socket error to a URLError.
        try:
            h.endheaders()
        except socket.error as err:
            raise urllib2.URLError(err)

        if req.has_data():
            if v_files:
                send_data(v_vars, v_files, boundary, h)
            else:
                # plain string body, or pairs urlencoded above
                h.send(data)

        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if code == 200:
            resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
            resp.code = code
            resp.msg = msg
            return resp
        else:
            return self.parent.error('http', req, fp, code, msg, hdrs)
# Install the upload-aware handler in place of urllib2's own HTTPHandler and
# keep the stock class reachable as urllib2._old_HTTPHandler.  The guard
# matters when this module body runs more than once in a process (reload(),
# or being executed as a script and imported as a module): without it the
# saved "old" handler would be overwritten by an already patched class.
if not hasattr(urllib2, '_old_HTTPHandler'):
    urllib2._old_HTTPHandler = urllib2.HTTPHandler
urllib2.HTTPHandler = newHTTPHandler
class newHTTPSHandler(newHTTPHandler):
    """HTTPS variant of newHTTPHandler; request handling is inherited."""

    def https_open(self, req):
        """Open *req* using httplib.HTTPS."""
        connection_class = httplib.HTTPS
        return self.do_open(connection_class, req)


# Make urllib2 use the upload-aware handler for HTTPS as well.
urllib2.HTTPSHandler = newHTTPSHandler
if __name__ == '__main__':
    # Small command line front end: POST one file to a URL as the form
    # field 'filename'.
    #
    # Exit codes: 0 after the request was attempted (or -h), 1 for an
    # option parsing error, 2 for an unexpected option, 3 when -u or -f
    # is missing.
    import getopt
    import sys
    import urllib2
    # The handlers are already installed by the module body above, so this
    # file does not need to import itself a second time.

    def usage(progname):
        """Print the command line syntax."""
        print("""
SYNTAX: %s -u url -f file [-v]
""" % progname)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hvu:f:')
    except getopt.GetoptError as errmsg:
        print("ERROR: %s" % errmsg)
        sys.exit(1)

    v_url = ''
    v_verbose = 0   # -v is accepted and counted, but not acted upon
    v_file = ''

    for name, value in opts:
        if name == '-h':
            usage(sys.argv[0])
            sys.exit(0)
        elif name == '-v':
            v_verbose += 1
        elif name == '-u':
            v_url = value
        elif name == '-f':
            v_file = value
        else:
            print("invalid argument: %s" % name)
            sys.exit(2)

    error = 0
    if v_url == '':
        print("need -u")
        error += 1
    if v_file == '':
        print("need -f")
        error += 1

    if error > 0:
        sys.exit(3)

    # Binary mode: the bytes sent must match the size os.fstat() reports,
    # which text mode does not guarantee on every platform.
    fd = open(v_file, 'rb')
    try:
        data = {
            'filename': fd,
        }
        # u = urllib2.urlopen(v_url, data)
        req = urllib2.Request(v_url, data, {})
        try:
            u = urllib2.urlopen(req)
        except urllib2.HTTPError as errobj:
            print("HTTPError: %s" % errobj.code)
        else:
            u.read()    # drain the response body
            print("OK")
    finally:
        fd.close()