1 # -*- coding: utf-8 -*-
3 from rocksock
import Rocksock
, RocksockException
9 def _parse_errorcode(line
):
27 if url_l
.startswith('https://'):
31 elif url_l
.startswith('http://'):
35 elif url_l
.startswith('//'):
36 # can happen with a redirect
40 elif url_l
.startswith('/'):
41 # can happen with a redirect
47 if not '/' in url
: url
= url
+ '/'
50 return "", 0, False, url
53 fixed_amazon_redirect
= False
54 for i
in range(len(url
)):
56 if not fixed_amazon_redirect
:
57 url
= url
.replace('?','/?',True)
58 fixed_amazon_redirect
= True
64 port
= int(url
[port_index
:i
])
69 return host
, port
, ssl
, url
71 def _parse_content_type(line
):
76 if x
.lower().startswith('charset='):
77 cs
= x
[len('charset='):]
82 TEXTUAL_CONTENT_TYPES_LIST
= ['text/html', 'text/plain']
83 def _is_textual_content_type(ct
):
85 return ct
in TEXTUAL_CONTENT_TYPES_LIST
88 def __init__(self
, host
, port
=80, ssl
=False, follow_redirects
=False, \
89 auto_set_cookies
=False, keep_alive
=False, timeout
=60, \
90 user_agent
=None, proxies
=None, max_tries
=10, log_errors
=True, \
97 self
.follow_redirects
= follow_redirects
98 self
.redirect_counter
= 0
99 self
.auto_set_cookies
= auto_set_cookies
100 self
.keep_alive
= keep_alive
101 self
.timeout
= timeout
102 self
.user_agent
= user_agent
if user_agent
else 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
103 self
.proxies
= proxies
104 self
.cookies
= dict()
105 self
.max_tries
= max_tries
106 self
.log_errors
= log_errors
107 self
.last_rs_exception
= None
108 self
.verify_cert
=verify_cert
def get_last_rocksock_exception(self):
    """Return the most recently recorded RocksockException.

    The attribute starts out as None and is overwritten whenever one of
    the exception handlers in this class catches a RocksockException, so
    None means nothing has been recorded so far.
    """
    last_exc = self.last_rs_exception
    return last_exc
114 def _err_log(self
, s
):
116 sys
.stderr
.write(s
+ '\n')
119 return self
.reconnect()
121 def _key_match(self
, want
, got
):
122 return want
.lower() == got
.lower()
124 def _make_request(self
, typ
, url
, extras
=None):
125 extras
= extras
if extras
else []
126 s
= typ
+ ' '+ url
+' HTTP/1.1\r\n'
127 if self
.port
!= 80 and self
.port
!= 443:
128 s
+= 'Host: %s:%d\r\n'%(self
.host
,self
.port
)
130 s
+= 'Host: %s\r\n'%(self
.host
)
132 s
+= 'Connection: keep-alive\r\n'
134 s
+= 'Connection: close\r\n'
135 s
+= 'Accept: */*\r\n'
136 s
+= 'Accept-Encoding: gzip, deflate\r\n'
137 s
+= 'User-Agent: %s\r\n'%self
.user_agent
139 for i
in self
.headers
:
143 for c
in self
.cookies
:
146 if self
.cookies
[c
] != '':
147 cs
+= c
+ '=' + self
.cookies
[c
]
151 s
+= 'Cookie: ' + cs
+ '\r\n'
154 if i
.startswith('p0$tD4ta:'):
165 def _make_head_request(self
, url
, extras
=None):
166 return self
._make
_request
('HEAD', url
, extras
)
168 def _make_get_request(self
, url
, extras
=None):
169 return self
._make
_request
('GET', url
, extras
)
171 def _make_post_request_raw(self
, url
, data
, extras
=None):
172 x
= extras
if extras
else []
173 x
.append('Content-Type: application/x-www-form-urlencoded')
174 x
.append('Content-Length: ' + str(len(data
)))
175 x
.append('p0$tD4ta:' + data
)
176 return self
._make
_request
('POST', url
, x
)
178 def _make_post_request(self
, url
, values
, extras
=None):
179 data
= urllib
.urlencode(values
)
180 return self
._make
_post
_request
_raw
(url
, data
, extras
)
182 def _try_gunzip(self
, data
):
184 res
= zlib
.decompress(data
, 16+zlib
.MAX_WBITS
)
186 except zlib
.error
as e
:
187 if 'incomplete' in e
.message
:
191 def _get_response(self
):
192 def parse_header_fields(line
):
193 if not ':' in line
: return line
.rstrip(' '), ""
194 if not ': ' in line
: return line
.split(':', 1)
195 return line
.split(': ', 1)
# some sites don't set Content-Length; -1 causes as much as possible to be fetched
205 #'HTTP/1.1 302 Found\r\n'
207 while not l
.startswith('HTTP/'):
208 l
= self
.conn
.recvline().strip()
210 foo
, code
, msg
= _parse_errorcode(l
)
212 l
= self
.conn
.recvline().strip()
215 key
, val
= parse_header_fields(l
)
216 if self
._key
_match
(key
, 'Transfer-Encoding') and 'chunked' in val
:
218 elif self
._key
_match
(key
, 'Set-Cookie') and self
.auto_set_cookies
:
220 elif self
._key
_match
(key
, 'Location'):
222 elif self
._key
_match
(key
, 'Content-Type'):
223 ct
, cs
= _parse_content_type(val
)
224 if cs
.lower() == 'utf-8':
225 if _is_textual_content_type(ct
):
227 elif self
._key
_match
(key
, 'Content-Encoding'):
230 elif val
== 'deflate':
232 elif self
._key
_match
(key
, 'Content-Length'):
235 if q
== -1 and code
>= 400 and code
< 600:
236 return (s
, res
, redirect
)
239 res
= self
.conn
.recv(q
)
242 l
= self
.conn
.recvline().strip().split(';', 1)
243 if(l
[0]) == '': break
245 data
= self
.conn
.recv(q
)
246 assert(len(data
) == q
)
248 crlf
= self
.conn
.recv(2)
249 assert(crlf
== '\r\n')
254 ec
, extr
= self
._try
_gunzip
(res
)
256 res
+= self
.conn
.recv(-1)
257 ec
, extr
= self
._try
_gunzip
(res
)
261 elif unzip
== 'deflate':
263 res
= zlib
.decompress(res
)
265 res
= zlib
.decompress(res
, -zlib
.MAX_WBITS
)
268 res
= res
.decode(charset
)
271 print "<<<\n", s
, res
273 return (s
, res
, redirect
)
277 while tries
< self
.max_tries
:
280 self
.conn
= Rocksock(host
=self
.host
, port
=self
.port
, proxies
=self
.proxies
, ssl
=self
.use_ssl
, timeout
=self
.timeout
, verifycert
=self
.verify_cert
)
283 except RocksockException
as e
:
284 self
.last_rs_exception
= e
285 if e
.errortype
== rocksock
.RS_ET_GAI
and e
.error
==-2:
286 # -2: Name does not resolve
287 self
.conn
.disconnect()
290 self
._err
_log
(e
.get_errormessage())
293 except socket
.gaierror
:
294 self
._err
_log
("gaie")
297 except ssl
.SSLError
as e
:
298 self
._err
_log
("ssle" + e
.reason
)
303 def _send_and_recv_i(self
, req
):
304 if self
._send
_raw
(req
):
305 return self
._get
_response
()
306 else: return "", "", ""
308 def _send_and_recv(self
, req
):
310 while tries
< self
.max_tries
:
312 a
= self
._catch
(self
._send
_and
_recv
_i
, None, req
)
313 if a
is not None: return a
316 def _catch(self
, func
, failret
, *args
):
319 except RocksockException
as e
:
320 self
.last_rs_exception
= e
321 self
.conn
.disconnect()
322 if not self
.reconnect(): return failret
324 self
.conn
.disconnect()
325 if not self
.reconnect(): return failret
327 self
.conn
.disconnect()
328 if not self
.reconnect(): return failret
330 self
.conn
.disconnect()
331 if not self
.reconnect(): return failret
334 def _send_raw(self
, req
):
335 if self
.conn
is None:
336 if not self
.reconnect(): return False
337 res
= self
.conn
.send(req
)
338 if res
is not False: return True
342 def get(self
, url
, extras
=None):
343 req
= self
._make
_get
_request
(url
, extras
)
344 hdr
, res
, redirect
= self
._send
_and
_recv
(req
)
346 if redirect
!= '' and self
.follow_redirects
:
348 self
.redirect_counter
+= 1
349 if self
.redirect_counter
> MAX_REDIRECTS
:
352 host
, port
, use_ssl
, url
= _parse_url(redirect
)
355 if port
!= -1: # -1: use existing port/ssl
357 self
.use_ssl
= use_ssl
358 self
.conn
.disconnect()
361 return self
.get(url
, extras
)
363 self
.redirect_counter
= 0
367 def _head_i(self
, url
, extras
=None):
368 req
= self
._make
_head
_request
(url
, extras
)
369 if not self
._send
_raw
(req
): return ""
372 #'HTTP/1.1 302 Found\r\n'
373 l
= self
.conn
.recvline().strip()
375 foo
, code
, msg
= _parse_errorcode(l
)
377 l
= self
.conn
.recvline().strip()
380 if self
.debugreq
: print "<<<\n", s
383 def head(self
, url
, extras
=None):
385 while tries
< self
.max_tries
:
387 res
= self
._catch
(self
._head
_i
, None, url
, extras
)
388 if res
is not None: return res
391 def post_raw(self
, url
, data
, extras
=None):
392 req
= self
._make
_post
_request
_raw
(url
, data
, extras
)
393 hdr
, res
, redirect
= self
._send
_and
_recv
(req
)
396 def post(self
, url
, values
, extras
=None):
397 req
= self
._make
_post
_request
(url
, values
, extras
)
398 hdr
, res
, redirect
= self
._send
_and
_recv
(req
)
def xhr_get(self, url):
    """GET *url* with an 'X-Requested-With: XMLHttpRequest' header added.

    Returns whatever get() returns.
    """
    xhr_header = 'X-Requested-With: XMLHttpRequest'
    return self.get(url, [xhr_header])
def xhr_post(self, url, values=None):
    """POST *values* to *url* with an 'X-Requested-With: XMLHttpRequest' header.

    url    -- request target
    values -- form fields to send; omitted or None means no fields

    Returns whatever post() returns.
    """
    # A literal ``{}`` default would be one dict shared by every call, so
    # anything downstream that modified it would affect later requests.
    # Create a fresh dict per call instead.
    if values is None:
        values = {}
    return self.post(url, values, ['X-Requested-With: XMLHttpRequest'])
def add_header(self, s):
    """Register a header line to be copied verbatim into each request.

    Example:
        http.add_header("Referer: http://bbc.com")
    """
    registered = self.headers
    registered.append(s)
def add_headers(self, lines):
    """Register every non-empty line of a multi-line header chunk.

    *lines* is split on '\\n'; trailing '\\r' characters are removed from
    each piece, and pieces that end up empty are skipped. The remaining
    lines are appended to self.headers in order, to be copied verbatim
    into each request.
    """
    for raw in lines.split('\n'):
        header = raw.rstrip('\r')
        if not header:
            continue
        self.headers.append(header)
418 def set_cookie(self
, c
):
419 if c
.lower().startswith('set-cookie: '):
420 c
= c
[len('Set-Cookie: '):]
422 if j
== -1: j
= len(c
)
425 if i
== -1: i
= len(c
)
427 self
.cookies
[c
[:i
]] = s
430 if __name__
== '__main__':
431 url
= 'https://www.openssl.org/news/secadv/20170126.txt'
432 host
, port
, use_ssl
, uri
= _parse_url(url
)
433 http
= RsHttp(host
=host
, port
=port
, timeout
=15, ssl
=use_ssl
, follow_redirects
=True, auto_set_cookies
=True)
435 if not http
.connect():
436 print "sorry, couldn't connect"
439 hdr
, res
= http
.get(uri
)