fix picture fetching
[rofl0r-twatscrape.git] / rocksock.py
blob0ed0ffa63840309e5784ca06df5e26b92d927b1c
1 # rocksock socket library routines for python.
2 # Copyright (C) 2018-2020 rofl0r
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 # you can find the full license text at
19 # https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
21 import socket, ssl, select, copy, errno
# rs_proxyType: protocol spoken by one hop of the proxy chain
RS_PT_NONE, RS_PT_SOCKS4, RS_PT_SOCKS5, RS_PT_HTTP = range(4)

# rs_errorType: subsystem an error code belongs to
(
    RS_ET_OWN,  # 0: rocksock-specific error
    RS_ET_SYS,  # 1: system error with errno
    RS_ET_GAI,  # 2: dns resolution subsystem error
    RS_ET_SSL,  # 3: ssl subsystem error
) = range(4)

# rs_error: rocksock-specific error codes (numeric value in the comment)
(
    RS_E_NO_ERROR,                    # 0
    RS_E_NULL,                        # 1
    RS_E_EXCEED_PROXY_LIMIT,          # 2
    RS_E_NO_SSL,                      # 3
    RS_E_NO_SOCKET,                   # 4
    RS_E_HIT_TIMEOUT,                 # 5
    RS_E_OUT_OF_BUFFER,               # 6
    RS_E_SSL_GENERIC,                 # 7
    RS_E_SOCKS4_NOAUTH,               # 8
    RS_E_SOCKS5_AUTH_EXCEEDSIZE,      # 9
    RS_E_SOCKS4_NO_IP6,               # 10
    RS_E_PROXY_UNEXPECTED_RESPONSE,   # 11
    RS_E_TARGETPROXY_CONNECT_FAILED,  # 12
    RS_E_PROXY_AUTH_FAILED,           # 13
    RS_E_HIT_READTIMEOUT,             # 14
    RS_E_HIT_WRITETIMEOUT,            # 15
    RS_E_HIT_CONNECTTIMEOUT,          # 16
    RS_E_PROXY_GENERAL_FAILURE,       # 17
) = range(18)

# Each RS_E_TARGET_* name shares its value with the RS_E_TARGETPROXY_* twin.
RS_E_TARGET_NET_UNREACHABLE = RS_E_TARGETPROXY_NET_UNREACHABLE = 18
RS_E_TARGET_HOST_UNREACHABLE = RS_E_TARGETPROXY_HOST_UNREACHABLE = 19
RS_E_TARGET_CONN_REFUSED = RS_E_TARGETPROXY_CONN_REFUSED = 20
RS_E_TARGET_TTL_EXPIRED = RS_E_TARGETPROXY_TTL_EXPIRED = 21

(
    RS_E_PROXY_COMMAND_NOT_SUPPORTED,      # 22
    RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED,  # 23
    RS_E_REMOTE_DISCONNECTED,              # 24
    RS_E_NO_PROXYSTORAGE,                  # 25
    RS_E_HOSTNAME_TOO_LONG,                # 26
    RS_E_INVALID_PROXY_URL,                # 27
    RS_E_SSL_CERTIFICATE_ERROR,            # 28
) = range(22, 29)
class RocksockException(Exception):
    """Error raised by rocksock operations.

    Attributes:
        error: numeric code. Its meaning depends on ``errortype``: an
            ``RS_E_*`` value, a system errno (RS_ET_SYS) or a resolver
            error number (RS_ET_GAI).
        errortype: one of the ``RS_ET_*`` constants.
        failedproxy: index of the proxy the failure is attributed to, or
            -1 when it is not attributed to a proxy. For RS_ET_GAI and
            RS_ET_SSL errors the field is repurposed to carry a
            human-readable message instead.
    """

    # Human-readable text for each RS_E_* code. The RS_E_TARGET_* aliases
    # share their numeric values with the RS_E_TARGETPROXY_* entries.
    _MESSAGES = {
        RS_E_NO_ERROR: "no error",
        RS_E_NULL: "NULL pointer passed",
        RS_E_EXCEED_PROXY_LIMIT: "exceeding maximum number of proxies",
        RS_E_NO_SSL: "can not establish SSL connection, since library was not compiled with USE_SSL define",
        RS_E_NO_SOCKET: "socket is not set up, maybe you should call connect first",
        RS_E_HIT_TIMEOUT: "timeout reached on operation",
        RS_E_OUT_OF_BUFFER: "supplied buffer is too small",
        # the C version uses this error when the SSL library does not
        # report any specific error, otherwise errortype SSL will be set
        # and the SSL errorcode be used
        RS_E_SSL_GENERIC: "generic SSL error",
        RS_E_SOCKS4_NOAUTH: "SOCKS4 authentication not implemented",
        RS_E_SOCKS5_AUTH_EXCEEDSIZE: "maximum length for SOCKS5 servername/password/username is 255",
        RS_E_SOCKS4_NO_IP6: "SOCKS4 is not compatible with IPv6",
        RS_E_PROXY_UNEXPECTED_RESPONSE: "the proxy sent an unexpected response",
        RS_E_TARGETPROXY_CONNECT_FAILED: "could not connect to target proxy",
        RS_E_PROXY_AUTH_FAILED: "proxy authentication failed or authd not enabled",
        RS_E_HIT_READTIMEOUT: "timeout reached on read operation",
        RS_E_HIT_WRITETIMEOUT: "timeout reached on write operation",
        RS_E_HIT_CONNECTTIMEOUT: "timeout reached on connect operation",
        RS_E_PROXY_GENERAL_FAILURE: "proxy general failure",
        RS_E_TARGETPROXY_NET_UNREACHABLE: "proxy-target: net unreachable",
        RS_E_TARGETPROXY_HOST_UNREACHABLE: "proxy-target: host unreachable",
        RS_E_TARGETPROXY_CONN_REFUSED: "proxy-target: connection refused",
        RS_E_TARGETPROXY_TTL_EXPIRED: "proxy-target: TTL expired",
        RS_E_PROXY_COMMAND_NOT_SUPPORTED: "proxy: command not supported",
        RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED: "proxy: addresstype not supported",
        RS_E_REMOTE_DISCONNECTED: "remote socket closed connection",
        RS_E_NO_PROXYSTORAGE: "no proxy storage assigned",
        RS_E_HOSTNAME_TOO_LONG: "hostname exceeds 255 chars",
        RS_E_INVALID_PROXY_URL: "invalid proxy URL string",
        RS_E_SSL_CERTIFICATE_ERROR: "certificate check error",
    }

    def __init__(self, error, failedproxy=None, errortype=RS_ET_OWN, *args, **kwargs):
        Exception.__init__(self, *args, **kwargs)
        self.error = error
        self.errortype = errortype
        self.failedproxy = failedproxy

    def get_failedproxy(self):
        """Return the failed proxy index (or the repurposed message)."""
        return self.failedproxy

    def get_error(self):
        """Return the numeric error code."""
        return self.error

    def get_errortype(self):
        """Return the RS_ET_* category of this error."""
        return self.errortype

    def reraise(self):
        """Re-raise the exception currently being handled.

        Meant to be called from inside an ``except`` block. When no
        exception is being handled, ``self`` is raised instead. The
        original traceback is kept where the interpreter supports
        ``with_traceback`` (Python 3).
        """
        import sys
        exc, tb = sys.exc_info()[1:]
        if exc is None:
            exc, tb = self, None
        # The previous ``raise(type, value, tb)`` raised a tuple, which is
        # a TypeError on Python 3 and re-instantiated the class without
        # arguments on Python 2.
        if hasattr(exc, 'with_traceback'):
            raise exc.with_traceback(tb)
        raise exc

    def get_errormessage(self):
        """Return a human-readable description of this error."""
        if self.errortype == RS_ET_SYS:
            if self.error in errno.errorcode:
                return "ERRNO: " + errno.errorcode[self.error]
            return "ERRNO: invalid errno: " + str(self.error)

        if self.errortype == RS_ET_GAI:
            # failedproxy carries the resolver's message for GAI errors
            return "GAI: %s" % (self.failedproxy,)

        msg = self._MESSAGES.get(self.error)
        if msg is None:
            # unknown codes used to raise KeyError while formatting
            msg = "unknown error %s" % (self.error,)

        if self.errortype == RS_ET_SSL:
            # failedproxy is repurposed for SSL exceptions
            if (self.error in (RS_E_SSL_GENERIC, RS_E_SSL_CERTIFICATE_ERROR)
                    and self.failedproxy is not None):
                msg += ': %s' % (self.failedproxy,)
            return msg

        # RS_ET_OWN: failedproxy defaults to None, which "%d" cannot format
        if self.failedproxy is None:
            return msg
        if isinstance(self.failedproxy, int):
            return msg + " (proxy %d)" % self.failedproxy
        return msg + " (proxy %s)" % (self.failedproxy,)
class RocksockHostinfo():
    """Host/port pair naming one endpoint.

    Raises RocksockException(RS_E_INVALID_PROXY_URL) when the port is
    outside 0..65535.
    """

    def __init__(self, host, port):
        out_of_range = port > 65535 or port < 0
        if out_of_range:
            raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1)
        self.port = port
        self.host = host
def RocksockHostinfoFromString(s):
    """Build a RocksockHostinfo from a ``"host:port"`` string.

    The string is split on the last colon. A missing colon or a
    non-numeric port raises RocksockException(RS_E_INVALID_PROXY_URL).
    """
    host, sep, port = s.rpartition(':')
    if not sep:
        raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1)
    try:
        # the port used to be handed on as a string, which made the range
        # check in RocksockHostinfo reject every input
        port = int(port)
    except ValueError:
        raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1)
    return RocksockHostinfo(host, port)
def isnumericipv4(ip):
    """Return True if *ip* is a dotted-quad IPv4 literal.

    Exactly four dot-separated fields are required, each made up only of
    ASCII digits and with a value of at most 255. Anything else,
    including non-string input, yields False.
    """
    try:
        octets = ip.split('.')
    except (AttributeError, TypeError):
        # not a text string at all
        return False
    if len(octets) != 4:
        return False
    for octet in octets:
        # strip() leaves '' only when every character is an ASCII digit;
        # this rejects signs and whitespace that int() would accept
        if not octet or octet.strip('0123456789'):
            return False
        if int(octet) > 255:
            return False
    return True
def resolve(hostinfo, want_v4=True):
    """Resolve *hostinfo* to ``(address_family, sockaddr)``.

    A dotted-quad host is returned as-is without a lookup. Otherwise the
    first getaddrinfo() result that is AF_INET (or AF_INET6 when
    *want_v4* is false) is returned; ``(None, None)`` if none matches.

    Raises:
        RocksockException: errortype RS_ET_GAI when the lookup fails. The
            resolver's message travels in the ``failedproxy`` field.
    """
    if isnumericipv4(hostinfo.host):
        return socket.AF_INET, (hostinfo.host, hostinfo.port)

    try:
        candidates = socket.getaddrinfo(
            hostinfo.host, hostinfo.port,
            socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE)
    except socket.gaierror as e:
        # avoid unpacking into a local called ``str`` (shadows the builtin)
        # and tolerate an args tuple that is not exactly (errno, message)
        eno = e.args[0] if e.args else -1
        message = e.args[1] if len(e.args) > 1 else '%s' % (e,)
        raise RocksockException(eno, message, errortype=RS_ET_GAI)

    for af, _socktype, _proto, _canonname, sa in candidates:
        if want_v4 and af != socket.AF_INET:
            continue
        if af in (socket.AF_INET, socket.AF_INET6):
            return af, sa

    return None, None
class RocksockProxy():
    """One hop of a proxy chain: endpoint, protocol and credentials.

    *type* is either an ``RS_PT_*`` constant or one of the names
    ``'none'``, ``'socks4'``, ``'socks5'``, ``'http'``. Anything else
    raises ValueError.
    """

    def __init__(self, host, port, type, username = None, password=None, **kwargs):
        names_to_types = {
            'none': RS_PT_NONE,
            'socks4': RS_PT_SOCKS4,
            'socks5': RS_PT_SOCKS5,
            'http': RS_PT_HTTP,
        }
        # names are translated, anything else is taken verbatim
        self.type = names_to_types.get(type, type)
        known_types = (RS_PT_NONE, RS_PT_SOCKS4, RS_PT_SOCKS5, RS_PT_HTTP)
        if self.type not in known_types:
            raise ValueError('Invalid proxy type')
        self.username = username
        self.password = password
        self.hostinfo = RocksockHostinfo(host, port)
def RocksockProxyFromURL(url):
    """Parse ``scheme://[user[:pass]@]hostname:port`` into a RocksockProxy.

    Returns None when the URL has no ``://``, no port, or a port that is
    not a number. An unknown scheme or an out-of-range port is reported
    by the RocksockProxy constructor.
    """
    # valid URL: socks5://[user:pass@]hostname:port
    scheme, sep, rest = url.partition('://')
    if not sep:
        return None

    rest, sep, port = rest.rpartition(':')
    if not sep:
        return None  # port is obligatory
    try:
        port = int(port)
    except ValueError:
        # non-numeric port: treated like any other malformed URL
        return None

    userinfo, at, host = rest.rpartition('@')
    if at:
        # split on the first colon only, so the password may contain ':';
        # a user name without password is accepted
        username, colon, password = userinfo.partition(':')
        if not colon:
            password = None
    else:
        username = password = None

    return RocksockProxy(host=host, port=port, type=scheme,
                         username=username, password=password)
class Rocksock():
    """Client TCP socket that can tunnel through a chain of proxies.

    ``proxychain`` holds one RocksockProxy per hop. Its last entry (type
    RS_PT_NONE) is the target built from *host*/*port*. ``connect()``
    opens a TCP connection to the first entry and then asks each proxy
    in turn to connect to the next entry.

    Payloads are byte strings: the ``recv*`` methods return ``bytes``
    (``str`` on Python 2) and ``send`` encodes text as UTF-8 first.
    """

    # upper bound for an HTTP proxy's reply header
    _MAX_HTTP_HEADER = 65536

    def __init__(self, host=None, port=0, verifycert=False, timeout=0, proxies=None, **kwargs):
        """Set up the proxy chain; no network activity happens here.

        Args:
            host, port: final target of the connection.
            verifycert: with ``ssl=True``, verify the peer certificate
                and host name.
            timeout: socket timeout in seconds; 0 means blocking.
            proxies: optional iterable of RocksockProxy objects and/or
                proxy URL strings.
            ssl (keyword): when equal to True, wrap the connection in TLS.

        Raises:
            RocksockException: RS_E_INVALID_PROXY_URL for an unparsable
                proxy URL string.
        """
        # ``== True`` is kept on purpose: only True/1 enable TLS
        if kwargs.get('ssl') == True:
            # the default context verifies certificate and host name
            self.sslcontext = ssl.create_default_context()
            if not verifycert:
                # check_hostname must be cleared before CERT_NONE is set
                self.sslcontext.check_hostname = False
                self.sslcontext.verify_mode = ssl.CERT_NONE
        else:
            self.sslcontext = None

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str

        self.proxychain = []
        if proxies is not None:
            for index, p in enumerate(proxies):
                if isinstance(p, string_types):
                    parsed = RocksockProxyFromURL(p)
                    if parsed is None:
                        # a None entry would only blow up later in connect()
                        raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=index)
                    p = parsed
                self.proxychain.append(p)
        self.proxychain.append(RocksockProxy(host, port, RS_PT_NONE))
        self.sock = None
        self.timeout = timeout

    @staticmethod
    def _to_bytes(data):
        """Return *data* as a byte string, encoding text as UTF-8."""
        if not isinstance(data, bytes) and hasattr(data, 'encode'):
            return data.encode('utf-8')
        return data

    def _translate_socket_error(self, e, pnum):
        """Convert a socket error into the matching RocksockException."""
        fp = self._failed_proxy(pnum)
        if e.errno == errno.ECONNREFUSED:
            return RocksockException(RS_E_TARGET_CONN_REFUSED, failedproxy=fp)
        return RocksockException(e.errno, errortype=RS_ET_SYS, failedproxy=fp)

    def _failed_proxy(self, pnum):
        """Map a chain index to the value reported as ``failedproxy``.

        Negative indices and the index of the final target map to -1.
        """
        if pnum < 0:
            return -1
        if pnum >= len(self.proxychain) - 1:
            return -1
        return pnum

    def connect(self):
        """Connect to the target through the proxy chain, then start TLS.

        On failure the half-open socket is closed before the exception
        propagates.

        Raises:
            RocksockException: for resolver, socket, proxy and TLS errors.
        """
        try:
            self._open_first_hop()
            for pnum in range(1, len(self.proxychain)):
                self._connect_step(pnum)
            if self.sslcontext:
                self._wrap_ssl()
        except Exception:
            sock, self.sock = self.sock, None
            if sock is not None:
                sock.close()
            raise

    def _open_first_hop(self):
        """Open the TCP connection to the first entry of the chain."""
        af, sa = resolve(self.proxychain[0].hostinfo, True)
        if af is None:
            # For GAI errors the message travels in the failedproxy slot.
            # It used to be passed positionally AND by keyword, which is
            # a TypeError.
            raise RocksockException(
                -3, "unexpected problem resolving DNS, try again",
                errortype=RS_ET_GAI)

        self.sock = socket.socket(af, socket.SOCK_STREAM)
        self.sock.settimeout(None if self.timeout == 0 else self.timeout)
        try:
            self.sock.connect((sa[0], sa[1]))
        except socket.timeout:
            raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(0))
        except socket.error as e:
            raise self._translate_socket_error(e, 0)

    def _wrap_ssl(self):
        """Wrap the connected socket in TLS using the target's host name.

        wrap_socket() performs the handshake itself (its
        do_handshake_on_connect default), so no separate handshake loop
        follows.
        """
        target_host = self.proxychain[-1].hostinfo.host
        try:
            self.sock = self.sslcontext.wrap_socket(self.sock, server_hostname=target_host)
        except ssl.CertificateError as e:
            reason = self._get_ssl_exception_reason(e)
            raise RocksockException(RS_E_SSL_CERTIFICATE_ERROR, failedproxy=reason, errortype=RS_ET_SSL)
        except ssl.SSLError as e:
            reason = self._get_ssl_exception_reason(e)
            raise RocksockException(RS_E_SSL_GENERIC, failedproxy=reason, errortype=RS_ET_SSL)
        except socket.timeout:
            raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(-1))
        except socket.error as e:
            raise self._translate_socket_error(e, -1)

    def disconnect(self):
        """Shut down and close the socket; a no-op when not connected."""
        if self.sock is None:
            return
        try:
            self.sock.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # the peer may already be gone; closing below is what matters
            pass
        self.sock.close()
        self.sock = None

    def canread(self):
        """Poll the socket and return select()'s list of readable sockets."""
        return select.select([self.sock], [], [], 0)[0]

    def send(self, buf, pnum=-1):
        """Send all of *buf*; text is encoded as UTF-8 first.

        Raises:
            RocksockException: RS_E_NO_SOCKET when not connected,
                RS_E_HIT_TIMEOUT on timeout, otherwise the translated
                socket error.
        """
        if self.sock is None:
            raise RocksockException(RS_E_NO_SOCKET, failedproxy=self._failed_proxy(pnum))
        try:
            return self.sock.sendall(self._to_bytes(buf))
        except socket.timeout:
            raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum))
        except socket.error as e:
            raise self._translate_socket_error(e, pnum)

    def _get_ssl_exception_reason(self, e):
        """Return the most specific non-empty description of an SSL error."""
        for attr in ('reason', 'message'):
            value = getattr(e, attr, None)
            if value:
                return value
        args = getattr(e, 'args', None)
        if args:
            return args[0]
        return '%s' % (e,)

    def recv(self, count=-1, pnum=-1):
        """Receive data from the connection.

        With ``count == -1`` a single chunk of at most 4096 bytes is
        returned. Otherwise the call loops until exactly *count* bytes
        have been read.

        Raises:
            RocksockException: RS_E_NO_SOCKET when not connected,
                RS_E_REMOTE_DISCONNECTED when the peer closes, timeout,
                SSL and translated socket errors.
        """
        if self.sock is None:
            raise RocksockException(RS_E_NO_SOCKET, failedproxy=self._failed_proxy(pnum))
        chunks = []
        while count:
            n = 4096 if count == -1 else min(count, 1024 * 1024)
            try:
                chunk = self.sock.recv(n)
            except socket.timeout:
                raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum))
            except ssl.SSLError as e:
                # must precede socket.error: SSLError is a subclass of it,
                # so this handler was unreachable in the old order
                s = self._get_ssl_exception_reason(e)
                if s == 'The read operation timed out':
                    raise RocksockException(RS_E_HIT_READTIMEOUT, failedproxy=self._failed_proxy(pnum))
                raise RocksockException(RS_E_SSL_GENERIC, failedproxy=s, errortype=RS_ET_SSL)
            except socket.error as e:
                raise self._translate_socket_error(e, pnum)
            if len(chunk) == 0:
                raise RocksockException(RS_E_REMOTE_DISCONNECTED, failedproxy=self._failed_proxy(pnum))
            chunks.append(chunk)
            if count == -1:
                break
            count -= len(chunk)
        return b''.join(chunks)

    def recvline(self):
        """Read byte by byte up to and including the next newline."""
        parts = []
        while True:
            c = self.recv(1)
            parts.append(c)
            if c == b'\n':
                break
        return b''.join(parts)

    def recvuntil(self, until):
        """Read until the received data ends with *until*; return it all."""
        until = self._to_bytes(until)
        s = self.recv(len(until))
        while not s.endswith(until):
            s += self.recv(1)
        return s

    def _ip_to_int(self, ip):
        """Convert a dotted-quad string to its 32-bit integer value."""
        a, b, c, d = [int(part) for part in ip.split('.')]
        return (a << 24) | (b << 16) | (c << 8) | d

    def _ip_to_bytes(self, ip):
        """Convert a dotted-quad string to 4 bytes in network order."""
        n = self._ip_to_int(ip)
        return bytes(bytearray([(n >> 24) & 0xff, (n >> 16) & 0xff,
                                (n >> 8) & 0xff, n & 0xff]))

    def _setup_socks4_header(self, v4a, dest):
        """Build a SOCKS4 (or SOCKS4a when *v4a*) CONNECT request for *dest*.

        Raises:
            RocksockException: RS_E_SOCKS4_NO_IP6 when *dest* does not
                resolve to an IPv4 address (plain SOCKS4 only).
        """
        port = dest.hostinfo.port
        buf = bytearray(b'\x04\x01')  # version 4, command CONNECT
        buf.append((port >> 8) & 0xff)
        buf.append(port & 0xff)
        if v4a:
            # address 0.0.0.1 announces that a host name follows
            buf += b'\x00\x00\x00\x01'
        else:
            af, sa = resolve(dest.hostinfo, True)
            if af != socket.AF_INET:
                raise RocksockException(RS_E_SOCKS4_NO_IP6, failedproxy=-1)
            buf += self._ip_to_bytes(sa[0])
        buf += b'\x00'  # empty, NUL-terminated user id
        if v4a:
            buf += self._to_bytes(dest.hostinfo.host) + b'\x00'
        return bytes(buf)

    def _connect_socks4(self, header, pnum):
        """Send a SOCKS4 request and evaluate the 8-byte reply.

        Raises:
            RocksockException: RS_E_TARGETPROXY_CONNECT_FAILED (0x5b),
                RS_E_PROXY_AUTH_FAILED (0x5c/0x5d), or
                RS_E_PROXY_UNEXPECTED_RESPONSE for anything else that is
                not a grant (0x5a).
        """
        fp = self._failed_proxy(pnum)
        self.send(header, pnum=pnum)
        res = bytearray(self.recv(8, pnum=pnum))
        if len(res) < 8 or res[0] != 0:
            raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)
        ch = res[1]
        if ch == 0x5a:
            return
        if ch == 0x5b:
            raise RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=fp)
        if ch in (0x5c, 0x5d):
            # this exception used to be returned instead of raised
            raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=fp)
        raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)

    def _setup_socks5_header(self, proxy):
        """Build the SOCKS5 greeting listing the offered auth methods."""
        if proxy.username and proxy.password:
            # two methods: no authentication (0) and user/password (2)
            return b'\x05\x02\x00\x02'
        # one method: no authentication
        return b'\x05\x01\x00'

    def _connect_socks5(self, header, pnum):
        """Run the SOCKS5 handshake with proxy ``pnum - 1`` towards hop *pnum*.

        Sends the greeting, authenticates with user name and password if
        the proxy asks for it, then issues the CONNECT request. Exactly
        the bytes of the proxy's reply are consumed, so data sent
        afterwards by the next hop stays in the socket.

        Raises:
            RocksockException: with the RS_E_* code matching the failure.
        """
        fp = self._failed_proxy(pnum)
        self.send(header, pnum=pnum)
        res = bytearray(self.recv(2, pnum=pnum))
        if len(res) != 2 or res[0] != 5:
            raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)
        method = res[1]
        if method == 0xff:
            raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=fp)

        if method == 2:
            px = self.proxychain[pnum - 1]
            if not (px.username and px.password):
                raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=fp)
            user = self._to_bytes(px.username)
            secret = self._to_bytes(px.password)
            if len(user) > 255 or len(secret) > 255:
                # each length has to fit into a single byte
                raise RocksockException(RS_E_SOCKS5_AUTH_EXCEEDSIZE, failedproxy=fp)
            pkt = bytearray([1, len(user)]) + user + bytearray([len(secret)]) + secret
            self.send(bytes(pkt), pnum=pnum)
            res = bytearray(self.recv(2, pnum=pnum))
            if len(res) < 2 or res[1] != 0:
                raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=fp)
        elif method != 0:
            # the proxy picked a method that was never offered
            raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)

        dst = self.proxychain[pnum].hostinfo
        if isnumericipv4(dst.host):
            atyp = 1
            dstaddr = self._ip_to_bytes(dst.host)
        else:
            name = self._to_bytes(dst.host)
            if len(name) > 255:
                raise RocksockException(RS_E_HOSTNAME_TOO_LONG, failedproxy=fp)
            atyp = 3
            dstaddr = bytearray([len(name)]) + name

        pkt = (bytearray([5, 1, 0, atyp]) + dstaddr +
               bytearray([(dst.port >> 8) & 0xff, dst.port & 0xff]))
        self.send(bytes(pkt), pnum=pnum)

        # reply layout: VER REP RSV ATYP BND.ADDR BND.PORT
        res = bytearray(self.recv(2, pnum=pnum))
        if len(res) < 2 or res[0] != 5:
            raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)
        ch = res[1]
        if ch != 0:
            failures = {
                1: RS_E_PROXY_GENERAL_FAILURE,
                2: RS_E_PROXY_AUTH_FAILED,
                3: RS_E_TARGETPROXY_NET_UNREACHABLE,
                4: RS_E_TARGETPROXY_HOST_UNREACHABLE,
                5: RS_E_TARGETPROXY_CONN_REFUSED,
                6: RS_E_TARGETPROXY_TTL_EXPIRED,
                7: RS_E_PROXY_COMMAND_NOT_SUPPORTED,
                8: RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED,
            }
            raise RocksockException(
                failures.get(ch, RS_E_PROXY_UNEXPECTED_RESPONSE), failedproxy=fp)

        # Success: consume the rest of the reply and nothing more. The
        # status is checked first so that a failure reply cut short after
        # the status byte is still reported with the right code.
        res = bytearray(self.recv(2, pnum=pnum))
        bound_atyp = res[1]
        if bound_atyp == 1:
            addrlen = 4
        elif bound_atyp == 4:
            addrlen = 16
        elif bound_atyp == 3:
            addrlen = bytearray(self.recv(1, pnum=pnum))[0]
        else:
            raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=fp)
        self.recv(addrlen + 2, pnum=pnum)  # bound address + port, unused

    def _recv_http_header(self, pnum):
        """Read an HTTP reply header up to and including the blank line."""
        head = b''
        while not (head.endswith(b'\r\n\r\n') or head.endswith(b'\n\n')):
            if len(head) >= self._MAX_HTTP_HEADER:
                raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE,
                                        failedproxy=self._failed_proxy(pnum))
            head += self.recv(1, pnum=pnum)
        return head

    def _connect_step(self, pnum):
        """Ask proxy ``pnum - 1`` to connect to chain entry *pnum*."""
        prev = self.proxychain[pnum - 1]
        curr = self.proxychain[pnum]
        if prev.type == RS_PT_SOCKS4:
            try:
                self._connect_socks4(self._setup_socks4_header(True, curr), pnum)
            except RocksockException as e:
                if e.get_error() != RS_E_TARGETPROXY_CONNECT_FAILED:
                    raise
                # Retry with a locally resolved address; the SOCKS4a
                # header used to be resent here by mistake.
                # NOTE(review): a proxy may close the connection after
                # rejecting a request, in which case this retry cannot
                # succeed on the same socket - confirm.
                self._connect_socks4(self._setup_socks4_header(False, curr), pnum)
        elif prev.type == RS_PT_SOCKS5:
            self._connect_socks5(self._setup_socks5_header(prev), pnum)
        elif prev.type == RS_PT_HTTP:
            self.send("CONNECT %s:%d HTTP/1.1\r\n\r\n" % (curr.hostinfo.host, curr.hostinfo.port),
                      pnum=pnum)
            # read exactly the header so no tunnelled data is consumed
            resp = self._recv_http_header(pnum)
            if len(resp) < 12:
                raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
            # "HTTP/1.x 2yy": byte 9 is the first digit of the status code
            if resp[9:10] != b'2':
                raise RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=self._failed_proxy(pnum))
if __name__ == '__main__':
    # Small manual smoke test: fetch the first response line twice to
    # show that the same object can reconnect after disconnect().
    proxies = [
        # RocksockProxyFromURL("socks5://foo:bar@localhost:1080"),
        # RocksockProxyFromURL("socks5://10.0.0.3:1080"),
        RocksockProxyFromURL("socks5://127.0.0.1:31339"),
    ]
    # direct connection; drop this line to go through the chain above
    proxies = None
    #rs = Rocksock(host='googleff242342423f.com', port=443, ssl=True, proxies=proxies)
    rs = Rocksock(host='google.com', port=80, ssl=False, proxies=proxies)
    # bytes literal: accepted by socket.sendall on Python 2 and Python 3
    request = b'GET / HTTP/1.0\r\n\r\n'
    try:
        rs.connect()
    except RocksockException as e:
        print(e.get_errormessage())
        e.reraise()
    try:
        rs.send(request)
        print(rs.recvline())
        rs.disconnect()
        rs.connect()
        rs.send(request)
        print(rs.recvline())
    finally:
        # the second connection used to be left open
        rs.disconnect()