From 95df1aa8886695cf55b2cdf662abba04f6f245ac Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Mon, 21 Jan 2019 12:21:46 +0100
Subject: [PATCH] Switch recaptcha, search, spiders and varnish to requests

Easier to switch to the requests package than to figure out how to deal
with some of the encoding changes manually. And as a bonus, it's much
nicer to work with the requests package for the future.
---
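Notes for reviewers: every file below follows the same conversion
pattern. As a minimal standalone sketch of that pattern (httpbin.org
stands in as a placeholder endpoint and the payload is made up; only
the requests calls and attributes, i.e. post/put, status_code, reason,
text and json(), mirror what the diffs rely on):

    import requests

    # One requests.post() call replaces the httplib sequence of
    # HTTPSConnection() + request() + getresponse() + read().
    # A single timeout= bounds both the connect and the read phase,
    # so the old two-step of a short connect timeout plus a separate
    # sock.settimeout() for the response collapses into one value.
    try:
        r = requests.post(
            "https://httpbin.org/post",  # placeholder endpoint
            data={'q': 'hello'},         # dicts are form-encoded automatically
            timeout=5,
        )
    except requests.exceptions.RequestException as e:
        raise SystemExit("request failed: %s" % e)

    if r.status_code != 200:             # replaces checking resp.status
        raise SystemExit("bad status: %s (%s)" % (r.status_code, r.reason))
    print(r.json())                      # replaces json.loads(resp.read())

One behavior change this implies: the search code previously allowed a
20 second read timeout on top of a 5 second connect timeout, while
requests applies timeout=5 to both phases, so slow (but not dead)
search responses now fail earlier.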
 pgweb/account/recaptcha.py          | 34 +++++++++++-----------------------
 pgweb/search/views.py               | 32 +++++++++++++++++---------------
 tools/ftp/spider_ftp.py             | 23 +++++++++++++----------
 tools/ftp/spider_yum.py             | 24 ++++++++++++++----------
 tools/varnishqueue/varnish_queue.py | 13 ++++++-------
 5 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/pgweb/account/recaptcha.py b/pgweb/account/recaptcha.py
index a3688c56..1a10f606 100644
--- a/pgweb/account/recaptcha.py
+++ b/pgweb/account/recaptcha.py
@@ -7,9 +7,7 @@ from django.forms import ValidationError
 from django.utils.safestring import mark_safe
 from django.conf import settings
 
-import httplib
-import urllib
-import json
+import requests
 
 import logging
 log = logging.getLogger(__name__)
@@ -45,38 +43,28 @@ class ReCaptchaField(forms.CharField):
         super(ReCaptchaField, self).clean(value)
 
         # Validate the recaptcha
-        c = httplib.HTTPSConnection('www.google.com', strict=True, timeout=5)
         param = {
             'secret': settings.RECAPTCHA_SECRET_KEY,
             'response': value,
         }
-
-        # Temporarily don't include remoteip, because it only shows our ssl terminating
-        # frontends.
-#        if self.remoteip:
-#            param['remoteip'] = self.remoteip
-
         try:
-            c.request('POST', '/recaptcha/api/siteverify', urllib.urlencode(param), {
-                'Content-type': 'application/x-www-form-urlencoded',
-            })
-            c.sock.settimeout(10)
-        except Exception as e:
-            # Error to connect at TCP level
+            r = requests.post(
+                "https://www.google.com/recaptcha/api/siteverify", param,
+                headers={
+                    'Content-type': 'application/x-www-form-urlencoded',
+                },
+                timeout=5,
+            )
+        except requests.exceptions.Timeout as e:
             log.error('Failed to connect to google recaptcha API: %s' % e)
             raise ValidationError('Failed in API call to google recaptcha')
 
-        try:
-            r = c.getresponse()
-        except:
-            log.error('Failed in API call to google recaptcha')
-            raise ValidationError('Failed in API call to google recaptcha')
-
-        if r.status != 200:
+        if r.status_code != 200:
             log.error('Invalid response code from google recaptcha')
             raise ValidationError('Invalid response code from google recaptcha')
 
         try:
-            j = json.loads(r.read())
+            j = r.json()
         except:
             log.error('Invalid response structure from google recaptcha')
             raise ValidationError('Invalid response structure from google recaptcha')
diff --git a/pgweb/search/views.py b/pgweb/search/views.py
index c1b0e11c..677f05e2 100644
--- a/pgweb/search/views.py
+++ b/pgweb/search/views.py
@@ -5,12 +5,9 @@
 from django.conf import settings
 from pgweb.util.decorators import cache
 
-import httplib
-import urllib
+import urllib.parse
+import requests
 import psycopg2
-import json
-import socket
-import ssl
 
 from pgweb.lists.models import MailingList
 
@@ -177,24 +174,29 @@ def search(request):
             memc = None
     if not hits:
         # No hits found - so try to get them from the search server
-        if settings.ARCHIVES_SEARCH_PLAINTEXT:
-            c = httplib.HTTPConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-        else:
-            c = httplib.HTTPSConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-        c.request('POST', '/archives-search/', urlstr, {'Content-type': 'application/x-www-form-urlencoded; charset=utf-8'})
-        c.sock.settimeout(20)  # Set a 20 second timeout
         try:
-            r = c.getresponse()
-        except (socket.timeout, ssl.SSLError):
+            r = requests.post(
+                "{}://{}/archives-search/".format(settings.ARCHIVES_SEARCH_PLAINTEXT and 'http' or 'https', settings.ARCHIVES_SEARCH_SERVER),
+                urlstr,
+                headers={
+                    'Content-type': 'application/x-www-form-urlencoded; charset=utf-8',
+                },
+                timeout=5,
+            )
+        except requests.exceptions.Timeout:
             return render(request, 'search/listsearch.html', {
                 'search_error': 'Timeout when talking to search server. Please try your search again later, or with a more restrictive search terms.',
             })
+        except:
+            return render(request, 'search/listsearch.html', {
+                'search_error': 'General error when talking to search server.',
+            })
-        if r.status != 200:
+        if r.status_code != 200:
             memc = None
             return render(request, 'search/listsearch.html', {
                 'search_error': 'Error talking to search server: %s' % r.reason,
             })
-        hits = json.loads(r.read())
+        hits = r.json()
         if has_memcached and memc:
             # Store them in memcached too! But only for 10 minutes...
             # And always compress it, just because we can
diff --git a/tools/ftp/spider_ftp.py b/tools/ftp/spider_ftp.py
index 4582bc2a..68361299 100755
--- a/tools/ftp/spider_ftp.py
+++ b/tools/ftp/spider_ftp.py
@@ -11,7 +11,7 @@ import os
 from datetime import datetime
 import pickle as pickle
 import codecs
-import urllib2
+import requests
 
 # Directories, specified from the root of the ftp tree and down, that
 # will be recursively excluded from the pickle.
@@ -80,14 +80,17 @@ if len(sys.argv) != 3:
 parse_directory(sys.argv[1], len(sys.argv[1]))
 
 if sys.argv[2].startswith("http://") or sys.argv[2].startswith("https://"):
-    o = urllib2.build_opener(urllib2.HTTPHandler)
-    r = urllib2.Request(sys.argv[2], data=pickle.dumps(allnodes))
-    r.add_header('Content-type', 'application/octet-stream')
-    r.add_header('Host', 'www.postgresql.org')
-    r.get_method = lambda: 'PUT'
-    u = o.open(r)
-    x = u.read()
-    if x != "NOT CHANGED" and x != "OK":
-        print("Failed to upload: %s" % x)
+    r = requests.put(
+        sys.argv[2],
+        data=pickle.dumps(allnodes),
+        headers={
+            'Content-type': 'application/octet-stream',
+        },
+    )
+    if r.status_code != 200:
+        print("Failed to upload, code: %s" % r.status_code)
+        sys.exit(1)
+    elif r.text != "NOT CHANGED" and r.text != "OK":
+        print("Failed to upload: %s" % r.text)
         sys.exit(1)
 else:
diff --git a/tools/ftp/spider_yum.py b/tools/ftp/spider_yum.py
index 79ab45bd..912d1d68 100755
--- a/tools/ftp/spider_yum.py
+++ b/tools/ftp/spider_yum.py
@@ -4,7 +4,7 @@ import sys
 import os
 import re
 import json
-import urllib2
+import requests
 from decimal import Decimal
 from tempfile import NamedTemporaryFile
 
@@ -96,14 +96,18 @@ if __name__ == "__main__":
     j = json.dumps({'platforms': platforms, 'reporpms': reporpms})
 
     if args.target.startswith('http://') or args.target.startswith('https://'):
-        o = urllib.request.build_opener(urllib.request.HTTPHandler)
-        r = urllib.request.Request(sys.argv[2], data=j)
-        r.add_header('Content-type', 'application/json')
-        r.add_header('Host', 'www.postgresql.org')
-        r.get_method = lambda: 'PUT'
-        u = o.open(r)
-        x = u.read()
-        if x != "NOT CHANGED" and x != "OK":
-            print("Failed to upload: %s" % x)
+        r = requests.put(
+            args.target,
+            data=j,
+            headers={
+                'Content-type': 'application/json',
+            },
+        )
+        if r.status_code != 200:
+            print("Failed to upload, code: %s" % r.status_code)
+            sys.exit(1)
+
+        if r.text != "NOT CHANGED" and r.text != "OK":
+            print("Failed to upload: %s" % r.text)
             sys.exit(1)
     else:
diff --git a/tools/varnishqueue/varnish_queue.py b/tools/varnishqueue/varnish_queue.py
index 64f55f0f..3f73818b 100755
--- a/tools/varnishqueue/varnish_queue.py
+++ b/tools/varnishqueue/varnish_queue.py
@@ -9,7 +9,7 @@
 import time
 import sys
 import select
-import httplib
+import requests
 import multiprocessing
 import logging
 import psycopg2
@@ -18,13 +18,12 @@ from setproctitle import setproctitle
 
 def do_purge(consumername, headers):
     try:
-        conn = httplib.HTTPSConnection('%s.postgresql.org' % consumername)
-        conn.request("GET", "/varnish-purge-url", '', headers)
-        resp = conn.getresponse()
-        conn.close()
-        if resp.status == 200:
+        r = requests.get("https://{}.postgresql.org/varnish-purge-url".format(consumername),
+                         headers=headers,
+                         timeout=10)
+        if r.status_code == 200:
             return True
-        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, resp.status, resp.reason))
+        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, r.status_code, r.reason))
         return False
     except Exception as ex:
         logging.error("Exception purging on %s: %s" % (consumername, ex))
-- 
2.11.4.GIT