From 95df1aa8886695cf55b2cdf662abba04f6f245ac Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Mon, 21 Jan 2019 12:21:46 +0100
Subject: [PATCH] Switch recaptcha, search, spiders and varnish to requests

Easier to switch to the requests package than to figure out how to deal
with some of the encoding changes manually. And as a bonus, it's much
nicer to work with the requests package for the future.
---
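Notes for reviewers: every file below follows the same conversion
pattern. As a minimal standalone sketch of that pattern (httpbin.org
stands in as a placeholder endpoint and the payload is made up; only
the requests calls and attributes, i.e. post/put, status_code, reason,
text and json(), mirror what the diffs rely on):

    import requests

    # One requests.post() call replaces the httplib sequence of
    # HTTPSConnection() + request() + getresponse() + read().
    # A single timeout= bounds both the connect and the read phase,
    # so the old two-step of a short connect timeout plus a separate
    # sock.settimeout() for the response collapses into one value.
    try:
        r = requests.post(
            "https://httpbin.org/post",  # placeholder endpoint
            data={'q': 'hello'},         # dicts are form-encoded automatically
            timeout=5,
        )
    except requests.exceptions.RequestException as e:
        raise SystemExit("request failed: %s" % e)

    if r.status_code != 200:             # replaces checking resp.status
        raise SystemExit("bad status: %s (%s)" % (r.status_code, r.reason))
    print(r.json())                      # replaces json.loads(resp.read())

One behavior change this implies: the search code previously allowed a
20 second read timeout on top of a 5 second connect timeout, while
requests applies timeout=5 to both phases, so slow (but not dead)
search responses now fail earlier.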
 pgweb/account/recaptcha.py          | 34 +++++++++++-----------------------
 pgweb/search/views.py               | 32 +++++++++++++++++---------------
 tools/ftp/spider_ftp.py             | 23 +++++++++++++----------
 tools/ftp/spider_yum.py             | 24 ++++++++++++++----------
 tools/varnishqueue/varnish_queue.py | 13 ++++++-------
 5 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/pgweb/account/recaptcha.py b/pgweb/account/recaptcha.py
index a3688c56..1a10f606 100644
--- a/pgweb/account/recaptcha.py
+++ b/pgweb/account/recaptcha.py
@@ -7,9 +7,7 @@ from django.forms import ValidationError
 from django.utils.safestring import mark_safe
 from django.conf import settings
 
-import httplib
-import urllib
-import json
+import requests
 
 import logging
 log = logging.getLogger(__name__)
@@ -45,38 +43,28 @@ class ReCaptchaField(forms.CharField):
         super(ReCaptchaField, self).clean(value)
 
         # Validate the recaptcha
-        c = httplib.HTTPSConnection('www.google.com', strict=True, timeout=5)
         param = {
             'secret': settings.RECAPTCHA_SECRET_KEY,
             'response': value,
         }
-
-        # Temporarily don't include remoteip, because it only shows our ssl terminating
-        # frontends.
-#        if self.remoteip:
-#            param['remoteip'] = self.remoteip
-
         try:
-            c.request('POST', '/recaptcha/api/siteverify', urllib.urlencode(param), {
-                'Content-type': 'application/x-www-form-urlencoded',
-            })
-            c.sock.settimeout(10)
-        except Exception as e:
-            # Error to connect at TCP level
+            r = requests.post(
+                "https://www.google.com/recaptcha/api/siteverify", param,
+                headers={
+                    'Content-type': 'application/x-www-form-urlencoded',
+                },
+                timeout=5,
+            )
+        except requests.exceptions.Timeout as e:
             log.error('Failed to connect to google recaptcha API: %s' % e)
             raise ValidationError('Failed in API call to google recaptcha')
 
-        try:
-            r = c.getresponse()
-        except:
-            log.error('Failed in API call to google recaptcha')
-            raise ValidationError('Failed in API call to google recaptcha')
-
-        if r.status != 200:
+        if r.status_code != 200:
             log.error('Invalid response code from google recaptcha')
             raise ValidationError('Invalid response code from google recaptcha')
 
         try:
-            j = json.loads(r.read())
+            j = r.json()
         except:
             log.error('Invalid response structure from google recaptcha')
             raise ValidationError('Invalid response structure from google recaptcha')
diff --git a/pgweb/search/views.py b/pgweb/search/views.py
index c1b0e11c..677f05e2 100644
--- a/pgweb/search/views.py
+++ b/pgweb/search/views.py
@@ -5,12 +5,9 @@
 from django.conf import settings
 from pgweb.util.decorators import cache
 
-import httplib
-import urllib
+import urllib.parse
+import requests
 import psycopg2
-import json
-import socket
-import ssl
 
 from pgweb.lists.models import MailingList
 
@@ -177,24 +174,29 @@ def search(request):
             memc = None
     if not hits:
         # No hits found - so try to get them from the search server
-        if settings.ARCHIVES_SEARCH_PLAINTEXT:
-            c = httplib.HTTPConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-        else:
-            c = httplib.HTTPSConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-        c.request('POST', '/archives-search/', urlstr, {'Content-type': 'application/x-www-form-urlencoded; charset=utf-8'})
-        c.sock.settimeout(20)  # Set a 20 second timeout
         try:
-            r = c.getresponse()
-        except (socket.timeout, ssl.SSLError):
+            r = requests.post(
+                "{}://{}/archives-search/".format(settings.ARCHIVES_SEARCH_PLAINTEXT and 'http' or 'https', settings.ARCHIVES_SEARCH_SERVER),
+                urlstr,
+                headers={
+                    'Content-type': 'application/x-www-form-urlencoded; charset=utf-8',
+                },
+                timeout=5,
+            )
+        except requests.exceptions.Timeout:
             return render(request, 'search/listsearch.html', {
                 'search_error': 'Timeout when talking to search server. Please try your search again later, or with a more restrictive search terms.',
             })
+        except:
+            return render(request, 'search/listsearch.html', {
+                'search_error': 'General error when talking to search server.',
+            })
-        if r.status != 200:
+        if r.status_code != 200:
             memc = None
             return render(request, 'search/listsearch.html', {
                 'search_error': 'Error talking to search server: %s' % r.reason,
             })
-        hits = json.loads(r.read())
+        hits = r.json()
         if has_memcached and memc:
             # Store them in memcached too! But only for 10 minutes...
             # And always compress it, just because we can
diff --git a/tools/ftp/spider_ftp.py b/tools/ftp/spider_ftp.py
index 4582bc2a..68361299 100755
--- a/tools/ftp/spider_ftp.py
+++ b/tools/ftp/spider_ftp.py
@@ -11,7 +11,7 @@ import os
 from datetime import datetime
 import pickle as pickle
 import codecs
-import urllib2
+import requests
 
 # Directories, specified from the root of the ftp tree and down, that
 # will be recursively excluded from the pickle.
@@ -80,14 +80,17 @@ if len(sys.argv) != 3:
 parse_directory(sys.argv[1], len(sys.argv[1]))
 
 if sys.argv[2].startswith("http://") or sys.argv[2].startswith("https://"):
-    o = urllib2.build_opener(urllib2.HTTPHandler)
-    r = urllib2.Request(sys.argv[2], data=pickle.dumps(allnodes))
-    r.add_header('Content-type', 'application/octet-stream')
-    r.add_header('Host', 'www.postgresql.org')
-    r.get_method = lambda: 'PUT'
-    u = o.open(r)
-    x = u.read()
-    if x != "NOT CHANGED" and x != "OK":
-        print("Failed to upload: %s" % x)
+    r = requests.put(
+        sys.argv[2],
+        data=pickle.dumps(allnodes),
+        headers={
+            'Content-type': 'application/octet-stream',
+        },
+    )
+    if r.status_code != 200:
+        print("Failed to upload, code: %s" % r.status_code)
+        sys.exit(1)
+    elif r.text != "NOT CHANGED" and r.text != "OK":
+        print("Failed to upload: %s" % r.text)
         sys.exit(1)
 else:
diff --git a/tools/ftp/spider_yum.py b/tools/ftp/spider_yum.py
index 79ab45bd..912d1d68 100755
--- a/tools/ftp/spider_yum.py
+++ b/tools/ftp/spider_yum.py
@@ -4,7 +4,7 @@ import sys
 import os
 import re
 import json
-import urllib2
+import requests
 from decimal import Decimal
 from tempfile import NamedTemporaryFile
 
@@ -96,14 +96,18 @@ if __name__ == "__main__":
     j = json.dumps({'platforms': platforms, 'reporpms': reporpms})
 
     if args.target.startswith('http://') or args.target.startswith('https://'):
-        o = urllib.request.build_opener(urllib.request.HTTPHandler)
-        r = urllib.request.Request(sys.argv[2], data=j)
-        r.add_header('Content-type', 'application/json')
-        r.add_header('Host', 'www.postgresql.org')
-        r.get_method = lambda: 'PUT'
-        u = o.open(r)
-        x = u.read()
-        if x != "NOT CHANGED" and x != "OK":
-            print("Failed to upload: %s" % x)
+        r = requests.put(
+            args.target,
+            data=j,
+            headers={
+                'Content-type': 'application/json',
+            },
+        )
+        if r.status_code != 200:
+            print("Failed to upload, code: %s" % r.status_code)
+            sys.exit(1)
+
+        if r.text != "NOT CHANGED" and r.text != "OK":
+            print("Failed to upload: %s" % r.text)
             sys.exit(1)
     else:
diff --git a/tools/varnishqueue/varnish_queue.py b/tools/varnishqueue/varnish_queue.py
index 64f55f0f..3f73818b 100755
--- a/tools/varnishqueue/varnish_queue.py
+++ b/tools/varnishqueue/varnish_queue.py
@@ -9,7 +9,7 @@
 import time
 import sys
 import select
-import httplib
+import requests
 import multiprocessing
 import logging
 import psycopg2
@@ -18,13 +18,12 @@ from setproctitle import setproctitle
 
 def do_purge(consumername, headers):
     try:
-        conn = httplib.HTTPSConnection('%s.postgresql.org' % consumername)
-        conn.request("GET", "/varnish-purge-url", '', headers)
-        resp = conn.getresponse()
-        conn.close()
-        if resp.status == 200:
+        r = requests.get("https://{}.postgresql.org/varnish-purge-url".format(consumername),
+                         headers=headers,
+                         timeout=10)
+        if r.status_code == 200:
             return True
-        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, resp.status, resp.reason))
+        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, r.status_code, r.reason))
         return False
     except Exception as ex:
         logging.error("Exception purging on %s: %s" % (consumername, ex))
-- 
2.11.4.GIT