Quick-fix for NUL in searches
[pgweb/local.git] / pgweb / search / views.py
blob4dc66500844d0955faf5cb46ba24655e776e27e6
1 from django.shortcuts import render
2 from django.http import HttpResponseRedirect
3 from django.views.decorators.csrf import csrf_exempt
4 from django.conf import settings
6 from pgweb.util.decorators import cache
8 import urllib.parse
9 import requests
10 import psycopg2
12 from pgweb.lists.models import MailingList
# Conditionally import memcached library. Everything will work without
# it, so we allow development installs to run without it...
try:
    import pylibmc
    has_memcached = True
except ImportError:
    # Narrowed from a bare "except:": we only want to tolerate the
    # library being absent, not mask KeyboardInterrupt/SystemExit or
    # genuine errors raised while importing it.
    has_memcached = False
def generate_pagelinks(pagenum, totalpages, querystring):
    """Yield HTML fragments linking the pages of a search result.

    Produced in Python rather than in the template because the
    windowing logic is simply too ugly to express there. Yields
    nothing at all when there is only a single page.
    """
    if totalpages < 2:
        return

    if pagenum > 1:
        # Link back to the previous page
        yield '<a href="%s&p=%s">Prev</a>' % (querystring, pagenum - 1)

    # Show a window of up to 20 page numbers, beginning at most
    # 10 pages before the current one.
    first = pagenum - 10 if pagenum > 10 else 1
    for page in range(first, min(first + 20, totalpages + 1)):
        if page == pagenum:
            # Current page is plain text, not a link
            yield "%s" % page
        else:
            yield '<a href="%s&p=%s">%s</a>' % (querystring, page, page)

    if pagenum != min(first + 20, totalpages):
        # Link forward to the next page
        yield '<a href="%s&p=%s">Next</a>' % (querystring, pagenum + 1)
49 @csrf_exempt
50 @cache(minutes=15)
51 def search(request):
52 # Perform a general web search
53 # Since this lives in a different database, we open a direct
54 # connection with psycopg, thus bypassing everything that has to do
55 # with django.
57 # constants that we might eventually want to make configurable
58 hitsperpage = 20
60 if request.GET.get('m', '') == '1':
61 searchlists = True
63 if request.GET.get('l', '') != '':
64 try:
65 listid = int(request.GET['l'])
66 except:
67 listid = None
68 else:
69 # Listid not specified. But do we have the name?
70 if 'ln' in request.GET:
71 try:
72 ll = MailingList.objects.get(listname=request.GET['ln'])
73 listid = ll.id
74 except MailingList.DoesNotExist:
75 # Invalid list name just resets the default of the form,
76 # no need to throw an error.
77 listid = None
78 else:
79 listid = None
81 if 'd' in request.GET:
82 try:
83 dateval = int(request.GET['d'])
84 except:
85 dateval = None
86 else:
87 dateval = None
89 if 's' in request.GET:
90 listsort = request.GET['s']
91 if listsort not in ('r', 'd', 'i'):
92 listsort = 'r'
93 else:
94 listsort = 'r'
96 if not dateval:
97 dateval = 365
99 sortoptions = (
100 {'val': 'r', 'text': 'Rank', 'selected': request.GET.get('s', '') not in ('d', 'i')},
101 {'val': 'd', 'text': 'Date', 'selected': request.GET.get('s', '') == 'd'},
102 {'val': 'i', 'text': 'Reverse date', 'selected': request.GET.get('s', '') == 'i'},
104 dateoptions = (
105 {'val': -1, 'text': 'anytime'},
106 {'val': 1, 'text': 'within last day'},
107 {'val': 7, 'text': 'within last week'},
108 {'val': 31, 'text': 'within last month'},
109 {'val': 186, 'text': 'within last 6 months'},
110 {'val': 365, 'text': 'within last year'},
112 else:
113 searchlists = False
114 suburl = request.GET.get('u', None)
115 allsites = request.GET.get('a', None) == "1"
117 # Check that we actually have something to search for
118 if request.GET.get('q', '') == '':
119 if searchlists:
120 return render(request, 'search/listsearch.html', {
121 'search_error': "No search term specified.",
122 'sortoptions': sortoptions,
123 'lists': MailingList.objects.all().order_by("group__sortkey"),
124 'listid': listid,
125 'dates': dateoptions,
126 'dateval': dateval,
128 else:
129 return render(request, 'search/sitesearch.html', {
130 'search_error': "No search term specified.",
132 query = request.GET['q'].strip()
133 if '\0' in query:
134 return render(request, 'search/sitesearch.html', {
135 'search_error': "Invalid character in search.",
138 # Anti-stefan prevention
139 if len(query) > 1000:
140 return render(request, 'search/sitesearch.html', {
141 'search_error': "Search term too long.",
144 # Is the request being paged?
145 try:
146 pagenum = int(request.GET.get('p', 1))
147 except:
148 pagenum = 1
150 firsthit = (pagenum - 1) * hitsperpage + 1
152 if searchlists:
153 # Lists are searched by passing the work down using a http
154 # API. In the future, we probably want to do everything
155 # through a http API and merge hits, but that's for later
156 p = {
157 'q': query.encode('utf-8'),
158 's': listsort,
160 if listid:
161 if listid < 0:
162 # This is a list group, we expand that on the web server
163 p['ln'] = ','.join([x.listname for x in MailingList.objects.filter(group=-listid)])
164 else:
165 p['ln'] = MailingList.objects.get(pk=listid).listname
166 if dateval:
167 p['d'] = dateval
168 urlstr = urllib.parse.urlencode(p)
169 # If memcached is available, let's try it
170 hits = None
171 if has_memcached:
172 memc = pylibmc.Client(['127.0.0.1', ], binary=True)
173 # behavior not supported on pylibmc in squeeze:: behaviors={'tcp_nodelay':True})
174 try:
175 hits = memc.get(urlstr)
176 except Exception:
177 # If we had an exception, don't try to store either
178 memc = None
179 if not hits:
180 # No hits found - so try to get them from the search server
181 try:
182 r = requests.post(
183 "{}://{}/archives-search/".format(settings.ARCHIVES_SEARCH_PLAINTEXT and 'http' or 'https', settings.ARCHIVES_SEARCH_SERVER),
184 urlstr,
185 headers={
186 'Content-type': 'application/x-www-form-urlencoded; charset=utf-8',
188 timeout=5,
190 except requests.exceptions.Timeout:
191 return render(request, 'search/listsearch.html', {
192 'search_error': 'Timeout when talking to search server. Please try your search again later, or with a more restrictive search terms.',
194 except:
195 return render(request, 'search/listsearch.html', {
196 'search_error': 'General error when talking to search server.',
198 if r.status_code != 200:
199 memc = None
200 return render(request, 'search/listsearch.html', {
201 'search_error': 'Error talking to search server: %s' % r.reason,
203 hits = r.json()
204 if has_memcached and memc:
205 # Store them in memcached too! But only for 10 minutes...
206 # And always compress it, just because we can
207 memc.set(urlstr, hits, 60 * 10, 1)
208 memc = None
210 if isinstance(hits, dict):
211 # This is not just a list of hits.
212 # Right now the only supported dict result is a messageid
213 # match, but make sure that's what it is.
214 if hits['messageidmatch'] == 1:
215 return HttpResponseRedirect("/message-id/%s" % query)
217 totalhits = len(hits)
218 querystr = "?m=1&q=%s&l=%s&d=%s&s=%s" % (
219 urllib.parse.quote_plus(query.encode('utf-8')),
220 listid or '',
221 dateval,
222 listsort
225 return render(request, 'search/listsearch.html', {
226 'hitcount': totalhits,
227 'firsthit': firsthit,
228 'lasthit': min(totalhits, firsthit + hitsperpage - 1),
229 'query': request.GET['q'],
230 'pagelinks': "&nbsp;".join(
231 generate_pagelinks(pagenum,
232 totalhits // hitsperpage + 1,
233 querystr)),
234 'hits': [{
235 'date': h['d'],
236 'subject': h['s'],
237 'author': h['f'],
238 'messageid': h['m'],
239 'abstract': h['a'],
240 'rank': h['r'],
241 } for h in hits[firsthit - 1:firsthit + hitsperpage - 1]],
242 'sortoptions': sortoptions,
243 'lists': MailingList.objects.all().order_by("group__sortkey"),
244 'listid': listid,
245 'dates': dateoptions,
246 'dateval': dateval,
249 else:
250 # Website search is still done by making a regular pgsql connection
251 # to the search server.
252 try:
253 conn = psycopg2.connect(settings.SEARCH_DSN)
254 curs = conn.cursor()
255 except:
256 return render(request, 'search/sitesearch.html', {
257 'search_error': 'Could not connect to search database.'
260 # This is kind of a hack, but... Some URLs are flagged as internal
261 # and should as such only be included in searches that explicitly
262 # reference the suburl that they are in.
263 if suburl and suburl.startswith('/docs/devel'):
264 include_internal = True
265 else:
266 include_internal = False
268 # perform the query for general web search
269 try:
270 curs.execute("SELECT * FROM site_search(%(query)s, %(firsthit)s, %(hitsperpage)s, %(allsites)s, %(suburl)s, %(internal)s)", {
271 'query': query,
272 'firsthit': firsthit - 1,
273 'hitsperpage': hitsperpage,
274 'allsites': allsites,
275 'suburl': suburl,
276 'internal': include_internal,
278 except psycopg2.ProgrammingError:
279 return render(request, 'search/sitesearch.html', {
280 'search_error': 'Error executing search query.'
283 hits = curs.fetchall()
284 conn.close()
285 totalhits = int(hits[-1][5])
286 try:
287 if suburl:
288 quoted_suburl = urllib.parse.quote_plus(suburl)
289 else:
290 quoted_suburl = ''
291 except:
292 quoted_suburl = ''
293 querystr = "?q=%s&a=%s&u=%s" % (
294 urllib.parse.quote_plus(query.encode('utf-8')),
295 allsites and "1" or "0",
296 quoted_suburl,
299 return render(request, 'search/sitesearch.html', {
300 'suburl': suburl,
301 'allsites': allsites,
302 'hitcount': totalhits,
303 'firsthit': firsthit,
304 'lasthit': min(totalhits, firsthit + hitsperpage - 1),
305 'query': request.GET['q'],
306 'pagelinks': "&nbsp;".join(
307 generate_pagelinks(pagenum,
308 totalhits // hitsperpage + 1,
309 querystr)),
310 'hits': [{
311 'title': h[3],
312 'url': "%s%s" % (h[1], h[2]),
313 'abstract': h[4].replace("[[[[[[", "<strong>").replace("]]]]]]", "</strong>"),
314 'rank': h[5]} for h in hits[:-1]],