from django.shortcuts import render
from django.http import HttpResponseRedirect
from django.views.decorators.csrf import csrf_exempt
from django.conf import settings

from pgweb.util.decorators import cache

from pgweb.lists.models import MailingList

import urllib.parse
import psycopg2
import requests

# Conditionally import memcached library. Everything will work without
# it, so we allow development installs to run without it...
try:
    import pylibmc
    has_memcached = True
except ImportError:
    has_memcached = False


def generate_pagelinks(pagenum, totalpages, querystring):
    # Generate a list of links to page through a search result
    # We generate these in HTML from the python code because it's
    # simply too ugly to try to do it in the template.
    if pagenum > 1:
        yield '<a href="%s&p=%s">Prev</a>' % (querystring, pagenum - 1)

    # Show a window of at most 20 pages, starting up to 10 pages back
    start = max(1, pagenum - 10)

    for i in range(start, min(start + 20, totalpages + 1)):
        yield '<a href="%s&p=%s">%s</a>' % (querystring, i, i)

    if pagenum != min(start + 20, totalpages):
        yield '<a href="%s&p=%s">Next</a>' % (querystring, pagenum + 1)
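
# For example, generate_pagelinks(3, 12, "?q=foo") yields a "Prev" link
# pointing at p=2, a numbered link for each of pages 1-12, and a "Next"
# link pointing at p=4.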


@csrf_exempt
def search(request):
    # Perform a general web search
    # Since this lives in a different database, we open a direct
    # connection with psycopg, thus bypassing everything that has to do
    # with django.

    # constants that we might eventually want to make configurable
    hitsperpage = 20
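
    # Recognized GET parameters: q (search term), m=1 (mailing list search),
    # l (list id), ln (list name), d (date range in days), s (sort order),
    # p (page number), u (site suburl) and a=1 (search all sites).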
    if request.GET.get('m', '') == '1':
        searchlists = True

        if request.GET.get('l', '') != '':
            try:
                listid = int(request.GET['l'])
            except ValueError:
                listid = None
        else:
            listid = None
            # Listid not specified. But do we have the name?
            if 'ln' in request.GET:
                try:
                    ll = MailingList.objects.get(listname=request.GET['ln'])
                    listid = ll.id
                except MailingList.DoesNotExist:
                    # Invalid list name just resets the default of the form,
                    # no need to throw an error.
                    pass

        if 'd' in request.GET:
            try:
                dateval = int(request.GET['d'])
            except ValueError:
                dateval = None
        else:
            dateval = None

        if 's' in request.GET:
            listsort = request.GET['s']
            if listsort not in ('r', 'd', 'i'):
                listsort = 'r'
        else:
            listsort = 'r'
        sortoptions = [
            {'val': 'r', 'text': 'Rank', 'selected': request.GET.get('s', '') not in ('d', 'i')},
            {'val': 'd', 'text': 'Date', 'selected': request.GET.get('s', '') == 'd'},
            {'val': 'i', 'text': 'Reverse date', 'selected': request.GET.get('s', '') == 'i'},
        ]
        dateoptions = [
            {'val': -1, 'text': 'anytime'},
            {'val': 1, 'text': 'within last day'},
            {'val': 7, 'text': 'within last week'},
            {'val': 31, 'text': 'within last month'},
            {'val': 186, 'text': 'within last 6 months'},
            {'val': 365, 'text': 'within last year'},
        ]
    else:
        searchlists = False
        suburl = request.GET.get('u', None)
        allsites = request.GET.get('a', None) == "1"

    # Check that we actually have something to search for
    if request.GET.get('q', '') == '':
        if searchlists:
            return render(request, 'search/listsearch.html', {
                'search_error': "No search term specified.",
                'sortoptions': sortoptions,
                'lists': MailingList.objects.all().order_by("group__sortkey"),
                'dates': dateoptions,
            })
        else:
            return render(request, 'search/sitesearch.html', {
                'search_error': "No search term specified.",
            })

    query = request.GET['q'].strip()

    # Reject characters the search backend cannot handle
    if '\x00' in query:
        return render(request, 'search/sitesearch.html', {
            'search_error': "Invalid character in search.",
        })

    # Anti-stefan prevention
    if len(query) > 1000:
        return render(request, 'search/sitesearch.html', {
            'search_error': "Search term too long.",
        })

    # Is the request being paged?
    try:
        pagenum = int(request.GET.get('p', 1))
    except ValueError:
        pagenum = 1

    firsthit = (pagenum - 1) * hitsperpage + 1
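    # firsthit is 1-based (it is shown in the templates); the hit slices and
    # the SQL call below subtract 1 where a 0-based offset is needed.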

    if searchlists:
        # Lists are searched by passing the work down using a http
        # API. In the future, we probably want to do everything
        # through a http API and merge hits, but that's for later
        p = {
            'q': query.encode('utf-8'),
            's': listsort,
        }
        if listid:
            if listid < 0:
                # This is a list group, we expand that on the web server
                p['ln'] = ','.join([x.listname for x in MailingList.objects.filter(group=-listid)])
            else:
                p['ln'] = MailingList.objects.get(pk=listid).listname
        if dateval:
            p['d'] = dateval

        urlstr = urllib.parse.urlencode(p)
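        # urlstr doubles as the POST body sent to the archives search API and
        # as the memcached key for the cached result below.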

        # If memcached is available, let's try it
        hits = None
        memc = None
        if has_memcached:
            memc = pylibmc.Client(['127.0.0.1', ], binary=True)
            # behavior not supported on pylibmc in squeeze:: behaviors={'tcp_nodelay':True})
            try:
                hits = memc.get(urlstr)
            except Exception:
                # If we had an exception, don't try to store either
                memc = None

        if hits is None:
            # No hits found - so try to get them from the search server
            try:
                r = requests.post(
                    "{}://{}/archives-search/".format(settings.ARCHIVES_SEARCH_PLAINTEXT and 'http' or 'https', settings.ARCHIVES_SEARCH_SERVER),
                    data=urlstr,
                    headers={
                        'Content-type': 'application/x-www-form-urlencoded; charset=utf-8',
                    },
                    timeout=5,  # timeout value assumed; a Timeout is handled below
                )
            except requests.exceptions.Timeout:
                return render(request, 'search/listsearch.html', {
                    'search_error': 'Timeout when talking to search server. Please try your search again later, or with more restrictive search terms.',
                })
            except Exception:
                return render(request, 'search/listsearch.html', {
                    'search_error': 'General error when talking to search server.',
                })

            if r.status_code != 200:
                return render(request, 'search/listsearch.html', {
                    'search_error': 'Error talking to search server: %s' % r.reason,
                })

            hits = r.json()

            if has_memcached and memc:
                # Store them in memcached too! But only for 10 minutes...
                # And always compress it, just because we can
                memc.set(urlstr, hits, 60 * 10, 1)

        if isinstance(hits, dict):
            # This is not just a list of hits.
            # Right now the only supported dict result is a messageid
            # match, but make sure that's what it is.
            if hits['messageidmatch'] == 1:
                return HttpResponseRedirect("/message-id/%s" % query)

        totalhits = len(hits)

        querystr = "?m=1&q=%s&l=%s&d=%s&s=%s" % (
            urllib.parse.quote_plus(query.encode('utf-8')),
            listid or '',
            dateval,
            listsort,
        )
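        # querystr is the base query string for the result page; the paging
        # links from generate_pagelinks() append &p=N to it.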

        return render(request, 'search/listsearch.html', {
            'hitcount': totalhits,
            'firsthit': firsthit,
            'lasthit': min(totalhits, firsthit + hitsperpage - 1),
            'query': request.GET['q'],
            'pagelinks': " ".join(
                generate_pagelinks(pagenum,
                                   totalhits // hitsperpage + 1,
                                   querystr)),
            'hits': [{
                # short keys as returned by the archives search API
                # (field names assumed)
                'date': h['d'],
                'subject': h['s'],
                'author': h['f'],
                'messageid': h['m'],
                'abstract': h['a'],
                'rank': h['r'],
            } for h in hits[firsthit - 1:firsthit + hitsperpage - 1]],
            'sortoptions': sortoptions,
            'lists': MailingList.objects.all().order_by("group__sortkey"),
            'dates': dateoptions,
        })
    else:
        # Website search is still done by making a regular pgsql connection
        # to the search server.
        try:
            conn = psycopg2.connect(settings.SEARCH_DSN)
            curs = conn.cursor()
        except Exception:
            return render(request, 'search/sitesearch.html', {
                'search_error': 'Could not connect to search database.'
            })

        # This is kind of a hack, but... Some URLs are flagged as internal
        # and should as such only be included in searches that explicitly
        # reference the suburl that they are in.
        if suburl and suburl.startswith('/docs/devel'):
            include_internal = True
        else:
            include_internal = False

        # perform the query for general web search
        try:
            curs.execute("SELECT * FROM site_search(%(query)s, %(firsthit)s, %(hitsperpage)s, %(allsites)s, %(suburl)s, %(internal)s)", {
                'query': query,
                'firsthit': firsthit - 1,
                'hitsperpage': hitsperpage,
                'allsites': allsites,
                'suburl': suburl,
                'internal': include_internal,
            })
        except psycopg2.ProgrammingError:
            return render(request, 'search/sitesearch.html', {
                'search_error': 'Error executing search query.'
            })

        hits = curs.fetchall()
        conn.close()
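        # site_search() returns one row per hit plus a trailing summary row;
        # the sixth column of that last row carries the total number of
        # matches, and the row itself is excluded from the rendered hits.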
        totalhits = int(hits[-1][5])

        if suburl:
            quoted_suburl = urllib.parse.quote_plus(suburl)
        else:
            quoted_suburl = ''

        querystr = "?q=%s&a=%s&u=%s" % (
            urllib.parse.quote_plus(query.encode('utf-8')),
            allsites and "1" or "0",
            quoted_suburl,
        )

        return render(request, 'search/sitesearch.html', {
            'allsites': allsites,
            'hitcount': totalhits,
            'firsthit': firsthit,
            'lasthit': min(totalhits, firsthit + hitsperpage - 1),
            'query': request.GET['q'],
            'pagelinks': " ".join(
                generate_pagelinks(pagenum,
                                   totalhits // hitsperpage + 1,
                                   querystr)),
            'hits': [{
                'title': h[3],
                'url': "%s%s" % (h[1], h[2]),
                # site_search marks matched terms in the abstract with [[[[[[ and
                # ]]]]]] sentinels; turn them into <strong> tags for highlighting
                'abstract': h[4].replace("[[[[[[", "<strong>").replace("]]]]]]", "</strong>"),
                'rank': h[5]} for h in hits[:-1]],
        })