From 6cc312bddb2f6b8babb1abcb7e4edfd98417ddd0 Mon Sep 17 00:00:00 2001
From: abki
Date: Fri, 3 Jul 2009 14:55:17 +0200
Subject: [PATCH] mysql FTS updated templates sitemap
---
commands/.gitignore | 1 +
commands/parse.py | 18 ++++---
models.py | 106 +++++++++++++++++++++++++++++++++++++--
templates/ebuildfind/base.html | 12 +++--
templates/ebuildfind/search.html | 21 ++++----
urls.py | 39 ++++++++------
views.py | 20 ++++----
7 files changed, 166 insertions(+), 51 deletions(-)
create mode 100644 commands/.gitignore
rewrite urls.py (65%)
diff --git a/commands/.gitignore b/commands/.gitignore
new file mode 100644
index 0000000..032c0df
--- /dev/null
+++ b/commands/.gitignore
@@ -0,0 +1 @@
+overlays.xml
\ No newline at end of file
diff --git a/commands/parse.py b/commands/parse.py
index 3b346a5..4a61dc1 100644
--- a/commands/parse.py
+++ b/commands/parse.py
@@ -10,7 +10,6 @@ from layman.debug import Message
from ebuilds.ebuildfind.models import Ebuild, Overlay
-from whoosh_manager import WhooshEbuildManager
from manage_layman import LaymanManager
OVERLAYS_BASE = settings.ROOT_PATH + "ebuilds/" + "ebuildfind/commands/var/overlays/"
@@ -67,14 +66,14 @@ def ParseEbuilds():
i = 0
Ebuild.objects.all().delete()
overlays = os.listdir(OVERLAYS_BASE)
-
- whoosh = WhooshEbuildManager(True)
-
+
for overlay in overlays:
path_overlay = os.path.join(OVERLAYS_BASE, overlay)
if exclude_directory(path_overlay, overlay):
overlay_name = overlay
+ print "is present", overlay_name
+
overlay = Overlay.objects.get(name=overlay)
categories = os.listdir(path_overlay)
@@ -110,17 +109,18 @@ def ParseEbuilds():
ebuild.overlay = overlay
ebuild.save()
- whoosh.Update(ebuild)
+ ebuild.index()
def ParseOverlays():
h = LaymanManager()
overlays = h.List()
-
- for name, overlay in overlays.items() :
+
+ for name, overlay in overlays.items():
""" check if new overlay is ready """
o = Overlay.objects.all().filter(name=name)
-
+ print "add ? > ", name
if not o:
+ print "added !", name
o = Overlay()
o.name = name
o.description = overlay["description"]
@@ -128,7 +128,9 @@ def ParseOverlays():
o.save()
def main():
+ print "# >>> Parse Overlays"
ParseOverlays()
+ print "# >>> Parse Ebuilds"
ParseEbuilds()
if __name__ == "__main__":
diff --git a/models.py b/models.py
index 2e6741e..5f1ee15 100644
--- a/models.py
+++ b/models.py
@@ -1,19 +1,112 @@
+import re, string
+
from django.db import models
+from django.contrib.contenttypes.models import ContentType
+from django.contrib.contenttypes import generic
+
+# Matches any single character listed in string.punctuation; used by
+# Index.full_text_index to turn punctuation into spaces before splitting.
+PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation) + ']')
+# Words with fewer characters than this are dropped from the index content.
+FULL_TEXT_MIN_LENGTH = 3
+
+# Lower-case words subtracted from the keyword set before it is stored.
+# A frozenset so the table cannot be mutated and can be used directly in
+# set difference.
+FULL_TEXT_STOP_WORDS = frozenset([
+ 'a', 'about', 'according', 'accordingly', 'affected', 'affecting', 'after',
+ 'again', 'against', 'all', 'almost', 'already', 'also', 'although',
+ 'always', 'am', 'among', 'an', 'and', 'any', 'anyone', 'apparently', 'are',
+ 'arise', 'as', 'aside', 'at', 'away', 'be', 'became', 'because', 'become',
+ 'becomes', 'been', 'before', 'being', 'between', 'both', 'briefly', 'but',
+ 'by', 'came', 'can', 'cannot', 'certain', 'certainly', 'could', 'did', 'do',
+ 'does', 'done', 'during', 'each', 'either', 'else', 'etc', 'ever', 'every',
+ 'following', 'for', 'found', 'from', 'further', 'gave', 'gets', 'give',
+ 'given', 'giving', 'gone', 'got', 'had', 'hardly', 'has', 'have', 'having',
+ 'here', 'how', 'however', 'i', 'if', 'in', 'into', 'is', 'it', 'itself',
+ 'just', 'keep', 'kept', 'knowledge', 'largely', 'like', 'made', 'mainly',
+ 'make', 'many', 'might', 'more', 'most', 'mostly', 'much', 'must', 'nearly',
+ 'necessarily', 'neither', 'next', 'no', 'none', 'nor', 'normally', 'not',
+ 'noted', 'now', 'obtain', 'obtained', 'of', 'often', 'on', 'only', 'or',
+ 'other', 'our', 'out', 'owing', 'particularly', 'past', 'perhaps', 'please',
+ 'poorly', 'possible', 'possibly', 'potentially', 'predominantly', 'present',
+ 'previously', 'primarily', 'probably', 'prompt', 'promptly', 'put',
+ 'quickly', 'quite', 'rather', 'readily', 'really', 'recently', 'regarding',
+ 'regardless', 'relatively', 'respectively', 'resulted', 'resulting',
+ 'results', 'said', 'same', 'seem', 'seen', 'several', 'shall', 'should',
+ 'show', 'showed', 'shown', 'shows', 'significantly', 'similar', 'similarly',
+ 'since', 'slightly', 'so', 'some', 'sometime', 'somewhat', 'soon',
+ 'specifically', 'state', 'states', 'strongly', 'substantially',
+ 'successfully', 'such', 'sufficiently', 'than', 'that', 'the', 'their',
+ 'theirs', 'them', 'then', 'there', 'therefore', 'these', 'they', 'this',
+ 'those', 'though', 'through', 'throughout', 'to', 'too', 'toward', 'under',
+ 'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness',
+ 'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when',
+ 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
+ 'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
+
+class Index(models.Model):
+    """Keyword row attached to an arbitrary model instance.
+
+    ``content`` holds the space-separated keywords extracted from the
+    target object's text fields; ``content_type``/``object_id`` identify
+    that object and ``content_object`` resolves it.
+    """
+    content_type = models.ForeignKey(ContentType)
+    object_id = models.PositiveIntegerField()
+    content = models.TextField()
+
+    content_object = generic.GenericForeignKey()
+
+    @staticmethod
+    def full_text_index(text):
+        """Return the set of keywords found in *text*.
+
+        Punctuation is replaced by spaces, the text is lower-cased and
+        split on whitespace, then stop words and words shorter than
+        FULL_TEXT_MIN_LENGTH are discarded.  An empty or None *text*
+        yields an empty set.
+        """
+        if not text:
+            return set()
+        text = PUNCTUATION_REGEX.sub(' ', text)
+        words = set(text.lower().split()) - FULL_TEXT_STOP_WORDS
+        return set(word for word in words if len(word) >= FULL_TEXT_MIN_LENGTH)
+
+    @staticmethod
+    def index(obj):
+        """Create or update the Index row for *obj* and return it.
+
+        Keywords are collected from every CharField/TextField of *obj*
+        except URL fields.
+        """
+        keywords = set()
+        for field in obj._meta.fields:
+            is_text = isinstance(field, (models.CharField, models.TextField))
+            # URL fields are deliberately left out of the keyword set.
+            if is_text and not isinstance(field, models.URLField):
+                keywords |= Index.full_text_index(field.value_from_object(obj))
+
+        text = " ".join(keywords)
+        # Create or Update
+        ctype = ContentType.objects.get_for_model(obj)
+        try:
+            index = Index.objects.get(content_type__pk=ctype.id, object_id=obj.id)
+        except Index.DoesNotExist:
+            # Only a missing row means "create"; any other failure (database
+            # error, several rows for one object) must surface instead of
+            # silently inserting another row.
+            index = Index(content_object=obj, content=text)
+        else:
+            index.content = text
+        index.save()
+        return index
+
+class Searchable(object):
+    """Mixin that lets a model be indexed into, and searched through, Index."""
+
+    @staticmethod
+    def search(cls, query):
+        """Return the Index rows of model *cls* whose content matches *query*.
+
+        This is a static method, so the model class must be passed
+        explicitly as the first argument.
+        """
+        model_type = ContentType.objects.get_for_model(cls)
+        matches = Index.objects.filter(content__search=query)
+        return matches.filter(content_type=model_type)
+
+    def index(self):
+        """Create or refresh this object's Index row and return it."""
+        return Index.index(self)
class Overlay(models.Model):
name = models.CharField(max_length=255)
description = models.TextField()
link = models.URLField()
-
- def __repr__(self):
+
+ def __unicode__(self):
return self.name
- def get_absolute_url(self):
- return "/search/?q=%s" % self.name
-class Ebuild(models.Model):
+class Ebuild(Searchable, models.Model):
name = models.CharField(max_length=255)
category = models.CharField(max_length=255)
version = models.CharField(max_length=255)
@@ -29,3 +122,6 @@ class Ebuild(models.Model):
def get_absolute_url(self):
return "/search/?q=%s" % self.name
+
+ def __unicode__(self):
+ return self.name
diff --git a/templates/ebuildfind/base.html b/templates/ebuildfind/base.html
index 4ef4227..b20327b 100644
--- a/templates/ebuildfind/base.html
+++ b/templates/ebuildfind/base.html
@@ -27,11 +27,15 @@
{% block help %}
- This is a search engine for ebuilds, it looks for the query in the overlay name,
- category, description, application name and the exact version name. Give it a try now and here
+ This is a search engine for ebuilds, it looks for the query in the
+ category, description, application name. Give it a
+ try now
+ and here
+ 3 July 09': The search engine is now supported by
+ MySQL FTS in boolean mode, try this out .
28 June 09': Some ebuilds and overlays are missing please
- notify me bugs at google
- code project page
+ notify me bugs
+ at my email address
{% endblock %}
diff --git a/templates/ebuildfind/search.html b/templates/ebuildfind/search.html
index 9ecf917..45c3dd4 100644
--- a/templates/ebuildfind/search.html
+++ b/templates/ebuildfind/search.html
@@ -11,21 +11,24 @@
{% endif %}
- {% for ebuild in results %}
+ {% for result in results %}
- {{ebuild.path}}
- homepage
+ {{result.content_object.path}}
+ homepage
- {% if ebuild.keywords %}
- {{ebuild.keywords}}
+ overlay
+ homepage
+
+ {% if result.content_object.keywords %}
+ {{result.content_object.keywords}}
{% endif %}
- {% if ebuild.iuse %}
- {{ebuild.iuse}}
+ {% if result.content_object.iuse %}
+ {{result.content_object.iuse}}
{% endif %}
- {{ebuild.license}}
- {{ebuild.description}}
+ {{result.content_object.license}}
+ {{result.content_object.description}}
diff --git a/urls.py b/urls.py
dissimilarity index 65%
index 41d3376..e2db4a3 100644
--- a/urls.py
+++ b/urls.py
@@ -1,15 +1,24 @@
-from django.conf.urls.defaults import *
-import views
-
-urlpatterns = patterns('',
- # Example:
-
-
- # Uncomment the admin/doc line below and add 'django.contrib.admindocs'
- # to INSTALLED_APPS to enable admin documentation:
- # (r'^admin/doc/', include('django.contrib.admindocs.urls')),
-
- # Uncomment the next line to enable the admin:
- (r'^search/', views.search),
- (r'^$', views.index),
-)
+from django.conf.urls.defaults import *
+import views
+from django.contrib.sitemaps import GenericSitemap
+from models import Ebuild
+from django.contrib import admin
+
+admin.autodiscover()
+# Options handed to GenericSitemap: every Ebuild row gets a sitemap entry.
+info_dict = {
+    'queryset': Ebuild.objects.all(),
+}
+# Sitemap sections by name; the name is what the <section> URL group matches.
+sitemaps = {
+    'ebuild': GenericSitemap(info_dict),
+}
+
+urlpatterns = patterns('',
+    # Sitemap index first, then one sitemap file per section.
+    (r'^sitemap\.xml$', 'django.contrib.sitemaps.views.index', {'sitemaps': sitemaps}),
+    (r'^sitemap-(?P<section>.+)\.xml$', 'django.contrib.sitemaps.views.sitemap', {'sitemaps': sitemaps}),
+    (r'^search/', views.search),
+    (r'^$', views.index),
+    (r'^admin/(.*)', admin.site.root),
+)
diff --git a/views.py b/views.py
index aabd229..d10edcf 100644
--- a/views.py
+++ b/views.py
@@ -2,9 +2,11 @@ from commands.whoosh_manager import WhooshEbuildManager
from django.shortcuts import render_to_response
from planet import Parser
-from models import Ebuild, Overlay
+from models import Ebuild, Index
-whoosh = WhooshEbuildManager()
+import re, string
+
+PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation.replace("-", "").replace("+", "")) + ']')
def index(request):
GPlanet = Parser("http://planet.gentoo.org/atom.xml")
@@ -13,6 +15,7 @@ def index(request):
GGoogle = Parser("http://news.google.fr/news?pz=1&ned=us&hl=en&q=gentoo+AND+(linux+OR+OS+OR+Operating+System+OR+GNU)&output=rss")
response = dict()
+
response['GGoogle'] = GGoogle
response['GNews'] = GNews
response['GOverlays'] = GOverlays
@@ -33,18 +36,15 @@ def search(request):
if request.method == 'GET':
try:
query = request.GET["q"]
+ query = PUNCTUATION_REGEX.sub(' ', query)
except:
pass
-
+
if(len(query)>2):
response["error"] = False
response["query"] = query
-
- results = whoosh.Search(query)
- response["results"] = list()
-
- for result in results:
- ebuild = Ebuild.objects.get(id=result["permalink"])
- response["results"].append(ebuild)
+ results = Ebuild.search(Ebuild, query)
+ response["results"] = results
+ response["nb"] = len(results)
return render_to_response("ebuildfind/search.html", response)
--
2.11.4.GIT