From 6cc312bddb2f6b8babb1abcb7e4edfd98417ddd0 Mon Sep 17 00:00:00 2001 From: abki Date: Fri, 3 Jul 2009 14:55:17 +0200 Subject: [PATCH] mysql FTS updated templates sitemap --- commands/.gitignore | 1 + commands/parse.py | 18 ++++--- models.py | 106 +++++++++++++++++++++++++++++++++++++-- templates/ebuildfind/base.html | 12 +++-- templates/ebuildfind/search.html | 21 ++++---- urls.py | 39 ++++++++------ views.py | 20 ++++---- 7 files changed, 166 insertions(+), 51 deletions(-) create mode 100644 commands/.gitignore rewrite urls.py (65%) diff --git a/commands/.gitignore b/commands/.gitignore new file mode 100644 index 0000000..032c0df --- /dev/null +++ b/commands/.gitignore @@ -0,0 +1 @@ +overlays.xml \ No newline at end of file diff --git a/commands/parse.py b/commands/parse.py index 3b346a5..4a61dc1 100644 --- a/commands/parse.py +++ b/commands/parse.py @@ -10,7 +10,6 @@ from layman.debug import Message from ebuilds.ebuildfind.models import Ebuild, Overlay -from whoosh_manager import WhooshEbuildManager from manage_layman import LaymanManager OVERLAYS_BASE = settings.ROOT_PATH + "ebuilds/" + "ebuildfind/commands/var/overlays/" @@ -67,14 +66,14 @@ def ParseEbuilds(): i = 0 Ebuild.objects.all().delete() overlays = os.listdir(OVERLAYS_BASE) - - whoosh = WhooshEbuildManager(True) - + for overlay in overlays: path_overlay = os.path.join(OVERLAYS_BASE, overlay) if exclude_directory(path_overlay, overlay): overlay_name = overlay + print "is present", overlay_name + overlay = Overlay.objects.get(name=overlay) categories = os.listdir(path_overlay) @@ -110,17 +109,18 @@ def ParseEbuilds(): ebuild.overlay = overlay ebuild.save() - whoosh.Update(ebuild) + ebuild.index() def ParseOverlays(): h = LaymanManager() overlays = h.List() - - for name, overlay in overlays.items() : + + for name, overlay in overlays.items(): """ check if new overlay is ready """ o = Overlay.objects.all().filter(name=name) - + print "add ? 
> ", name if not o: + print "added !", name o = Overlay() o.name = name o.description = overlay["description"] @@ -128,7 +128,9 @@ def ParseOverlays(): o.save() def main(): + print "# >>> Parse Overlays" ParseOverlays() + print "# >>> Parse Ebuilds" ParseEbuilds() if __name__ == "__main__": diff --git a/models.py b/models.py index 2e6741e..5f1ee15 100644 --- a/models.py +++ b/models.py @@ -1,19 +1,112 @@ +import re, string + from django.db import models +from django.contrib.contenttypes.models import ContentType +from django.contrib.contenttypes import generic + +PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation) + ']') +FULL_TEXT_MIN_LENGTH = 3 + +FULL_TEXT_STOP_WORDS = frozenset([ + 'a', 'about', 'according', 'accordingly', 'affected', 'affecting', 'after', + 'again', 'against', 'all', 'almost', 'already', 'also', 'although', + 'always', 'am', 'among', 'an', 'and', 'any', 'anyone', 'apparently', 'are', + 'arise', 'as', 'aside', 'at', 'away', 'be', 'became', 'because', 'become', + 'becomes', 'been', 'before', 'being', 'between', 'both', 'briefly', 'but', + 'by', 'came', 'can', 'cannot', 'certain', 'certainly', 'could', 'did', 'do', + 'does', 'done', 'during', 'each', 'either', 'else', 'etc', 'ever', 'every', + 'following', 'for', 'found', 'from', 'further', 'gave', 'gets', 'give', + 'given', 'giving', 'gone', 'got', 'had', 'hardly', 'has', 'have', 'having', + 'here', 'how', 'however', 'i', 'if', 'in', 'into', 'is', 'it', 'itself', + 'just', 'keep', 'kept', 'knowledge', 'largely', 'like', 'made', 'mainly', + 'make', 'many', 'might', 'more', 'most', 'mostly', 'much', 'must', 'nearly', + 'necessarily', 'neither', 'next', 'no', 'none', 'nor', 'normally', 'not', + 'noted', 'now', 'obtain', 'obtained', 'of', 'often', 'on', 'only', 'or', + 'other', 'our', 'out', 'owing', 'particularly', 'past', 'perhaps', 'please', + 'poorly', 'possible', 'possibly', 'potentially', 'predominantly', 'present', + 'previously', 'primarily', 'probably', 'prompt', 'promptly', 
'put', + 'quickly', 'quite', 'rather', 'readily', 'really', 'recently', 'regarding', + 'regardless', 'relatively', 'respectively', 'resulted', 'resulting', + 'results', 'said', 'same', 'seem', 'seen', 'several', 'shall', 'should', + 'show', 'showed', 'shown', 'shows', 'significantly', 'similar', 'similarly', + 'since', 'slightly', 'so', 'some', 'sometime', 'somewhat', 'soon', + 'specifically', 'state', 'states', 'strongly', 'substantially', + 'successfully', 'such', 'sufficiently', 'than', 'that', 'the', 'their', + 'theirs', 'them', 'then', 'there', 'therefore', 'these', 'they', 'this', + 'those', 'though', 'through', 'throughout', 'to', 'too', 'toward', 'under', + 'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness', + 'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when', + 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely', + 'will', 'with', 'within', 'without', 'would', 'yet', 'you']) + +class Index(models.Model): + content_type = models.ForeignKey(ContentType) + object_id = models.PositiveIntegerField() + content = models.TextField() + + content_object = generic.GenericForeignKey() + + @staticmethod + def full_text_index(text): + if text: + text = PUNCTUATION_REGEX.sub(' ', text) + words = text.lower().split() + words = set(words) + words -= FULL_TEXT_STOP_WORDS + + for word in list(words): + if len(word) < FULL_TEXT_MIN_LENGTH: + words.remove(word) + else: + words = set() + return words + + @staticmethod + def index(obj): + keywords = set() + + properties = obj._meta.fields + for property in properties: + isurlfield = not isinstance(property, models.URLField) + istext = isinstance(property, models.CharField) or isinstance(property, models.TextField) + if istext and isurlfield: + text = property.value_from_object(obj) + mykeywords = Index.full_text_index(text) + keywords = keywords.union(mykeywords) + + text = " ".join(keywords) + # Create or Update + ctype = ContentType.objects.get_for_model(obj) + 
try: + index = Index.objects.get(content_type__pk=ctype.id, object_id=obj.id) + index.content = text + index.save() + except: + index = Index(content_object=obj, content=text) + index.save() + return index + +class Searchable(object): + @staticmethod + def search(cls, query): + ctype = ContentType.objects.get_for_model(cls) + return Index.objects.filter(content__search=query).filter(content_type=ctype) + + def index(self): + return Index.index(self) class Overlay(models.Model): name = models.CharField(max_length=255) description = models.TextField() link = models.URLField() - - def __repr__(self): + + def __unicode__(self): return self.name - def get_absolute_url(self): - return "/search/?q=%s" % self.name -class Ebuild(models.Model): +class Ebuild(Searchable, models.Model): name = models.CharField(max_length=255) category = models.CharField(max_length=255) version = models.CharField(max_length=255) @@ -29,3 +122,6 @@ class Ebuild(models.Model): def get_absolute_url(self): return "/search/?q=%s" % self.name + + def __unicode__(self): + return self.name diff --git a/templates/ebuildfind/base.html b/templates/ebuildfind/base.html index 4ef4227..b20327b 100644 --- a/templates/ebuildfind/base.html +++ b/templates/ebuildfind/base.html @@ -27,11 +27,15 @@ {% block help %} -

This is a search engine for ebuilds, it looks for the query in the overlay name, - category, description, application name and the exact version name. Give it a try now and here

+

This is a search engine for ebuilds, it looks for the query in the + category, description, application name. Give it a + try now + and here

+

3 July 09': The search engine is now supported by + MySQL FTS in boolean mode, try this out.

28 June 09': Some ebuilds and overlays are missing please - notify me bugs at google - code project page

+ notify me bugs + at my email address

{% endblock %} diff --git a/templates/ebuildfind/search.html b/templates/ebuildfind/search.html index 9ecf917..45c3dd4 100644 --- a/templates/ebuildfind/search.html +++ b/templates/ebuildfind/search.html @@ -11,21 +11,24 @@ {% endif %}