mysql FTS
[ebuildfind.git] / models.py
blob5f1ee15eecbe5d9164cbe63ca951304829ac4461
1 import re, string
3 from django.db import models
4 from django.contrib.contenttypes.models import ContentType
5 from django.contrib.contenttypes import generic
# Matches any single character from string.punctuation; used to turn
# punctuation into word separators before splitting text into words.
PUNCTUATION_REGEX = re.compile('[%s]' % re.escape(string.punctuation))

# Words shorter than this many characters are left out of the index.
FULL_TEXT_MIN_LENGTH = 3

# Common English words that are left out of the index.
FULL_TEXT_STOP_WORDS = frozenset("""
    a about according accordingly affected affecting after
    again against all almost already also although
    always am among an and any anyone apparently are
    arise as aside at away be became because become
    becomes been before being between both briefly but
    by came can cannot certain certainly could did do
    does done during each either else etc ever every
    following for found from further gave gets give
    given giving gone got had hardly has have having
    here how however i if in into is it itself
    just keep kept knowledge largely like made mainly
    make many might more most mostly much must nearly
    necessarily neither next no none nor normally not
    noted now obtain obtained of often on only or
    other our out owing particularly past perhaps please
    poorly possible possibly potentially predominantly present
    previously primarily probably prompt promptly put
    quickly quite rather readily really recently regarding
    regardless relatively respectively resulted resulting
    results said same seem seen several shall should
    show showed shown shows significantly similar similarly
    since slightly so some sometime somewhat soon
    specifically state states strongly substantially
    successfully such sufficiently than that the their
    theirs them then there therefore these they this
    those though through throughout to too toward under
    unless until up upon use used usefully usefulness
    using usually various very was we were what when
    where whether which while who whose why widely
    will with within without would yet you
""".split())
class Index(models.Model):
    """Full-text index row attached to an arbitrary model instance.

    ``content`` stores the space-separated keywords extracted from the
    indexed object's text fields; ``content_type`` and ``object_id`` point
    back at that object through the generic foreign key ``content_object``.
    """
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content = models.TextField()

    content_object = generic.GenericForeignKey()

    @staticmethod
    def full_text_index(text):
        """Return the set of indexable keywords found in *text*.

        Punctuation is replaced by spaces, the text is lower-cased and split
        on whitespace; stop words and words shorter than
        FULL_TEXT_MIN_LENGTH are dropped. Falsy input (``None``, ``""``)
        yields an empty set.
        """
        if not text:
            return set()

        words = PUNCTUATION_REGEX.sub(' ', text).lower().split()
        return set(
            word for word in words
            if len(word) >= FULL_TEXT_MIN_LENGTH
            and word not in FULL_TEXT_STOP_WORDS
        )

    @staticmethod
    def index(obj):
        """Create or update the Index row for *obj* and return it.

        Keywords are collected from every CharField/TextField of *obj*
        except URL fields, joined with spaces and stored in ``content``.
        """
        keywords = set()
        for field in obj._meta.fields:
            is_text = isinstance(field, (models.CharField, models.TextField))
            # URL fields are deliberately kept out of the keyword set.
            if is_text and not isinstance(field, models.URLField):
                keywords |= Index.full_text_index(field.value_from_object(obj))

        text = " ".join(keywords)

        # Create or Update
        ctype = ContentType.objects.get_for_model(obj)

        # Only a missing row means "create". Any other failure (database
        # error, duplicate rows, ...) must propagate instead of silently
        # inserting another Index row.
        try:
            entry = Index.objects.get(content_type__pk=ctype.id, object_id=obj.id)
        except Index.DoesNotExist:
            entry = Index(content_object=obj, content=text)
        else:
            entry.content = text
        entry.save()
        return entry
class Searchable(object):
    """Mixin that adds full-text search and indexing backed by Index."""

    @staticmethod
    def search(cls, query):
        """Return the Index rows for model *cls* whose content matches *query*.

        NOTE(review): this is a staticmethod, so the model class has to be
        passed explicitly as the first argument.
        """
        model_type = ContentType.objects.get_for_model(cls)
        matches = Index.objects.filter(content__search=query)
        return matches.filter(content_type=model_type)

    def index(self):
        """Build or refresh the Index row for this instance and return it."""
        return Index.index(self)
class Overlay(models.Model):
    """A named overlay with a description and a link."""

    name = models.CharField(max_length=255)
    description = models.TextField()
    link = models.URLField()

    def __unicode__(self):
        """Represent the overlay by its name."""
        return self.name
class Ebuild(Searchable, models.Model):
    """An ebuild record belonging to an Overlay; searchable via Searchable."""

    name = models.CharField(max_length=255)
    category = models.CharField(max_length=255)
    version = models.CharField(max_length=255)
    description = models.TextField()
    # NOTE(review): max_length on a TextField is presumably not enforced at
    # the database level -- confirm whether CharField was intended for the
    # three fields below.
    keywords = models.TextField(max_length=255)
    license = models.TextField(max_length=255)
    iuse = models.TextField(max_length=255)
    homepage = models.URLField()
    overlay = models.ForeignKey(Overlay)

    def path(self):
        """Return "/<overlay name>/<category>/<name>/<version>"."""
        return "/%s/%s/%s/%s" % (self.overlay.name, self.category, self.name, self.version)

    def get_absolute_url(self):
        """Return the search URL that queries for this ebuild's name.

        The name is percent-encoded so that characters with a special
        meaning in a query string ("+", "&", "#", spaces, ...) reach the
        search view unchanged; e.g. a raw "+" would otherwise be decoded
        as a space.
        """
        # Local import keeps this block self-contained.
        from django.utils.http import urlquote

        return "/search/?q=%s" % urlquote(self.name)

    def __unicode__(self):
        """Represent the ebuild by its name."""
        return self.name