regex hack to get 2 & 3 letter words to work
[ebuildfind.git] / models.py
blob0f83e62095fb5edd39146210d8329ddac15e2533
1 import re, string
3 from django.db import models
4 from django.contrib.contenttypes.models import ContentType
5 from django.contrib.contenttypes import generic
# Character class matching any single ASCII punctuation character; the
# indexer substitutes a space for each match before splitting into words.
PUNCTUATION_REGEX = re.compile('[%s]' % re.escape(string.punctuation))

# Lower-case words that carry no search value and are dropped from the index.
FULL_TEXT_STOP_WORDS = frozenset("""
    a about according accordingly affected affecting after
    again against all almost already also although
    always am among an and any anyone apparently are
    arise as aside at away be became because become
    becomes been before being between both briefly but
    by came can cannot certain certainly could did do
    does done during each either else etc ever every
    following for found from further gave gets give
    given giving gone got had hardly has have having
    here how however i if in into is it itself
    just keep kept knowledge largely like made mainly
    make many might more most mostly much must nearly
    necessarily neither next no none nor normally not
    noted now obtain obtained of often on only or
    other our out owing particularly past perhaps please
    poorly possible possibly potentially predominantly present
    previously primarily probably prompt promptly put
    quickly quite rather readily really recently regarding
    regardless relatively respectively resulted resulting
    results said same seem seen several shall should
    show showed shown shows significantly similar similarly
    since slightly so some sometime somewhat soon
    specifically state states strongly substantially
    successfully such sufficiently than that the their
    theirs them then there therefore these they this
    those though through throughout to too toward under
    unless until up upon use used usefully usefulness
    using usually various very was we were what when
    where whether which while who whose why widely
    will with within without would yet you
""".split())
class Index(models.Model):
    """Full-text index row for one model instance.

    ``content`` holds the space-joined keyword set extracted from the text
    fields of the object referenced through the generic foreign key
    (``content_type`` / ``object_id``).
    """

    # Shortest word kept in the index.  The code previously read a
    # FULL_TEXT_MIN_LENGTH name that was defined nowhere, raising NameError
    # as soon as any word survived stop-word removal.
    # NOTE(review): 2 is an assumption chosen so that 2- and 3-letter words
    # stay searchable through ``Searchable.regex`` -- confirm the threshold.
    FULL_TEXT_MIN_LENGTH = 2

    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content = models.TextField()

    content_object = generic.GenericForeignKey()

    @staticmethod
    def full_text_index(text):
        """Return the set of indexable keywords found in ``text``.

        Punctuation is replaced by spaces, the text is lower-cased and split
        on whitespace, then stop words and words shorter than
        ``FULL_TEXT_MIN_LENGTH`` are discarded.  Falsy input (``None`` or an
        empty string) yields an empty set.
        """
        if not text:
            return set()

        text = PUNCTUATION_REGEX.sub(' ', text)
        words = set(text.lower().split())
        words -= FULL_TEXT_STOP_WORDS

        min_length = Index.FULL_TEXT_MIN_LENGTH
        return set(word for word in words if len(word) >= min_length)

    @staticmethod
    def index(obj):
        """Create or refresh the ``Index`` row for ``obj`` and return it.

        Keywords are gathered from every ``CharField`` / ``TextField`` value
        of ``obj``; URL fields are skipped.  Errors other than "no index row
        yet" (for example several matching rows, or a failing save) now
        propagate instead of silently inserting a duplicate row.
        """
        keywords = set()
        for field in obj._meta.fields:
            is_text = isinstance(field, (models.CharField, models.TextField))
            # URLs are excluded explicitly even though they pass the text test.
            if is_text and not isinstance(field, models.URLField):
                keywords |= Index.full_text_index(field.value_from_object(obj))

        text = " ".join(keywords)

        # Create or update.
        ctype = ContentType.objects.get_for_model(obj)
        try:
            index = Index.objects.get(content_type__pk=ctype.id, object_id=obj.id)
        except Index.DoesNotExist:
            index = Index(content_object=obj, content=text)
        else:
            index.content = text
        index.save()
        return index
class Searchable(object):
    """Mixin that connects a model to the full-text ``Index`` table.

    ``search`` and ``regex`` are static methods, so the model class has to
    be passed explicitly as their first argument.
    """

    @staticmethod
    def search(cls, query):
        """Return ``Index`` rows for model ``cls`` matching ``query`` via the ``search`` lookup."""
        model_type = ContentType.objects.get_for_model(cls)
        hits = Index.objects.filter(content__search=query)
        return hits.filter(content_type=model_type)

    @staticmethod
    def regex(cls, query):
        """Return ``Index`` rows for model ``cls`` whose content matches the regular expression ``query``."""
        model_type = ContentType.objects.get_for_model(cls)
        hits = Index.objects.filter(content__regex=query)
        return hits.filter(content_type=model_type)

    def index(self):
        """Create or refresh this instance's ``Index`` row and return it."""
        return Index.index(self)
class Overlay(models.Model):
    """An overlay, described by a name, a free-text description and a link."""

    # Field order is kept as declared; Django uses it for column order.
    name = models.CharField(max_length=255)
    description = models.TextField()
    link = models.URLField()

    def __unicode__(self):
        """Use the overlay name as its text representation."""
        return self.name
class Ebuild(Searchable, models.Model):
    """A single ebuild (one package version) belonging to an ``Overlay``.

    Inherits ``search`` / ``regex`` / ``index`` from ``Searchable``.
    """

    name = models.CharField(max_length=255)
    category = models.CharField(max_length=255)
    version = models.CharField(max_length=255)
    description = models.TextField()
    keywords = models.TextField(max_length=255)
    license = models.TextField(max_length=255)
    iuse = models.TextField(max_length=255)
    homepage = models.URLField()
    overlay = models.ForeignKey(Overlay)

    def path(self):
        """Return ``/<overlay name>/<category>/<name>/<version>``."""
        return "/%s/%s/%s/%s" % (self.overlay.name, self.category, self.name, self.version)

    def get_absolute_url(self):
        """Return the search URL for this ebuild's name.

        The name is percent-encoded: package names such as ``gtk+`` or
        ``libsigc++`` would otherwise decode to a different query, because a
        raw ``+`` in a query string is read as a space.
        """
        # Local import so this block does not depend on a file-level import.
        from django.utils.http import urlquote
        return "/search/?q=%s" % urlquote(self.name)

    def __unicode__(self):
        """Use the ebuild name as its text representation."""
        return self.name