switched back to whoosh with haystack
[ebuildfind.git] / planet.py
blob b84bc2899f8362983a8db39168ab0489ccf3ebb5
1 import shelve
3 from feedcache.cache import Cache
5 from django.conf import settings
7 from django.template.defaultfilters import truncatewords_html
def TryEncoding(content):
    """Encode ``content`` to a byte string, trying several charsets in turn.

    The charsets are tried in the order UTF-8, US-ASCII, ISO-8859-1 and the
    first successful encoding is returned.

    content -- the text to encode. A value that is already a byte string
        is returned unchanged, so already-encoded non-ASCII data is no
        longer lost.

    Returns the encoded byte string, or None when ``content`` cannot be
    encoded by any of the charsets or is not a text object at all
    (for example None).
    """
    # Already encoded: re-encoding would force an implicit ASCII decode on
    # Python 2 (failing for non-ASCII bytes) and is impossible on Python 3.
    if isinstance(content, bytes):
        return content

    for body_charset in ('UTF-8', 'US-ASCII', 'ISO-8859-1'):
        try:
            return content.encode(body_charset)
        except UnicodeError:
            # This charset cannot represent the text; try the next one.
            continue
        except (AttributeError, TypeError):
            # Not a text object; stay best-effort and report "no result".
            return None
    return None
def GetContent(feed):
    """Return the body text of a feed entry.

    feed -- an entry object. When it has a ``content`` attribute, the
        ``"value"`` of its first item is returned; otherwise the entry's
        ``summary`` attribute is returned.
    """
    if not hasattr(feed, "content"):
        return feed.summary
    first_part = feed.content[0]
    return first_part["value"]
class Parser(object):
    """Iterate over the entries of a cached feed as plain dictionaries.

    The feed is fetched through ``feedcache.cache.Cache`` backed by a
    ``shelve`` store. Each iteration step yields a dict with the keys
    ``"title"``, ``"content"`` and ``"link"``.

    Example feed URLs:
        "http://planet.gentoo.org/atom.xml"
        "http://overlays.gentoo.org/rss20.xml"
        "http://www.gentoo.org/rdf/en/gentoo-news.rdf"

    Illustrative session (Python 2 syntax; it needs network access and
    configured Django settings, so it is deliberately not a doctest; the
    outputs are the ones recorded in the original example, presumably
    for the planet feed)::

        f = Parser("http://planet.gentoo.org/atom.xml")
        f.GetTitle()    # u'Planet Gentoo'
        f.GetLink()     # u'http://planet.gentoo.org/'
        for e in f: print e["title"], e["content"]
    """

    def __init__(self, url, summary=False):
        """Fetch ``url`` through the shelve-backed feed cache.

        url -- address of the feed to fetch.
        summary -- when true, each entry's content is truncated to 30
            words and suffixed with "...".
        """
        # NOTE(review): plain concatenation assumes settings.ROOT_PATH ends
        # with a path separator -- confirm against the project settings.
        storage = shelve.open(settings.ROOT_PATH + "feedcache")
        try:
            self.feed = Cache(storage).fetch(url)
        finally:
            # Close the shelf even when the fetch fails.
            storage.close()
        self.iterator = 0  # index of the next entry to hand out
        self.summary = summary

    def GetTitle(self):
        """Return the title of the fetched feed."""
        return self.feed.feed.title

    def GetLink(self):
        """Return the link of the fetched feed."""
        return self.feed.feed.link

    def next(self):
        """Return the next entry as a dict with title, content and link.

        Raises StopIteration once every entry has been returned.
        """
        entries = self.feed.entries
        if self.iterator >= len(entries):
            raise StopIteration

        entry = entries[self.iterator]
        title = TryEncoding(entry["title"])
        content = TryEncoding(GetContent(entry))
        link = entry.link

        if self.summary:
            # Truncate on word boundaries, then mark the cut with "...".
            content = TryEncoding(truncatewords_html(content, 30))
            content = "".join((content, "..."))

        self.iterator += 1
        return {"title": title, "content": content, "link": link}

    # Python 3 spells the iterator protocol method __next__; keep both names
    # so the class is iterable on either major version.
    __next__ = next

    def __iter__(self):
        """Return self; the parser is a single-pass iterator (the position
        is never reset)."""
        return self