d6ca3c66113d2f0bb6750106123e34c5ae5cb210
[ebuildfind.git] / planet.py
blobd6ca3c66113d2f0bb6750106123e34c5ae5cb210
1 import shelve
3 from feedcache.cache import Cache
5 from django.template.defaultfilters import truncatewords_html
6 from django.utils.encoding import smart_unicode
def TryEncoding(content):
    """Best-effort conversion of ``content`` to an encoded byte string.

    The charsets UTF-8, US-ASCII and ISO-8859-1 are tried in that order and
    the result of the first successful ``content.encode(charset)`` call is
    returned.

    Args:
        content: Text to encode. A byte string is returned unchanged.

    Returns:
        The encoded byte string, or ``None`` when ``content`` could not be
        encoded (for example because it has no ``encode`` method).
    """
    # Already-encoded data: re-encoding is at best a no-op and at worst fails
    # on non-ASCII bytes, which used to drop the content silently.
    if isinstance(content, bytes):
        return content

    for body_charset in ('UTF-8', 'US-ASCII', 'ISO-8859-1'):
        try:
            return content.encode(body_charset)
        except UnicodeError:
            # This charset cannot represent the text; try the next one.
            continue
        except Exception:
            # Not a charset problem (e.g. no ``encode`` method at all), so
            # retrying with a different charset cannot help.
            break
    return None
def GetContent(feed):
    """Return the body text of a feed entry.

    Args:
        feed: Entry object. When it has a ``content`` attribute, the
            ``"value"`` item of its first element is used; otherwise the
            entry's ``summary`` attribute is returned.

    Returns:
        ``feed.content[0]["value"]`` if ``content`` is present, else
        ``feed.summary``.
    """
    if not hasattr(feed, "content"):
        return feed.summary
    first_part = feed.content[0]
    return first_part["value"]
class Parser(object):
    """Iterate over the entries of a feed fetched through ``feedcache``.

    Each iteration step yields a dict with the keys ``"title"``,
    ``"content"`` and ``"link"``. The parser is its own iterator and keeps a
    single position counter, so it can be walked through only once.

    Feeds this was written against::

        http://planet.gentoo.org/atom.xml
        http://overlays.gentoo.org/rss20.xml
        http://www.gentoo.org/rdf/en/gentoo-news.rdf

    Example (needs network access, hence skipped by doctest):

    >>> f = Parser("http://planet.gentoo.org/atom.xml")  # doctest: +SKIP
    >>> f.GetTitle()  # doctest: +SKIP
    u'Planet Gentoo'
    >>> f.GetLink()  # doctest: +SKIP
    u'http://planet.gentoo.org/'
    >>> for e in f: print(e["title"])  # doctest: +SKIP
    """

    # Default location of the shelve file backing the feed cache.
    CACHE_PATH = "/home/timemachine/.feedcache"

    def __init__(self, url, summary=False, cache_path=None):
        """Fetch ``url`` through the on-disk feed cache.

        Args:
            url: Address of the feed to fetch.
            summary: When true, entry content is truncated to 30 words and
                suffixed with ``"..."`` during iteration.
            cache_path: Path of the shelve file used as cache storage.
                Defaults to ``Parser.CACHE_PATH``.
        """
        if cache_path is None:
            cache_path = self.CACHE_PATH

        storage = shelve.open(cache_path)
        try:
            self.feed = Cache(storage).fetch(url)
        finally:
            # Always release the shelve file, even if the fetch failed.
            storage.close()

        self.iterator = 0  # index of the next entry to hand out
        self.summary = summary

    def GetTitle(self):
        """Return the title of the fetched feed."""
        return self.feed.feed.title

    def GetLink(self):
        """Return the link of the fetched feed."""
        return self.feed.feed.link

    def next(self):
        """Return the next entry as a ``title``/``content``/``link`` dict.

        Raises:
            StopIteration: When every entry has been returned.
        """
        entries = self.feed.entries
        if self.iterator >= len(entries):
            raise StopIteration

        entry = entries[self.iterator]

        title = TryEncoding(entry["title"])
        content = TryEncoding(GetContent(entry))
        link = entry.link

        if self.summary:
            content = TryEncoding(truncatewords_html(content, 30))
            content = "".join((content, "..."))

        self.iterator += 1
        return {"title": title, "content": content, "link": link}

    # Python 3 spells the iterator protocol method ``__next__``.
    __next__ = next

    def __iter__(self):
        """Return the parser itself; it is its own (one-shot) iterator."""
        return self
def _test():
    """Fetch the Planet Gentoo feed in summary mode and print it.

    Prints the feed title and link, then the title, content and link of
    every entry, one value per line.

    A doctest-based runner used to live here and is kept for reference::

        import doctest, ebuilds.ebuildfind.planet as planet
        return doctest.testmod(planet)

    Other feed URLs that were used for manual testing::

        http://news.google.fr/news?pz=1&ned=us&hl=en&q=gentoo+AND+(linux+OR+OS+OR+Operating+System+OR+GNU)&output=rss
        http://www.gentoo.org/rdf/en/gentoo-news.rdf
        http://planet.gentoo.org/atom.xml
        http://overlays.gentoo.org/rss20.xml
    """
    parser = Parser("http://planet.gentoo.org/atom.xml", True)

    print(parser.GetTitle())
    print(parser.GetLink())

    for item in parser:
        for key in ("title", "content", "link"):
            print(item[key])
# Run the manual smoke test only when this file is executed as a script.
if __name__ == "__main__":
    _test()