Take care: some files have been modified because of the local environment.
[ebuildfind.git] / planet.py
blob6c9abd5b7ba0630ac382170bd0105e77f2578cf2
1 import shelve
3 from feedcache.cache import Cache
5 import pdb
7 from django.template.defaultfilters import truncatewords_html
8 from django.utils.encoding import smart_unicode
def TryEncoding(content):
    """Return ``content`` as an encoded byte string, or None if impossible.

    Text is encoded with the first charset that succeeds among UTF-8,
    US-ASCII and ISO-8859-1.

    content: text to encode.  A value that is already a byte string is
        returned unchanged.
    Returns the encoded bytes, or None when ``content`` is not a string
    or no charset can represent it.
    """
    # Already-encoded data must not go through .encode(): on Python 2 that
    # triggers an implicit ASCII decode which fails on any non-ASCII byte,
    # and the content used to be dropped (None) as a result.
    # ``bytes`` is an alias of ``str`` on Python 2.6+.
    if isinstance(content, bytes):
        return content

    for body_charset in ('UTF-8', 'US-ASCII', 'ISO-8859-1'):
        try:
            return content.encode(body_charset)
        except UnicodeError:
            # This charset cannot represent the text; try the next one.
            continue
        except (AttributeError, TypeError):
            # Not a string-like object: keep the best-effort contract and
            # report "nothing usable" instead of raising.
            return None
    return None
def GetContent(feed):
    """Return the body text of a feed entry.

    feed: entry-like object.  When it exposes a ``content`` attribute, the
        ``"value"`` of its first element is returned; otherwise the
        entry's ``summary`` attribute is returned.
    """
    # Guard clause: entries without a ``content`` attribute only carry a
    # summary.
    if not hasattr(feed, "content"):
        return feed.summary

    first_block = feed.content[0]
    return first_block["value"]
class Parser:
    """Iterate over the entries of a cached RSS/Atom feed.

    The feed is fetched through ``feedcache.Cache`` backed by a ``shelve``
    store named ``.feedcache``.  Iterating over the instance yields one
    dict per entry with the keys ``"title"``, ``"content"`` and ``"link"``.

    Feed URLs noted by the original author:
        "http://planet.gentoo.org/atom.xml"
        "http://overlays.gentoo.org/rss20.xml"
        "http://www.gentoo.org/rdf/en/gentoo-news.rdf"

    >>> f = Parser("http://planet.gentoo.org/atom.xml")
    >>> f.GetTitle()
    u'Planet Gentoo'
    >>> f.GetLink()
    u'http://planet.gentoo.org/'
    >>> for e in f: print e["title"], e["content"]  # doctest: +SKIP
    """

    def __init__(self, url, summary=False):
        """Fetch ``url`` through the on-disk feed cache.

        url: address of the feed, passed to ``Cache.fetch``.
        summary: when true, each entry's content is truncated to 30 words
            and suffixed with "...".
        """
        storage = shelve.open(".feedcache")
        try:
            fc = Cache(storage)
            self.feed = fc.fetch(url)
            # Index of the next entry handed out by next().
            self.iterator = 0
            self.summary = summary
        finally:
            # Always release the shelve file, even when the fetch fails.
            storage.close()

    def GetTitle(self):
        """Return the title of the feed."""
        return self.feed.feed.title

    def GetLink(self):
        """Return the link of the feed."""
        return self.feed.feed.link

    def next(self):
        """Return the next entry as a dict.

        The dict has the keys ``"title"``, ``"content"`` and ``"link"``;
        title and content are passed through ``TryEncoding``.

        Raises StopIteration once every entry has been returned.
        """
        if self.iterator >= len(self.feed.entries):
            raise StopIteration

        entry = self.feed.entries[self.iterator]

        title = TryEncoding(entry["title"])
        content = TryEncoding(GetContent(entry))
        link = entry.link

        if self.summary:
            # Truncation happens on the markup, so the result is encoded
            # again before the ellipsis is appended.
            content = TryEncoding(truncatewords_html(content, 30))
            content = "".join((content, "..."))

        self.iterator += 1
        return {"title": title, "content": content, "link": link}

    # Python 3 spells the iterator-protocol method ``__next__``; keeping
    # both names lets ``for`` loops and ``next()`` work on either version.
    __next__ = next

    def __iter__(self):
        """Return self; the instance is its own (single-pass) iterator."""
        return self
def _test():
    """Manual smoke test: print the Planet Gentoo feed in summary mode.

    Prints the feed title and link, then the title, content and link of
    every entry.

    A doctest-based variant was left here by the original author:
        import doctest, ebuilds.ebuildfind.planet as planet
        return doctest.testmod(planet)

    Other feed URLs noted by the original author:
        "http://news.google.fr/news?pz=1&ned=us&hl=en&q=gentoo+AND+(linux+OR+OS+OR+Operating+System+OR+GNU)&output=rss"
        "http://www.gentoo.org/rdf/en/gentoo-news.rdf"
        "http://planet.gentoo.org/atom.xml"
        "http://overlays.gentoo.org/rss20.xml"
    """
    planet_feed = Parser("http://planet.gentoo.org/atom.xml", True)

    print(planet_feed.GetTitle())
    print(planet_feed.GetLink())

    # One line per field, in the fixed order title / content / link.
    for item in planet_feed:
        for field in ("title", "content", "link"):
            print(item[field])
if __name__ == "__main__":
    # Executed as a script: run the manual smoke test.
    _test()