fixed timestamps, parse feed, queue downloads
[riffle.git] / pod.py
blob2b0a5c8e030436897c1b52d9fbbad4842e5cd262
1 #!/usr/bin/env python
3 from __future__ import with_statement
4 import urllib2
5 from email.utils import parsedate
6 import time
7 import feedparser
class Feed:
    """A subscribed feed, identified by its URL.

    Attributes:
        url: address of the feed document.
        timestamp: last recorded remote timestamp of the feed (seconds since
            the epoch), or None when nothing has been recorded yet.
        watermark: entries dated at or before this moment (seconds since the
            epoch) are not reported by get_new_episodes().
    """

    # consider timestamps earlier than this non-existent
    reasonable_timestamp = time.mktime(time.strptime("2008", "%Y"))

    def __init__(self, url):
        self.url = url
        # last url timestamp if any
        self.timestamp = None
        # ignore episodes older than this
        self.watermark = time.mktime(time.strptime("03 2008", "%m %Y"))

    def _to_epoch(self, time_tuple):
        """Convert a time tuple to seconds since the epoch, or None.

        Returns None when ``time_tuple`` is None (e.g. an unparsable date)
        or when time.mktime() rejects it, so callers never have to deal
        with a conversion exception.
        """
        if time_tuple is None:
            return None
        try:
            # NOTE(review): mktime() interprets the tuple as local time; the
            # values are only compared with other mktime() results here.
            return time.mktime(time_tuple)
        except (TypeError, ValueError, OverflowError):
            return None

    def get_remote_timestamp(self):
        """Return the feed's Last-Modified time as epoch seconds.

        Opens ``self.url`` and reads the ``Last-Modified`` response header.
        Returns None when the header is absent, cannot be parsed, or is not
        later than ``reasonable_timestamp``. Errors raised by
        urllib2.urlopen() propagate to the caller.
        """
        conn = urllib2.urlopen(self.url)
        try:
            headers = conn.info()
            if 'Last-Modified' not in headers:
                return None
            # parsedate() yields None for a malformed header value
            t = self._to_epoch(parsedate(headers['Last-Modified']))
            if t is not None and t > self.reasonable_timestamp:
                return t
            return None
        finally:
            conn.close()

    def is_updated(self):
        """Tell whether the feed may have changed since ``self.timestamp``.

        Returns True when no timestamp has been recorded locally, when the
        remote side offers no usable timestamp, or when the remote timestamp
        is newer than the recorded one.
        """
        if self.timestamp is None:
            # nothing to compare against: assume the feed changed
            return True
        remote_timestamp = self.get_remote_timestamp()
        return remote_timestamp is None or self.timestamp < remote_timestamp

    def get_new_episodes(self):
        """Yield the enclosure URLs of entries newer than the watermark.

        The feed is parsed with feedparser. Entries without a usable
        ``modified_parsed`` date are skipped because they cannot be compared
        with the watermark. ``self.watermark`` is advanced to the newest
        entry date only after the generator has been fully consumed.
        """
        tree = feedparser.parse(self.url)
        watermark = self.watermark
        for e in tree.entries:
            timestamp = self._to_epoch(getattr(e, 'modified_parsed', None))
            if timestamp is None:
                continue
            if timestamp > self.watermark and 'enclosures' in e:
                for encl in e.enclosures:
                    yield encl.href
            if timestamp > watermark:
                watermark = timestamp
        self.watermark = watermark
def get_subscribed_feeds(path="feeds.lst"):
    """Yield a Feed for every URL listed in the subscription file.

    The file holds one URL per line; surrounding whitespace is stripped and
    blank lines are skipped so that no Feed is created with an empty URL.

    Args:
        path: name of the subscription file (defaults to "feeds.lst").
    """
    with open(path, "r") as f:
        for line in f:
            url = line.strip()
            if url:
                yield Feed(url)
def get_updated_feeds():
    """Yield only those subscribed feeds whose is_updated() is true."""
    for candidate in get_subscribed_feeds():
        if not candidate.is_updated():
            continue
        yield candidate
def download_episode(url):
    """Placeholder for downloading the episode at ``url``.

    Not implemented yet: does nothing and returns None, which callers
    testing the result for truth treat as "not downloaded".
    """
    return None
# URLs of episodes that still have to be downloaded
dl_queue = []

for feed in get_updated_feeds():
    print(feed.url)
    for url in feed.get_new_episodes():
        print(url)
        dl_queue.append(url)
    # update feed watermark

# URLs of episodes that were downloaded successfully
file_manager = []

# Iterate over a snapshot: removing from dl_queue while looping over it
# directly would make the loop skip the element after each removal.
for url in list(dl_queue):
    if download_episode(url):
        dl_queue.remove(url)
        file_manager.append(url)

# TODO: file_manager.expire_and_prune() -- file_manager is a plain list for
# now and has no such method.