implemented episode download with wget
[riffle.git] / pod.py
blob21dab0d90c9a8c9cbecdbc33633202985bcae5f2
1 #!/usr/bin/env python
3 from __future__ import with_statement
4 from contextlib import contextmanager
6 import os, os.path
7 import urllib2, urlparse
8 from email.utils import parsedate
9 import time
10 import feedparser
11 import pickle
12 from datetime import datetime, timedelta
13 from subprocess import Popen
class conf:
    """Static configuration holder -- accessed via class attributes only,
    never instantiated."""
    # Directory under which downloaded episode files are stored.
    media_dir = "./media"
@contextmanager
def url_open(url):
    """Context manager yielding an open urllib2 connection to *url*.

    Guarantees the connection is closed even when the with-body raises;
    the original closed it outside any try/finally, leaking the socket
    on every exception.
    """
    connection = urllib2.urlopen(url)
    try:
        yield connection
    finally:
        connection.close()
class Feed:
    """State for one subscribed feed: freshness timestamps plus the
    watermark separating already-seen episodes from new ones."""

    # Consider timestamps earlier than this non-existent (bogus server data).
    reasonable_timestamp = time.mktime(time.strptime("2008", "%Y"))
    # Never poll the same feed more often than this.
    min_ping_period = timedelta(minutes=30)
    # Sentinel "never pinged" moment, safely before any real ping.
    long_ago = datetime(1999, 1, 1)

    def __init__(self, url):
        self.url = url
        # Server Last-Modified as of the last committed fetch (None = never).
        self.timestamp = None
        # Server Last-Modified seen on the most recent freshness check.
        self.remote_timestamp = None
        # Ignore episodes published before this Unix timestamp.
        self.watermark = time.mktime(time.strptime("03 2008", "%m %Y"))
        self.ping_timestamp = self.long_ago

    def get_remote_timestamp(self):
        """Return the server's Last-Modified for the feed URL as a Unix
        timestamp, or None when the header is absent.  Records the ping time."""
        with url_open(self.url) as conn:
            self.ping_timestamp = datetime.now()
            if 'Last-Modified' in conn.info():
                return time.mktime(parsedate(conn.info()['Last-Modified']))
            return None

    def may_ping(self):
        """True when at least min_ping_period has passed since the last ping."""
        return datetime.now() - self.ping_timestamp > self.min_ping_period

    def is_updated(self):
        """True when the feed may have new content since our stored timestamp.

        Errs on the side of True: a never-fetched feed, or a server that
        sends no Last-Modified header, always counts as updated.
        """
        self.remote_timestamp = None
        if self.timestamp is None:
            return True
        self.remote_timestamp = self.get_remote_timestamp()
        # BUGFIX: the comparison previously read the bare name
        # `remote_timestamp`, raising NameError whenever a stored
        # timestamp existed.
        return self.remote_timestamp is None or \
               self.timestamp < self.remote_timestamp

    def get_new_episodes(self):
        """Yield enclosure URLs of entries newer than the watermark, then
        advance the watermark to the newest entry seen."""
        tree = feedparser.parse(self.url)
        self.ping_timestamp = datetime.now()
        watermark = self.watermark
        for e in tree.entries:
            timestamp = time.mktime(e.modified_parsed)
            # Compare against the *original* watermark so several new
            # entries found in one pass are all yielded.
            if timestamp > self.watermark and 'enclosures' in e:
                for encl in e.enclosures:
                    yield encl.href
            if timestamp > watermark:
                watermark = timestamp
        self.watermark = watermark

    def update_timestamp(self):
        """Commit the remote timestamp observed by is_updated()."""
        self.timestamp = self.remote_timestamp
def try_load(fname, default):
    """Unpickle and return the object stored in *fname*, or *default*
    when the file is missing or unreadable (best-effort loader)."""
    try:
        # Binary mode: pickle data is bytes; text mode can corrupt it.
        with open(fname, "rb") as f:
            return pickle.load(f)
    except Exception:
        # Deliberate best-effort fallback, but no longer a bare `except:`,
        # which also swallowed KeyboardInterrupt/SystemExit.
        return default
def save(obj, fname):
    """Pickle *obj* to *fname*.

    Opens in binary mode -- pickle writes bytes, and text mode corrupts
    the stream wherever newline translation occurs.
    """
    with open(fname, "wb") as f:
        # Default protocol keeps the .db files readable across versions.
        pickle.dump(obj, f)
# url -> Feed objects, persisted across runs in feeds.db.
feeds = try_load("feeds.db", {})
def get_subscribed_feeds():
    """Yield a Feed object for every URL listed in feeds.lst (one per line),
    creating and caching entries in the module-level `feeds` dict.

    Blank lines are skipped, so a trailing newline in feeds.lst no longer
    produces a bogus Feed("").
    """
    with open("feeds.lst", "r") as f:
        for line in f:
            url = line.strip()
            if not url:
                continue
            if url not in feeds:
                feeds[url] = Feed(url)
            yield feeds[url]
def get_updated_feeds():
    """Yield every subscribed feed that is both pollable and updated."""
    for candidate in get_subscribed_feeds():
        if not candidate.may_ping():
            continue
        if candidate.is_updated():
            yield candidate
def url_basename(url):
    """Return the filename component of *url*'s path."""
    path = urlparse.urlparse(url).path
    return os.path.basename(path)
# url -> local file path mapping, persisted across runs in files.db.
url_files = try_load("files.db", {})
# Local paths already claimed by some URL, used for collision detection.
files = set()
for url in url_files:
    # BUGFIX: set.update(path) iterated the path *string*, inserting its
    # individual characters; add() stores the whole path.
    files.add(url_files[url])
def make_local_path(url):
    """Return (and persist) a unique local media path for *url*.

    A URL already mapped in url_files keeps its existing path.  Otherwise
    the URL's basename is placed under conf.media_dir, inserting a numeric
    suffix before the extension on collision, and the new mapping is saved
    to files.db immediately.
    """
    if url in url_files:
        return url_files[url]
    desired = os.path.join(conf.media_dir, url_basename(url))
    root, ext = os.path.splitext(desired)
    attempt = 0
    while desired in files:
        attempt += 1
        desired = root + str(attempt) + ext
    url_files[url] = desired
    # BUGFIX: set.update(desired) added the path's individual *characters*
    # to the set, so the collision loop above never saw whole paths and
    # two URLs with the same basename got the same local file.
    files.add(desired)
    save(url_files, "files.db")
    return desired
def make_tmp_path(path):
    """Return the temporary (in-progress download) path for *path*."""
    suffix = ".part"
    return path + suffix
126 def wget(url, fname):
127 dir = os.path.dirname(fname)
128 if not os.path.isdir(dir):
129 os.makedirs(dir)
130 cmd = "wget -c %s -O %s" % (url, fname)
131 print cmd
132 return Popen(cmd, shell=True).wait() == 0
134 def download_episode(url):
135 local_path = make_local_path(url)
136 tmp_file = make_tmp_path(local_path)
137 if wget(url, tmp_file):
138 print "Renaming %s to %s" % (tmp_file, local_path)
139 os.rename(tmp_file, local_path)
140 return True
141 return False
# Episode URLs still awaiting download, persisted across runs.
dl_queue = try_load("dl-queue.db", [])
# NOTE(review): file_manager starts empty on every run and is never
# persisted, so the `url not in file_manager` filter below can never
# exclude anything -- presumably a stub for a future component; confirm.
file_manager = []

# Scan every pollable, updated feed and enqueue its new episode URLs.
for feed in get_updated_feeds():
    print feed.url
    for url in feed.get_new_episodes():
        if url not in file_manager:
            print url
            dl_queue.append(url)
    # Commit the remote timestamp only after the feed was fully scanned.
    feed.update_timestamp()

save(feeds, "feeds.db")
save(dl_queue, "dl-queue.db")
# Drain the download queue.
# BUGFIX: iterate over a snapshot -- removing from the very list being
# iterated makes Python skip the element that follows each removal, so
# every other successful download left the next URL unprocessed.
for url in list(dl_queue):
    if download_episode(url):
        dl_queue.remove(url)
        file_manager.append(url)
save(dl_queue, "dl-queue.db")

# file_manager.expire_and_prune()