from __future__ import with_statement
from contextlib import contextmanager
import sys, os, time, logging, pickle, httplib
import urllib2, urlparse
import feedparser
import conf
from email.utils import parsedate
from datetime import datetime, timedelta
from subprocess import Popen
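
# `conf` is a local settings module not shown in this listing; the attribute
# names below are the ones this script actually uses, but the values are
# illustrative guesses only:
#
#     # conf.py (hypothetical)
#     from datetime import datetime, timedelta
#     dawn_of_time = datetime.fromtimestamp(0)     # "never seen" sentinel
#     min_ping_period = timedelta(hours=1)         # minimum delay between pings
#     reasonable_timestamp = datetime(1990, 1, 1)  # sanity floor for Last-Modified
#     media_dir = "/var/media/podcasts"            # where episodes land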
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%m-%d %H:%M',
    filename='catcher.log')

console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
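# Full detail goes to catcher.log with timestamps; the extra handler mirrors
# INFO and above to the console (stderr) in a shorter format.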

@contextmanager
def url_open(url):
    # urllib2 connections are not context managers in Python 2,
    # hence this thin wrapper for use with `with`
    connection = urllib2.urlopen(url)
    try:
        yield connection
    finally:
        connection.close()

class Feed:
    # last url timestamp if any
    timestamp = conf.dawn_of_time
    remote_timestamp = None
    # ignore episodes older than this
    watermark = datetime.now() - timedelta(days=60)
    ping_timestamp = conf.dawn_of_time

    def __init__(self, url):
        self.url = url

    def __str__(self):
        return self.title if 'title' in self.__dict__ else self.url

    def __setstate__(self, state):
        """Upgrade the instance being unpickled"""
        def to_datetime(x, default):
            if isinstance(x, int) or isinstance(x, float):
                # older pickles stored epoch seconds
                return datetime.fromtimestamp(x)
            elif isinstance(x, datetime):
                return x
            return default
        for key, conv in [
                ('timestamp', to_datetime),
                ('remote_timestamp', to_datetime),
                ('watermark', to_datetime),
                ('ping_timestamp', to_datetime)]:
            if key in state:
                state[key] = conv(state[key], Feed.__dict__[key])
        self.__dict__.update(state)

    def get_remote_timestamp(self):
        with url_open(self.url) as conn:
            self.ping_timestamp = datetime.now()
            if 'Last-Modified' in conn.info():
                return datetime.fromtimestamp(time.mktime(
                    parsedate( conn.info()['Last-Modified'] )))
            return None
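
    # parsedate() turns an RFC 2822 date such as
    # "Sun, 06 Nov 1994 08:49:37 GMT" into a 9-tuple, and time.mktime()
    # converts that to epoch seconds interpreted in local time.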

    def may_ping(self):
        delta = datetime.now() - self.ping_timestamp
        return delta > conf.min_ping_period

    def is_updated(self):
        if '--force-dl' in sys.argv:
            return True
        if not self.may_ping():
            return False
        self.remote_timestamp = None
        try:
            if self.timestamp is not None:
                self.remote_timestamp = self.get_remote_timestamp()
            return self.remote_timestamp is None or \
                   self.remote_timestamp < conf.reasonable_timestamp or \
                   self.remote_timestamp > self.timestamp
        except (urllib2.URLError, httplib.BadStatusLine), e:
            logging.error("Ignoring feed %s, can't get feed timestamp: %s",
                          self, e)
            return False

    def get_new_episodes(self):
        tree = feedparser.parse( self.url )
        self.ping_timestamp = datetime.now()
        watermark = self.watermark
        self.title = tree['feed']['title']
        for e in tree.entries:
            timestamp = datetime.fromtimestamp(
                time.mktime(e.modified_parsed))
            if timestamp > self.watermark and 'enclosures' in e:
                for encl in e.enclosures:
                    yield encl.href  # the enclosure's download URL
                if timestamp > watermark:
                    watermark = timestamp
        self.watermark = watermark
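
    # Two watermarks are in play above: entries are filtered against the old
    # self.watermark while the local copy tracks the newest entry seen, so
    # the cutoff only advances once the whole feed has been scanned.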

    def update_timestamp(self):
        self.timestamp = self.remote_timestamp

def try_load(fname, default):
    try:
        with open(fname, "rb") as f:
            return pickle.load(f)
    except (IOError, EOFError):
        return default

def save(obj, fname):
    with open(fname, "wb") as f:
        pickle.dump(obj, f) #,pickle.HIGHEST_PROTOCOL)

feeds = try_load("feeds.db", {})

def get_subscribed_feeds():
    with open("feeds.lst", "r") as f:
        for url in map(str.strip, f.readlines()):
            if url not in feeds:
                feeds[url] = Feed(url)
            yield feeds[url]

def get_updated_feeds():
    for feed in get_subscribed_feeds():
        if feed.is_updated():
            yield feed

def url_basename(url):
    return os.path.basename( urlparse.urlparse(url)[2] )
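# e.g. url_basename("http://example.com/feed/ep42.mp3?auth=x") -> "ep42.mp3"
# (urlparse(url)[2] is the path component, so the query string is dropped)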

url_files = try_load("files.db", {})

# the set of local paths already claimed by earlier downloads
files = set()
for url in url_files:
    files.add(url_files[url])

def make_local_path(url):
    if url in url_files: return url_files[url]
    desired = os.path.join( conf.media_dir, url_basename(url) )
    desired_r, desired_e = os.path.splitext(desired)
    attempt = 0
    # append a counter until the name no longer collides
    while desired in files:
        attempt += 1
        desired = desired_r + str(attempt) + desired_e
    url_files[url] = desired
    files.add( desired )
    save(url_files, "files.db")
    return desired

def make_tmp_path(path):
    return path + ".part"

def wget(url, fname):
    dir = os.path.dirname(fname)
    if not os.path.isdir(dir):
        os.makedirs(dir)
    cmd = "wget -c '%s' -O '%s'" % (url, fname)
    logging.info("Exec: %s", cmd)
    return Popen(cmd, shell=True).wait() == 0
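# wget -c resumes partial downloads, so an interrupted .part file picks up
# where it left off on the next run; exit status 0 from wget means success.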

def download_episode(url):
    local_path = make_local_path(url)
    tmp_file = make_tmp_path(local_path)
    if wget(url, tmp_file):
        logging.info("Renaming %s to %s", tmp_file, local_path)
        os.rename(tmp_file, local_path)
        return True
    return False

dl_queue = try_load("dl-queue.db", [])

for feed in get_updated_feeds():
    logging.info("Checking feed: %s", feed)
    # FIXME for now limit to 10 but
    # (a) have to make sure it's sorted by age
    # (b) should be configurable and overridable
    for url in feed.get_new_episodes():
        if url not in url_files and url not in dl_queue:
            logging.info("Queueing for download: %s", url)
            dl_queue.append(url)
        else:
            logging.debug("Ignoring familiar url: %s (%s)", url, feed)
    feed.update_timestamp()

save(feeds, "feeds.db")
save(dl_queue, "dl-queue.db")

# traverse a copy, we seem to miss episodes otherwise
for url in list(dl_queue):
    if download_episode(url):
        dl_queue.remove(url)
        save(dl_queue, "dl-queue.db")