from __future__ import with_statement
from contextlib import contextmanager
import os, sys, time, pickle
import urllib2, urlparse
import feedparser
from email.utils import parsedate
from datetime import datetime, timedelta
from subprocess import Popen
import conf  # assumed local settings module providing conf.media_dir
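
# A minimal podcatcher: subscriptions are read from feeds.lst, feed and
# download state is pickled into feeds.db / files.db / dl-queue.db, and
# enclosures are fetched with the external wget tool.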

@contextmanager
def url_open(url):
    # close the connection even if the body raises
    connection = urllib2.urlopen(url)
    try:
        yield connection
    finally:
        connection.close()

class Feed(object):
    # consider timestamps earlier than this non-existent
    reasonable_timestamp = datetime(2008, 1, 1)
    min_ping_period = timedelta(minutes=30)
    dawn_of_time = datetime(1999, 1, 1)

    # instance field defaults
    # last url timestamp, if any
    timestamp = dawn_of_time
    remote_timestamp = None
    # ignore episodes older than this
    watermark = datetime.now() - timedelta(days=60)
    ping_timestamp = dawn_of_time
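
    # These class-level defaults double as fallbacks when __setstate__
    # upgrades instances pickled by older versions of the script.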

    def __init__(self, url):
        self.url = url

    def __str__(self):
        return self.title if 'title' in self.__dict__ else self.url

    def __setstate__(self, dict):
        """Upgrade the instance being unpickled"""
        def to_datetime(x, default):
            # older pickles stored raw POSIX timestamps
            if isinstance(x, int) or isinstance(x, float):
                return datetime.fromtimestamp(x)
            elif isinstance(x, datetime):
                return x
            else:
                return default
        for key, conv in [
                ('timestamp', to_datetime),
                ('remote_timestamp', to_datetime),
                ('watermark', to_datetime),
                ('ping_timestamp', to_datetime)]:
            if key in dict:
                dict[key] = conv(dict[key], Feed.__dict__[key])
        self.__dict__.update(dict)

    def get_remote_timestamp(self):
        with url_open(self.url) as conn:
            self.ping_timestamp = datetime.now()
            if 'Last-Modified' in conn.info():
                return datetime.fromtimestamp(time.mktime(
                    parsedate(conn.info()['Last-Modified'])))
        return None
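
    # Rate-limit network checks: a feed may only be pinged again once
    # min_ping_period has passed since the previous attempt.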
    def may_ping(self):
        delta = datetime.now() - self.ping_timestamp
        return delta > self.min_ping_period

    def is_updated(self):
        if '--force-dl' in sys.argv:
            return True
        if not self.may_ping():
            return False
        self.remote_timestamp = None
        if self.timestamp is not None:
            self.remote_timestamp = self.get_remote_timestamp()
        return self.remote_timestamp is None or \
            self.remote_timestamp < Feed.reasonable_timestamp or \
            self.remote_timestamp > self.timestamp
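
    # Parse the feed and yield enclosure URLs newer than the stored
    # watermark, then advance the watermark past everything seen.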
    def get_new_episodes(self):
        tree = feedparser.parse(self.url)
        self.ping_timestamp = datetime.now()
        watermark = self.watermark
        self.title = tree['feed']['title']
        for e in tree.entries:
            timestamp = datetime.fromtimestamp(
                time.mktime(e.modified_parsed))
            if timestamp > self.watermark and 'enclosures' in e:
                for encl in e.enclosures:
                    yield encl.href
            if timestamp > watermark:
                watermark = timestamp
        self.watermark = watermark

    def update_timestamp(self):
        self.timestamp = self.remote_timestamp
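
# Pickle-backed persistence helpers: try_load falls back to `default`
# when the state file does not exist yet.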
def try_load(fname, default):
    try:
        with open(fname, "r") as f:
            return pickle.load(f)
    except IOError:
        return default

def save(obj, fname):
    with open(fname, "w") as f:
        pickle.dump(obj, f)  # ,pickle.HIGHEST_PROTOCOL)

feeds = try_load("feeds.db", {})
def get_subscribed_feeds():
    with open("feeds.lst", "r") as f:
        for url in map(str.strip, f.readlines()):
            if url not in feeds:
                feeds[url] = Feed(url)
            yield feeds[url]

def get_updated_feeds():
    for feed in get_subscribed_feeds():
        try:
            if feed.is_updated():
                yield feed
        except urllib2.URLError, e:
            print "Error getting", feed

def url_basename(url):
    return os.path.basename(urlparse.urlparse(url)[2])

url_files = try_load("files.db", {})

files = set()
for url in url_files:
    files.add(url_files[url])
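
# `files` holds every local path already claimed; make_local_path
# appends a numeric suffix until the candidate name is free.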
def make_local_path(url):
    if url in url_files: return url_files[url]
    desired = os.path.join(conf.media_dir, url_basename(url))
    desired_r, desired_e = os.path.splitext(desired)
    attempt = 0
    while desired in files:
        attempt += 1
        desired = desired_r + str(attempt) + desired_e
    url_files[url] = desired
    files.add(desired)
    save(url_files, "files.db")
    return desired

def make_tmp_path(path):
    return path + ".part"

def wget(url, fname):
    dir = os.path.dirname(fname)
    if not os.path.isdir(dir):
        os.makedirs(dir)
    cmd = "wget -c %s -O %s" % (url, fname)
    return Popen(cmd, shell=True).wait() == 0
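
# Episodes are downloaded to a ".part" temp file and only renamed into
# place on success; wget -c lets an interrupted transfer resume later.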
def download_episode(url):
    local_path = make_local_path(url)
    tmp_file = make_tmp_path(local_path)
    if wget(url, tmp_file):
        print "Renaming %s to %s" % (tmp_file, local_path)
        os.rename(tmp_file, local_path)
        return True
    return False

dl_queue = try_load("dl-queue.db", [])

for feed in get_updated_feeds():
    for url in feed.get_new_episodes():
        if url not in url_files and url not in dl_queue:
            dl_queue.append(url)
        else:
            print "Familiar url: ignore", url
    feed.update_timestamp()

save(feeds, "feeds.db")
save(dl_queue, "dl-queue.db")

# traverse a copy, we seem to miss episodes otherwise
for url in [x for x in dl_queue]:
    if download_episode(url):
        dl_queue.remove(url)
    save(dl_queue, "dl-queue.db")