3 from __future__
import with_statement
7 from subprocess
import Popen
# Django must know its settings module before the riffle models import below.
os.environ['DJANGO_SETTINGS_MODULE'] = 'riffle.settings'
from riffle.catcher import models as catcher

# Log everything to catcher.log; mirror INFO (or DEBUG when run with -v)
# to the console via a second handler on the root logger.
# NOTE(review): the basicConfig call's opening/closing lines were lost in
# recovery; reconstructed from the surviving keyword arguments (matches the
# stdlib logging cookbook pattern). Confirm level/filemode against history.
logging.basicConfig(format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M',
                    filename='catcher.log')
console = logging.StreamHandler()
console.setLevel(logging.DEBUG if '-v' in sys.argv else logging.INFO)
formatter = logging.Formatter('%(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
def get_new_episodes(self):
    """Parse this feed's RSS/Atom document and yield enclosure URLs from
    entries newer than the stored watermark, advancing the watermark to the
    newest entry seen.

    Side effects: updates self.ping_timestamp, self.title and (once the
    generator is exhausted) self.watermark.

    NOTE(review): source was recovered with gaps; the two lines marked TODO
    below are reconstructed from context — confirm against history.
    """
    tree = feedparser.parse(self.url)
    self.ping_timestamp = datetime.now()
    watermark = self.watermark
    self.title = tree['feed']['title']
    for e in tree.entries:
        timestamp = datetime.fromtimestamp(time.mktime(e.modified_parsed))
        if timestamp > self.watermark and 'enclosures' in e:
            for encl in e.enclosures:
                # TODO(review): reconstructed — the caller iterates this
                # method for URLs, so each enclosure's href is yielded.
                yield encl.href
            if timestamp > watermark:
                # TODO(review): reconstructed — track the newest entry time.
                watermark = timestamp
    # Persist the highest timestamp seen (only reached when fully consumed).
    self.watermark = watermark
def try_load(fname, default):
    """Load persisted state from *fname*, returning *default* when the file
    is missing or unreadable.

    NOTE(review): the try/except and load call were lost in recovery; pickle
    is inferred from the companion save(obj, fname) calls — confirm the
    actual serialization format against history.
    """
    import pickle
    try:
        with open(fname, "r") as f:
            return pickle.load(f)
    except (IOError, OSError):
        # Missing/unreadable state file: start fresh with the default.
        return default
def url_basename(url):
    """Return the final path component of *url* (the basename of its path)."""
    path = urlparse.urlparse(url)[2]
    return os.path.basename(path)
# Persistent mapping of episode URL -> local media path.
url_files = try_load("files.db", {})

# Set of local filenames already claimed, so make_local_path can avoid
# collisions when two URLs share a basename.
# NOTE(review): the set initialisation and loop header were lost in
# recovery; reconstructed from the surviving `files.add(...)` fragment.
files = set()
for url in url_files:
    files.add(url_files[url])
def make_local_path(url):
    """Map *url* to a unique local path under conf.media_dir, remembering
    and persisting the mapping in url_files ("files.db").

    Collisions with already-claimed names in `files` are resolved by
    appending a counter before the extension (foo.mp3 -> foo1.mp3, ...).

    NOTE(review): recovered with gaps; the attempt-counter setup/bump, the
    registration of the chosen name in `files`, and the final return are
    reconstructed — confirm against history.
    """
    if url in url_files:
        return url_files[url]
    desired = os.path.join(conf.media_dir, url_basename(url))
    desired_r, desired_e = os.path.splitext(desired)
    attempt = 0  # TODO(review): reconstructed
    while desired in files:
        attempt += 1  # TODO(review): reconstructed
        desired = desired_r + str(attempt) + desired_e
    files.add(desired)  # TODO(review): reconstructed — keep later calls collision-free
    url_files[url] = desired
    save(url_files, "files.db")
    return desired  # TODO(review): reconstructed — download_episode expects a path
def make_tmp_path(path):
    """Return the temporary download path for *path*.

    NOTE(review): the original body was lost in recovery. Reconstructed
    from the caller (wget writes here, then os.rename moves the file onto
    *path*); the exact suffix is a guess — confirm against history.
    """
    return path + ".part"
def wget(url, fname):
    """Download *url* to *fname* with wget (-c resumes partial downloads),
    creating the destination directory if needed. Returns True when wget
    exits with status 0.

    NOTE(review): the def line, the makedirs call, and a few lines after
    the log call were lost in recovery; reconstructed from the caller and
    the surviving dirname/isdir check — confirm against history.
    SECURITY: the original interpolated url/fname into a shell string
    (command-injection risk for hostile feed URLs); rewritten as an argv
    list with shell=False.
    """
    dir = os.path.dirname(fname)
    if not os.path.isdir(dir):
        os.makedirs(dir)  # TODO(review): reconstructed
    cmd = ["wget", "-c", url, "-O", fname]
    logging.info("Exec: %s", " ".join(cmd))
    return Popen(cmd).wait() == 0
def download_episode(url):
    """Fetch one episode URL into its final media path via a temp file.
    Returns True when the file was downloaded and moved into place.

    NOTE(review): recovered with gaps after the rename; the return values
    are reconstructed from the caller's `if download_episode(url):` —
    confirm against history.
    """
    local_path = make_local_path(url)
    tmp_file = make_tmp_path(local_path)
    if wget(url, tmp_file):
        logging.info("Renaming %s to %s", tmp_file, local_path)
        os.rename(tmp_file, local_path)
        return True  # TODO(review): reconstructed
    return False  # TODO(review): reconstructed
# Pending episode URLs, persisted across runs.
dl_queue = try_load("dl-queue.db", [])

# NOTE(review): the body of this loop was lost in recovery; it presumably
# registered/refreshed each Django Feed row (and built the `feeds` object
# saved below) before polling. TODO restore from history.
for feed in catcher.Feed.objects.all():
    pass  # TODO(review): body lost

# Poll every updated feed and queue unseen enclosure URLs for download.
for feed in get_updated_feeds():
    logging.info("Checking feed: %s", feed)
    # FIXME for now limit to 10 but
    # (a) have to make sure it's sorted by age
    # (b) should be configurable and overridable
    for url in feed.get_new_episodes():
        if url not in url_files and url not in dl_queue:
            logging.info("Queueing for download: %s", url)
            dl_queue.append(url)  # TODO(review): line lost; append inferred from queue usage
        else:
            logging.debug("Ignoring familiar url: %s (%s)", url, feed)
    feed.update_timestamp()

# NOTE(review): `feeds` is not defined in the recovered source — it was
# presumably built in the lost loop body above. Confirm before relying on it.
save(feeds, "feeds.db")
save(dl_queue, "dl-queue.db")

# traverse a copy, we seem to miss episodes otherwise
for url in [x for x in dl_queue]:
    if download_episode(url):
        dl_queue.remove(url)  # TODO(review): line lost; removal inferred from comment + re-save
        save(dl_queue, "dl-queue.db")