Temporary solution to import episodes downloaded by an older version
[riffle.git] / riffle / catcher / models.py
blobe671ad5c8bfe75d7d378929b1016d46bd10435ea
1 from __future__ import with_statement
2 from contextlib import contextmanager
4 from django.db import models
5 import conf
7 import logging
8 import sys
9 import os
10 from datetime import datetime, timedelta
11 import time
12 import urllib2, urlparse
13 from email.utils import parsedate
14 import feedparser
# FIXME this deserves to go to util
@contextmanager
def url_open(url):
    """Context manager that yields an open urllib2 connection to ``url``.

    The connection is closed when the ``with`` block is left, whether it
    finishes normally or raises.
    """
    connection = urllib2.urlopen(url)
    try:
        yield connection
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip close() and leak the connection.
        connection.close()
def url_basename(url):
    """Return the last component of the path part of ``url``.

    Query string, parameters and fragment are not part of the result; a
    URL whose path is empty or ends with a slash yields ''.
    """
    path_part = urlparse.urlparse(url).path
    return os.path.basename(path_part)
class Feed(models.Model):
    """A remote feed, identified by its URL, whose enclosures become Episodes."""
    url = models.CharField(maxlength=1024,unique=True)
    # compared against the remote Last-Modified value in assert_updated();
    # refresh() does not update it yet (see the FIXME there)
    timestamp = models.DateTimeField(blank=True,null=True)
    # set to the current time by refresh() each time the URL is opened
    ping_timestamp = models.DateTimeField()
    title = models.CharField(maxlength=1024,blank=True)

    def __str__(self):
        return self.title if self.title != '' else self.url

    def refresh(self, familiar):
        """Fetch the feed and inject Episodes for its familiar enclosures.

        Returns immediately when may_ping() disallows a request.  Otherwise
        the feed URL is opened, ping_timestamp is set to now and, unless
        assert_updated() raises Feed.NotChanged, the feed is parsed: the
        stored title is synchronised and Episodes are injected.

        familiar -- container of enclosure URLs, tested with ``in``; only
                    enclosures listed here are injected (presumably the
                    URLs known to the old database -- see the FIXME in
                    _inject_familiar).

        Ordinary errors are logged, not propagated.  KeyboardInterrupt and
        SystemExit are allowed through.  The feed is saved in every case
        once the request has been attempted.
        """
        if not self.may_ping():
            return
        logging.debug("Refreshing %s", self)
        try:
            with url_open(self.url) as conn:
                self.ping_timestamp = datetime.now()
                # kept for the FIXME below
                remote_timestamp = self.assert_updated(conn)
                logging.debug("Feed '%s' might have changed, parsing", self)
                tree = feedparser.parse(conn)
                self._sync_title(tree)
                self._inject_familiar(tree, familiar)
                # FIXME uncomment the following after parsed feed processing
                # is implemented
                # self.timestamp = remote_timestamp
        except Feed.NotChanged:
            logging.debug("Feed '%s' haven't changed, skipping", self)
        except Exception:
            # sys.exc_info() is the supported replacement for the
            # deprecated, thread-unsafe sys.exc_value
            logging.error('%s while trying to refresh %s',
                          sys.exc_info()[1], self)
        finally:
            # always persist: at least to save ping_timestamp
            self.save()

    def _sync_title(self, tree):
        """Copy the parsed feed's title into self.title when it differs."""
        try:
            remote_title = tree['feed']['title']
        except KeyError:
            # the feed carries no title: keep whatever is stored
            return
        if self.title != remote_title:
            logging.debug("Feed title has changed from '%s' to '%s'",
                          self.title, remote_title)
            self.title = remote_title

    def _inject_familiar(self, tree, familiar):
        """Save an Episode for each new enclosure whose URL is in familiar."""
        for entry in tree.entries:
            # NOTE(review): an entry without a usable modified_parsed raises
            # here, which aborts the rest of the refresh (logged by
            # refresh()) -- confirm whether such entries should be skipped.
            timestamp = datetime.fromtimestamp(
                time.mktime(entry.modified_parsed))
            # FIXME restore after watermark is implemented
            #if timestamp > self.watermark() and 'enclosures' in e:
            if 'enclosures' not in entry:
                continue
            for encl in entry.enclosures:
                url = encl.href
                if Episode.objects.filter(url=url).count() > 0:
                    logging.debug('Episode %s already injected', url)
                    continue
                # FIXME this is temporary to import the old database
                if url not in familiar:
                    continue
                logging.debug('Injecting episode %s', url)
                new_episode = Episode(
                    feed = self,
                    url = url,
                    local_path = Episode.make_local_path(url),
                    timestamp = timestamp,
                    downloaded = timestamp)
                new_episode.save()

    class NotChanged(Exception):
        """Raised by assert_updated() when the feed has not been updated."""

    def assert_updated(self, conn):
        """Return the remote Last-Modified time, raising if nothing is new.

        conn -- open connection whose info() exposes the response headers.

        Returns the parsed Last-Modified value as a datetime, or None when
        the header is absent or cannot be parsed.

        Raises Feed.NotChanged when '--force-dl' is not on the command
        line, both timestamps are known, the remote one is later than
        conf.reasonable_timestamp and it is not later than self.timestamp.
        """
        remote_timestamp = None
        headers = conn.info()
        if 'Last-Modified' in headers:
            parsed = parsedate(headers['Last-Modified'])
            # parsedate() returns None for a value it cannot parse; treat
            # that like a missing header instead of failing in mktime()
            if parsed is not None:
                remote_timestamp = datetime.fromtimestamp(time.mktime(parsed))
        logging.debug('comparing local(%s) against remote(%s) timestamps',
                      self.timestamp,
                      remote_timestamp)
        if ('--force-dl' not in sys.argv
                and self.timestamp is not None
                and remote_timestamp is not None
                and remote_timestamp > conf.reasonable_timestamp
                and remote_timestamp <= self.timestamp):
            raise Feed.NotChanged()
        return remote_timestamp

    def may_ping(self):
        """Tell whether the feed URL may be requested now.

        True when '--force-dl' is on the command line, or when more than
        conf.min_ping_period has passed since ping_timestamp.
        """
        if '--force-dl' in sys.argv:
            logging.debug('--force-dl detected: ignoring ping period')
            return True
        may = (datetime.now() - self.ping_timestamp) > conf.min_ping_period
        logging.debug('May%s ping %s', '' if may else ' not', self)
        return may

    class Admin: pass
class Episode(models.Model):
    """One enclosure of a Feed, stored under a unique local_path."""
    feed = models.ForeignKey(Feed,edit_inline=models.TABULAR)
    url = models.CharField(maxlength=1024,unique=True,core=True)
    local_path = models.CharField(maxlength=1024,unique=True)
    timestamp = models.DateTimeField()
    downloaded = models.DateTimeField(null=True,default=None)

    @classmethod
    def make_local_path(cls,url):
        """Derive a local_path for ``url`` that no stored Episode uses yet.

        Starts from the basename of the URL; while that name is already
        taken, a counter (1, 2, ...) is inserted between the original base
        name and its extension.
        """
        candidate = url_basename(url)
        stem, suffix = os.path.splitext(candidate)
        # the query only narrows things down: these are *potential* clashes
        similar = Episode.objects.filter(local_path__startswith = stem,
                                         local_path__endswith = suffix)
        taken = [episode.local_path for episode in similar]
        counter = 1
        while True:
            if candidate not in taken:
                return candidate
            logging.debug('local_path %s already exists, inventing new',candidate)
            candidate = stem + str(counter) + suffix
            counter += 1