1 from __future__
import with_statement
2 from contextlib
import contextmanager
4 from django
.db
import models
10 from datetime
import datetime
, timedelta
12 import urllib2
, urlparse
13 from email
.utils
import parsedate
16 # FIXME this deserves to go to util
# NOTE(review): the `def` line that owns this statement is not visible in this
# view. Feed.refresh calls `url_open(...)` inside a `with` statement and
# `contextmanager` is imported above, so this is presumably the body of that
# helper -- confirm against the full file.
# Opens `url` with urllib2 and binds the resulting handle to `connection`.
19 connection
= urllib2
.urlopen(url
)
def url_basename(url):
    """Return the last component of the path part of *url*.

    Only the path is inspected (element 2 of the ``urlparse`` result), so the
    scheme, host, query string and fragment are ignored.  A path that ends in
    ``/`` gives an empty string, exactly as ``os.path.basename`` does.
    """
    url_path = urlparse.urlparse(url)[2]
    return os.path.basename(url_path)
# Feed: Django model (subclass of models.Model) describing one feed: its URL,
# two timestamps and a title.
# NOTE(review): several lines of this class are not visible in this view (method
# headers, a `try:`, handler bodies); comments below describe only what is shown.
26 class Feed(models
.Model
):
# Feed address: unique, at most 1024 characters.
27 url
= models
.CharField(maxlength
=1024,unique
=True)
# Optional (blank/null allowed) timestamp; assert_updated() compares it with
# the remote Last-Modified value.
28 timestamp
= models
.DateTimeField(blank
=True,null
=True)
# Set to datetime.now() by refresh() once the connection is open; the ping
# check near the end of the class measures the time elapsed since this value.
29 ping_timestamp
= models
.DateTimeField()
# Feed title, may be blank; refresh() overwrites it with the parsed feed title.
30 title
= models
.CharField(maxlength
=1024,blank
=True)
# Yields the title, or the URL when the title is the empty string.
# NOTE(review): the `def` line owning this return is not visible in this view.
33 return self
.title
if self
.title
!= '' else self
.url
# refresh: fetch the feed, update the title and walk the entries' enclosures,
# building an Episode for each enclosure URL that is not stored yet.
#   familiar -- not referenced in any line visible here.
# NOTE(review): the body of the first `if`, the `try:` matching the `except`
# below, and the handler that owns the logging.error call are not visible.
35 def refresh(self
, familiar
):
# Ping guard: consult may_ping() first.
36 if not self
.may_ping():
38 logging
.debug("Refreshing %s", self
)
40 with
url_open(self
.url
) as conn
:
# Record the ping time as soon as the connection is open.
41 self
.ping_timestamp
= datetime
.now()
# assert_updated() raises Feed.NotChanged when the feed is not newer than the
# stored timestamp; that exception is handled further down.
42 remote_timestamp
= self
.assert_updated(conn
)
43 logging
.debug("Feed '%s' might have changed, parsing", self
)
44 tree
= feedparser
.parse(conn
)
# Keep the stored title in sync with the parsed feed title.
46 if self
.title
!= tree
['feed']['title']:
47 logging
.debug("Feed title has changed from '%s' to '%s'",
48 self
.title
, tree
['feed']['title'])
49 self
.title
= tree
['feed']['title']
52 for e
in tree
.entries
:
# Convert the entry's modified_parsed time tuple to a datetime via time.mktime.
53 timestamp
= datetime
.fromtimestamp(
54 time
.mktime(e
.modified_parsed
))
55 # FIXME restore after watermark is implemented
56 #if timestamp > self.watermark() and 'enclosures' in e:
# NOTE(review): `url` used below is presumably taken from `encl` on a line
# that is not visible in this view -- confirm.
58 for encl
in e
.enclosures
:
# Skip enclosure URLs that already have an Episode row.
60 if Episode
.objects
.filter(url
=url
).count() > 0:
61 logging
.debug('Episode %s already injected', url
)
63 # FIXME this is temporary to import the old database
65 logging
.debug('Injecting episode %s', url
)
# The new Episode gets the entry timestamp both as `timestamp` and `downloaded`.
# NOTE(review): other constructor arguments and any save call are not visible.
66 new_episode
= Episode(
69 local_path
= Episode
.make_local_path(url
),
70 timestamp
= timestamp
,
71 downloaded
= timestamp
)
73 # FIXME uncomment the following after parsed feed processing
75 # self.timestamp = remote_timestamp
# Raised by assert_updated() when the feed is unchanged: log and move on.
76 except Feed
.NotChanged
:
77 logging
.debug("Feed '%s' haven't changed, skipping", self
)
# sys.exc_value is a Python 2 only attribute.
79 logging
.error('%s while trying to refresh %s', sys
.exc_value
, self
)
81 # always persist: at least to save ping_timestamp
# Docstring text of the not-changed exception (raised as Feed.NotChanged below);
# its `class` line is not visible in this view.
85 "This will be raised if feed havent updated"
# assert_updated: compare the remote Last-Modified header with self.timestamp.
#   conn -- open connection; only conn.info() is used, as a header mapping.
# Returns the remote timestamp as a datetime, or None when the header is absent.
# Raises Feed.NotChanged when every condition of the `if` below holds.
88 def assert_updated(self
, conn
):
89 remote_timestamp
= None
90 if 'Last-Modified' in conn
.info():
# NOTE(review): one line of this expression is not visible in this view
# (the closing parentheses do not balance as shown).
91 remote_timestamp
= datetime
.fromtimestamp(
93 parsedate( conn
.info()['Last-Modified'] )))
# NOTE(review): the arguments of this debug call are not visible in this view.
94 logging
.debug('comparing local(%s) against remote(%s) timestamps',
# Unchanged only when: --force-dl is absent from the command line, both
# timestamps are known, the remote one is later than conf.reasonable_timestamp
# (presumably a sanity floor for bogus server dates -- confirm), and the remote
# one is not newer than the stored one.
97 if '--force-dl' not in sys
.argv \
98 and self
.timestamp
is not None \
99 and remote_timestamp
is not None \
100 and remote_timestamp
> conf
.reasonable_timestamp \
101 and remote_timestamp
<= self
.timestamp
:
102 raise Feed
.NotChanged()
103 return remote_timestamp
# NOTE(review): the `def` line for the code below is not visible; refresh()
# calls self.may_ping(), which is presumably this method. Its return
# statements are not visible either.
# With --force-dl on the command line the ping period is logged as ignored.
107 if '--force-dl' in sys
.argv
:
108 logging
.debug('--force-dl detected: ignoring ping period')
# `may` is true when more than conf.min_ping_period has elapsed since
# ping_timestamp.
111 may
= (datetime
.now() - self
.ping_timestamp
) \
112 > conf
.min_ping_period
113 logging
.debug('May%s ping %s', '' if may
else ' not', self
)
# Episode: Django model (subclass of models.Model) for one downloadable item
# attached to a Feed; Feed.refresh builds one per enclosure URL.
# NOTE(review): some lines of this class are not visible in this view, and the
# last method shown stops before any return statement.
118 class Episode(models
.Model
):
# Owning feed; edited inline (tabular layout) with the Feed.
119 feed
= models
.ForeignKey(Feed
,edit_inline
=models
.TABULAR
)
# Remote address of the episode: unique, at most 1024 characters.
120 url
= models
.CharField(maxlength
=1024,unique
=True,core
=True)
# Local file name for the episode: unique, produced by make_local_path().
121 local_path
= models
.CharField(maxlength
=1024,unique
=True)
122 timestamp
= models
.DateTimeField()
# Nullable, defaults to None.
123 downloaded
= models
.DateTimeField(null
=True,default
=None)
# make_local_path: derive a local_path for `url` that does not collide with an
# existing Episode.local_path.
# NOTE(review): takes `cls` and is called as Episode.make_local_path(url) in
# Feed.refresh, so it is presumably decorated as a classmethod on a line that
# is not visible here -- confirm.
126 def make_local_path(cls
,url
):
# Start from the basename of the URL path, split into stem and extension.
127 wanted
= url_basename(url
)
128 wanted_base
, wanted_ext
= os
.path
.splitext(wanted
)
129 # these are but *potential* clashes
# Existing local paths that share both the stem prefix and the extension.
130 clashes
= [e
.local_path
for e
in
131 Episode
.objects
.filter(local_path__startswith
= wanted_base
,
132 local_path__endswith
= wanted_ext
)]
# Try new candidates of the form <stem><mod><ext> while the name is taken.
# NOTE(review): the definition and update of `mod`, and the final return, are
# not visible in this view.
134 while wanted
in clashes
:
135 logging
.debug('local_path %s already exists, inventing new',wanted
)
136 wanted
= wanted_base
+ str(mod
) + wanted_ext