Properly update existing episodes (bug 211)
[gpodder.git] / src / gpodder / dbsqlite.py
blobdaf97187c14ae77d4fcc3b33e7e2b9ec615da613
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2008 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # dbsqlite.py -- SQLite interface
20 # Justin Forest <justin.forest@gmail.com> 2008-06-13
22 have_sqlite = True
24 try:
25 from sqlite3 import dbapi2 as sqlite
26 except ImportError:
27 try:
28 from pysqlite2 import dbapi2 as sqlite
29 except ImportError:
30 have_sqlite = False
32 # TODO: show a message box
33 if not have_sqlite:
34 print "Please install pysqlite2 or upgrade to Python 2.5."
35 import sys
36 sys.exit()
38 from gpodder.liblogger import log
39 from email.Utils import mktime_tz
40 from email.Utils import parsedate_tz
41 from email.Utils import formatdate
42 from threading import RLock
43 import string
45 class Storage(object):
46 (STATE_NORMAL, STATE_DOWNLOADED, STATE_DELETED) = range(3)
48 lock = None
50 def __init__(self):
51 self.settings = {}
52 self.channel_map = {}
53 self._db = None
54 self.lock = RLock()
56 def setup(self, settings):
57 self.settings = settings
58 self.__check_schema()
60 def purge(self, max_episodes, channel_id=None):
61 """
62 Deletes old episodes. Should be called
63 before adding new episodes to a channel.
64 """
65 cur = self.cursor(lock=True)
67 if channel_id is None:
68 cur.execute("SELECT channel_id, COUNT(*) AS count FROM episodes GROUP BY channel_id HAVING count > ?", (max_episodes, ))
69 else:
70 cur.execute("SELECT channel_id, COUNT(*) AS count FROM episodes WHERE channel_id = ? GROUP BY channel_id HAVING count > ?", (channel_id, max_episodes, ))
72 for row in cur.fetchall():
73 sql = """
74 DELETE FROM episodes
75 WHERE channel_id = %d
76 AND state <> %d
77 AND id NOT IN
78 (SELECT id FROM episodes WHERE channel_id = %d
79 ORDER BY pubDate DESC LIMIT %d)""" % (row[0], self.STATE_DOWNLOADED, row[0], max_episodes)
80 # log('SQL: %s', sql)
81 cur.execute(sql)
83 cur.close()
84 self.lock.release()
86 @property
87 def db(self):
88 if self._db is None:
89 self._db = sqlite.connect(self.settings['database'], check_same_thread=False)
90 self._db.create_collation("unicode", lambda a, b: cmp(a.lower().replace('the ', ''), b.lower().replace('the ', '')))
91 log('SQLite connected', sender=self)
92 return self._db
94 def cursor(self, lock=False):
95 if lock:
96 self.lock.acquire()
97 return self.db.cursor()
99 def commit(self):
100 self.lock.acquire()
101 try:
102 self.db.commit()
103 except ProgrammingError, e:
104 log('Error commiting changes: %s', e, sender=self, traceback=True)
105 self.lock.release()
107 def __check_schema(self):
109 Creates all necessary tables and indexes that don't exist.
111 log('Setting up SQLite database', sender=self)
113 cur = self.cursor(lock=True)
115 cur.execute("""CREATE TABLE IF NOT EXISTS channels (
116 id INTEGER PRIMARY KEY,
117 url TEXT,
118 title TEXT,
119 override_title TEXT,
120 link TEXT,
121 description TEXT,
122 image TEXT,
123 pubDate INTEGER,
124 sync_to_devices INTEGER,
125 device_playlist_name TEXT,
126 username TEXT,
127 password TEXT,
128 last_modified TEXT,
129 etag TEXT,
130 deleted INTEGER
131 )""")
132 cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_url ON channels (url)""")
133 cur.execute("""CREATE INDEX IF NOT EXISTS idx_sync_to_devices ON channels (sync_to_devices)""")
134 cur.execute("""CREATE INDEX IF NOT EXISTS idx_title ON channels (title)""")
135 cur.execute("""CREATE INDEX IF NOT EXISTS idx_deleted ON channels (deleted)""")
137 cur.execute("""
138 CREATE TABLE IF NOT EXISTS episodes (
139 id INTEGER PRIMARY KEY,
140 channel_id INTEGER,
141 url TEXT,
142 title TEXT,
143 length INTEGER,
144 mimetype TEXT,
145 guid TEXT,
146 description TEXT,
147 link TEXT,
148 pubDate INTEGER,
149 state INTEGER,
150 played INTEGER,
151 locked INTEGER
153 """)
154 cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_guid ON episodes (guid)""")
155 cur.execute("""CREATE INDEX IF NOT EXISTS idx_channel_id ON episodes (channel_id)""")
156 cur.execute("""CREATE INDEX IF NOT EXISTS idx_pubDate ON episodes (pubDate)""")
157 cur.execute("""CREATE INDEX IF NOT EXISTS idx_state ON episodes (state)""")
158 cur.execute("""CREATE INDEX IF NOT EXISTS idx_played ON episodes (played)""")
159 cur.execute("""CREATE INDEX IF NOT EXISTS idx_locked ON episodes (locked)""")
161 cur.execute("""CREATE TEMPORARY VIEW episodes_downloaded AS SELECT channel_id, COUNT(*) AS count FROM episodes WHERE state = 1 GROUP BY channel_id""")
162 cur.execute("""CREATE TEMPORARY VIEW episodes_new AS SELECT channel_id, COUNT(*) AS count FROM episodes WHERE state = 0 AND played = 0 GROUP BY channel_id""")
163 cur.execute("""CREATE TEMPORARY VIEW episodes_unplayed AS SELECT channel_id, COUNT(*) AS count FROM episodes WHERE played = 0 AND state = %d GROUP BY channel_id""" % self.STATE_DOWNLOADED)
165 # Make sure deleted episodes are played, to simplify querying statistics.
166 cur.execute("UPDATE episodes SET played = 1 WHERE state = ?", (self.STATE_DELETED, ))
168 cur.close()
169 self.lock.release()
171 def get_channel_stat(self, url_or_id, state=None, is_played=None, is_locked=None):
172 where, params = ((),())
174 if state is not None:
175 where += ("state = ?", )
176 params += (state, )
177 if is_played is not None:
178 where += ("played = ?", )
179 params += (is_played, )
180 if is_locked is not None:
181 where += ("locked = ?", )
182 params += (is_locked, )
183 if isinstance(url_or_id, int):
184 where += ("channel_id = ?", )
185 params += (url_or_id, )
186 else:
187 where += ("channel_id IN (SELECT id FROM channels WHERE url = ?)", )
188 params += (url_or_id, )
190 if len(where):
191 return self.__get__("SELECT COUNT(*) FROM episodes WHERE %s" % (' AND '.join(where)), params)
192 else:
193 return 0
195 def load_channels(self, factory=None, url=None):
197 Returns channel descriptions as a list of dictionaries or objects,
198 returned by the factory() function, which receives the dictionary
199 as the only argument.
202 cur = self.cursor(lock=True)
203 cur.execute("""
204 SELECT
206 url,
207 title,
208 override_title,
209 link,
210 description,
211 image,
212 pubDate,
213 sync_to_devices,
214 device_playlist_name,
215 username,
216 password,
217 last_modified,
218 etag
219 FROM
220 channels
221 WHERE
222 (deleted IS NULL OR deleted = 0)
223 ORDER BY
224 title COLLATE unicode
225 """)
227 stats = self.stats()
229 result = []
230 for row in cur.fetchall():
231 channel = {
232 'id': row[0],
233 'url': row[1],
234 'title': row[2],
235 'override_title': row[3],
236 'link': row[4],
237 'description': row[5],
238 'image': row[6],
239 'pubDate': self.__formatdate__(row[7]),
240 'sync_to_devices': row[8],
241 'device_playlist_name': row[9],
242 'username': row[10],
243 'password': row[11],
244 'last_modified': row[12],
245 'etag': row[13],
248 if row[0] in stats:
249 channel['count_downloaded'] = stats[row[0]][0]
250 channel['count_new'] = stats[row[0]][1]
251 channel['count_unplayed'] = stats[row[0]][2]
253 if url is None:
254 # Maintain url/id relation for faster updates (otherwise
255 # we'd need to issue an extra query to find the channel id).
256 self.channel_map[channel['url']] = channel['id']
258 if url is None or url == channel['url']:
259 if factory is None:
260 result.append(channel)
261 else:
262 result.append(factory(channel))
264 cur.close()
265 self.lock.release()
267 if url is None:
268 log('Channel list read, %d entries.', len(result), sender=self)
269 else:
270 log('Channel %s read from db', url, sender=self)
272 return result
274 def stats(self):
275 cur = self.cursor(lock=True)
276 cur.execute("""
277 SELECT c.id, d.count, n.count, u.count
278 FROM channels c
279 LEFT JOIN episodes_downloaded d ON d.channel_id = c.id
280 LEFT JOIN episodes_new n ON n.channel_id = c.id
281 LEFT JOIN episodes_unplayed u ON u.channel_id = c.id
282 WHERE c.deleted = 0
283 """)
285 data = {}
287 for row in cur.fetchall():
288 data[row[0]] = (row[1] or 0, row[2] or 0, row[3] or 0)
290 cur.close()
291 self.lock.release()
293 return data
295 def save_channel(self, c, bulk=False):
296 if c.id is None:
297 c.id = self.find_channel_id(c.url)
299 cur = self.cursor(lock=True)
301 if c.id is None:
302 cur.execute("INSERT INTO channels (url, title, override_title, link, description, image, pubDate, sync_to_devices, device_playlist_name, username, password, last_modified, etag) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (c.url, c.title, c.override_title, c.link, c.description, c.image, self.__mktime__(c.pubDate), c.sync_to_devices, c.device_playlist_name, c.username, c.password, c.last_modified, c.etag, ))
303 self.channel_map[c.url] = cur.lastrowid
304 log('Added channel %s[%d]', c.url, self.channel_map[c.url], sender=self)
305 else:
306 cur.execute("UPDATE channels SET url = ?, title = ?, override_title = ?, link = ?, description = ?, image = ?, pubDate = ?, sync_to_devices = ?, device_playlist_name = ?, username = ?, password = ?, last_modified = ?, etag = ?, deleted = 0 WHERE id = ?", (c.url, c.title, c.override_title, c.link, c.description, c.image, self.__mktime__(c.pubDate), c.sync_to_devices, c.device_playlist_name, c.username, c.password, c.last_modified, c.etag, c.id, ))
308 cur.close()
309 self.lock.release()
311 def delete_channel(self, channel, purge=False):
312 if channel.id is None:
313 channel.id = self.find_channel_id(channel.url)
315 cur = self.cursor(lock=True)
316 log('Deleting channel %d', channel.id, sender=self)
318 if purge:
319 cur.execute("DELETE FROM channels WHERE id = ?", (channel.id, ))
320 cur.execute("DELETE FROM episodes WHERE channel_id = ?", (channel.id, ))
321 if channel.url in self.channel_map:
322 del self.channel_map[channel.url]
323 else:
324 cur.execute("UPDATE channels SET deleted = 1 WHERE id = ?", (channel.id, ))
325 cur.execute("DELETE FROM episodes WHERE channel_id = ? AND state <> ?", (channel.id, self.STATE_DELETED))
327 cur.close()
328 self.lock.release()
330 def __read_episodes(self, factory=None, where=None, params=None, commit=True):
331 sql = "SELECT url, title, length, mimetype, guid, description, link, pubDate, state, played, locked, id FROM episodes"
333 if where:
334 sql = "%s %s" % (sql, where)
336 if params is None:
337 params = ()
339 cur = self.cursor(lock=True)
340 cur.execute(sql, params)
342 result = []
343 for row in cur.fetchall():
344 episode = {
345 'url': row[0],
346 'title': row[1],
347 'length': row[2],
348 'mimetype': row[3],
349 'guid': row[4],
350 'description': row[5],
351 'link': row[6],
352 'pubDate': row[7],
353 'state': row[8],
354 'is_played': row[9],
355 'is_locked': row[10],
356 'id': row[11],
358 if episode['state'] is None:
359 episode['state'] = self.STATE_NORMAL
360 if factory is None:
361 result.append(episode)
362 else:
363 result.append(factory(episode))
365 cur.close()
366 self.lock.release()
367 return result
369 def load_episodes(self, channel, factory=None, limit=1000, state=None):
370 if channel.id is None:
371 channel.id = self.find_channel_id(channel.url)
373 if state is None:
374 return self.__read_episodes(factory = factory, where = """
375 WHERE channel_id = ? AND state = ? OR id IN
376 (SELECT id FROM episodes WHERE channel_id = ?
377 ORDER BY pubDate DESC LIMIT ?)
378 ORDER BY pubDate DESC
379 """, params = (channel.id, self.STATE_DOWNLOADED, channel.id, limit, ))
380 else:
381 return self.__read_episodes(factory = factory, where = " WHERE channel_id = ? AND state = ? ORDER BY pubDate DESC LIMIT ?", params = (channel.id, state, limit, ))
383 def load_episode(self, url, factory=None):
384 list = self.__read_episodes(factory = factory, where = " WHERE url = ?", params = (url, ))
385 if len(list):
386 return list[0]
388 def save_episode(self, e, bulk=False):
389 if not e.guid:
390 log('Refusing to save an episode without guid: %s', e)
391 return
393 self.lock.acquire()
395 try:
396 cur = self.cursor()
397 channel_id = self.find_channel_id(e.channel.url)
399 if e.id is None:
400 e.id = self.__get__("SELECT id FROM episodes WHERE guid = ?", (e.guid, ))
402 if e.id is None:
403 log('Episode added: %s', e.title)
404 cur.execute("INSERT INTO episodes (channel_id, url, title, length, mimetype, guid, description, link, pubDate, state, played, locked) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (channel_id, e.url, e.title, e.length, e.mimetype, e.guid, e.description, e.link, self.__mktime__(e.pubDate), e.state, e.is_played, e.is_locked, ))
405 e.id = cur.lastrowid
406 else:
407 log('Episode updated: %s', e.title)
408 cur.execute("UPDATE episodes SET title = ?, length = ?, mimetype = ?, description = ?, link = ?, pubDate = ?, state = ?, played = ?, locked = ? WHERE id = ?", (e.title, e.length, e.mimetype, e.description, e.link, self.__mktime__(e.pubDate), e.state, e.is_played, e.is_locked, e.id, ))
409 except Exception, e:
410 log('save_episode() failed: %s', e, sender=self)
412 cur.close()
413 self.lock.release()
415 def mark_episode(self, url, state=None, is_played=None, is_locked=None, toggle=False):
416 cur = self.cursor(lock=True)
417 cur.execute("SELECT state, played, locked FROM episodes WHERE url = ?", (url, ))
419 try:
420 ( cur_state, cur_played, cur_locked ) = cur.fetchone()
421 except:
422 # This only happens when we try to mark an unknown episode,
423 # which is typical for database upgrade, so we just ignore it.
424 cur.close()
425 self.lock.release()
426 return
428 if toggle:
429 if is_played:
430 cur_played = not cur_played
431 if is_locked:
432 cur_locked = not cur_locked
433 else:
434 if state is not None:
435 cur_state = state
436 if is_played is not None:
437 cur_played = is_played
438 if is_locked is not None:
439 cur_locked = is_locked
441 cur.close()
443 cur = self.cursor()
444 cur.execute("UPDATE episodes SET state = ?, played = ?, locked = ? WHERE url = ?", (cur_state, cur_played, cur_locked, url, ))
445 cur.close()
447 self.lock.release()
449 def __get__(self, sql, params=None):
451 Returns the first cell of a query result, useful for COUNT()s.
453 cur = self.cursor(lock=True)
455 if params is None:
456 cur.execute(sql)
457 else:
458 cur.execute(sql, params)
460 row = cur.fetchone()
461 cur.close()
462 self.lock.release()
464 if row is None:
465 return None
466 else:
467 return row[0]
469 def __mktime__(self, date):
470 if isinstance(date, float) or isinstance(date, int):
471 return date
472 if date is None or '' == date:
473 return None
474 try:
475 return mktime_tz(parsedate_tz(date))
476 except TypeError:
477 log('Could not convert "%s" to a unix timestamp.', date)
478 return None
480 def __formatdate__(self, date):
481 try:
482 return formatdate(date, localtime=1)
483 except TypeError:
484 log('Could not convert "%s" to a string date.', date)
485 return None
487 def find_channel_id(self, url):
489 Looks up the channel id in the map (which lists all undeleted
490 channels), then tries to look it up in the database, including
491 deleted channels.
493 if url in self.channel_map.keys():
494 return self.channel_map[url]
495 else:
496 return self.__get__("SELECT id FROM channels WHERE url = ?", (url, ))
498 def force_last_new(self, channel):
499 old = self.__get__("""SELECT COUNT(*) FROM episodes WHERE channel_id = ?
500 AND state IN (?, ?)""", (channel.id, self.STATE_DOWNLOADED,
501 self.STATE_DELETED))
502 log('old episodes in (%d)%s: %d', channel.id, channel.url, old)
504 cur = self.cursor(lock=True)
506 if old > 0:
507 cur.execute("""
508 UPDATE episodes SET played = 1 WHERE channel_id = ?
509 AND played = 0 AND pubDate < (SELECT MAX(pubDate)
510 FROM episodes WHERE channel_id = ? AND state IN (?, ?))""",
511 (channel.id, channel.id, self.STATE_DOWNLOADED,
512 self.STATE_DELETED, ))
513 else:
514 cur.execute("""
515 UPDATE episodes SET played = 1 WHERE channel_id = ?
516 AND pubDate <> (SELECT MAX(pubDate) FROM episodes
517 WHERE channel_id = ?)""", (channel.id, channel.id, ))
519 cur.close()
520 self.lock.release()
522 db = Storage()