Calculate episode stats with a single query
[gpodder.git] / src / gpodder / dbsqlite.py
blob18b5eda7519c59b40ce722b095cf28d38a4ad399
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # dbsqlite.py -- SQLite persistence layer for gPodder
23 # 2008-06-13 Justin Forest <justin.forest@gmail.com>
24 # 2010-04-24 Thomas Perl <thp@gpodder.org>
27 import gpodder
28 _ = gpodder.gettext
30 import sys
32 have_sqlite = True
34 try:
35 from sqlite3 import dbapi2 as sqlite
36 from sqlite3 import OperationalError
37 except ImportError:
38 try:
39 from pysqlite2 import dbapi2 as sqlite
40 from pysqlite2.dbapi2 import OperationalError
41 except ImportError:
42 have_sqlite = False
44 # TODO: show a message box
45 if not have_sqlite:
46 print >>sys.stderr, 'Please install pysqlite2 or Python 2.5.'
47 sys.exit(1)
49 from gpodder.liblogger import log
51 import string
52 import threading
53 import re
55 class Database(object):
56 UNICODE_TRANSLATE = {ord(u'ö'): u'o', ord(u'ä'): u'a', ord(u'ü'): u'u'}
58 # Column names and types for the channels table
59 TABLE_CHANNELS = "channels"
60 SCHEMA_CHANNELS = (
61 ('id', 'INTEGER PRIMARY KEY'),
62 ('url', 'TEXT'), # Feed (RSS/Atom) URL of the podcast
63 ('title', 'TEXT'), # Podcast name
64 ('override_title', 'TEXT'), # Podcast name if user-defined
65 ('link', 'TEXT'), # Website URL for the podcast
66 ('description', 'TEXT'), # Description of podcast contents
67 ('image', 'TEXT'), # URL to cover art for the image
68 ('pubDate', 'INTEGER'), # Date and time of last feed publication
69 ('sync_to_devices', 'INTEGER'), # 1 if syncing to devices is enabled, 0 otherwise
70 ('device_playlist_name', 'TEXT'), # Name of the playlist on the device for syncing
71 ('username', 'TEXT'), # Username for HTTP authentication (feed update + downloads)
72 ('password', 'TEXT'), # Password for HTTP authentication (feed update + downloads)
73 ('last_modified', 'TEXT'), # Last-modified HTTP header from last update
74 ('etag', 'TEXT'), # ETag HTTP header from last update
75 ('channel_is_locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
76 ('foldername', 'TEXT'), # Folder name (basename) to put downloaded episodes
77 ('auto_foldername', 'INTEGER'), # 1 if the foldername was auto-generated, 0 otherwise
78 ('release_expected', 'INTEGER'), # Statistic value for when a new release is expected
79 ('release_deviation', 'INTEGER'), # Deviation of the release cycle differences
80 ('updated_timestamp', 'INTEGER'), # Timestamp of the last feed update
82 INDEX_CHANNELS = (
83 ('foldername', 'UNIQUE INDEX'),
84 ('url', 'UNIQUE INDEX'),
85 ('sync_to_devices', 'INDEX'),
86 ('title', 'INDEX'),
89 # Column names and types for the episodes table
90 TABLE_EPISODES = 'episodes'
91 SCHEMA_EPISODES = (
92 ('id', 'INTEGER PRIMARY KEY'),
93 ('channel_id', 'INTEGER'), # Foreign key: ID of the podcast of this episode
94 ('url', 'TEXT'), # Download URL of the media file
95 ('title', 'TEXT'), # Episode title
96 ('length', 'INTEGER'), # File length of the media file in bytes
97 ('mimetype', 'TEXT'), # Mime type of the media file
98 ('guid', 'TEXT'), # GUID of the episode item
99 ('description', 'TEXT'), # Longer text description
100 ('link', 'TEXT'), # Website URL for the episode
101 ('pubDate', 'INTEGER'), # Date and time of publication
102 ('state', 'INTEGER'), # Download state (see gpodder.STATE_* constants)
103 ('played', 'INTEGER'), # 1 if it's new or played, 0 otherwise
104 ('locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
105 ('filename', 'TEXT'), # Filename for the downloaded file (or NULL)
106 ('auto_filename', 'INTEGER'), # 1 if the filename was auto-generated, 0 otherwise
107 ('total_time', 'INTEGER'), # Length in seconds
108 ('current_position', 'INTEGER'), # Current playback position
109 ('current_position_updated', 'INTEGER'), # Set to NOW when updating current_position
111 INDEX_EPISODES = (
112 ('guid', 'UNIQUE INDEX'),
113 ('filename', 'UNIQUE INDEX'),
114 ('channel_id', 'INDEX'),
115 ('pubDate', 'INDEX'),
116 ('state', 'INDEX'),
117 ('played', 'INDEX'),
118 ('locked', 'INDEX'),
121 def __init__(self, filename):
122 self.database_file = filename
123 self._db = None
124 self.lock = threading.RLock()
126 def close(self):
127 self.commit()
129 cur = self.cursor(lock=True)
130 log('Optimizing database for faster startup.', sender=self)
131 cur.execute("VACUUM")
132 cur.close()
133 self.lock.release()
135 self._db.close()
136 self._db = None
138 def log(self, message, *args, **kwargs):
139 try:
140 message = message % args
141 log('%s', message, sender=self)
142 except TypeError, e:
143 log('Exception in log(): %s: %s', e, message, sender=self)
145 def purge(self, max_episodes, channel_id):
147 Deletes old episodes. Should be called
148 before adding new episodes to a channel.
150 cur = self.cursor(lock=True)
152 self.log("purge(%s)", channel_id)
153 sql = """
154 DELETE FROM episodes
155 WHERE channel_id = ?
156 AND state <> ?
157 AND id NOT IN
158 (SELECT id FROM episodes WHERE channel_id = ?
159 ORDER BY pubDate DESC LIMIT ?)"""
160 cur.execute(sql, (channel_id, gpodder.STATE_DOWNLOADED, channel_id, max_episodes))
162 cur.close()
163 self.lock.release()
165 def db_sort_cmp(self, a, b):
167 Compare two strings for sorting, including removing
168 a possible "The " prefix and converting umlauts to
169 normal characters so they can be sorted correctly.
170 (i.e. "Ö1" should not appear at the end of the list)
172 try:
173 a = a.decode('utf-8', 'ignore').lower()
174 a = re.sub('^the ', '', a)
175 a = a.translate(self.UNICODE_TRANSLATE)
176 b = b.decode('utf-8', 'ignore').lower()
177 b = re.sub('^the ', '', b)
178 b = b.translate(self.UNICODE_TRANSLATE)
179 return cmp(a, b)
180 except:
181 log('Error while comparing "%s" and "%s"', a, b, sender=self, traceback=True)
182 a = re.sub('^the ', '', a.lower())
183 b = re.sub('^the ', '', b.lower())
184 return cmp(a, b)
186 @property
187 def db(self):
188 if self._db is None:
189 self._db = sqlite.connect(self.database_file, check_same_thread=False)
190 self._db.text_factory = str
191 self._db.create_collation("UNICODE", self.db_sort_cmp)
192 self.log('Connected')
193 self.__check_schema()
194 return self._db
196 def cursor(self, lock=False):
197 if lock:
198 self.lock.acquire()
199 return self.db.cursor()
201 def commit(self):
202 self.lock.acquire()
203 try:
204 self.log("COMMIT")
205 self.db.commit()
206 except ProgrammingError, e:
207 log('Error commiting changes: %s', e, sender=self, traceback=True)
208 self.lock.release()
210 def _remove_deleted_channels(self):
211 """Remove deleted podcasts and episodes (upgrade from gPodder <= 2.5)
213 If the database has been created with gPodder <= 2.5, it could
214 be possible that podcasts have been deleted where metadata and
215 episodes are kept.
217 We don't support this kind of "information keeping" anymore, so
218 simply go ahead and remove all podcast marked as "deleted" and
219 their corresponding episodes to slim down the database.
221 cur = self.cursor(lock=True)
222 cur.execute("PRAGMA table_info(%s)" % self.TABLE_CHANNELS)
223 available = cur.fetchall()
224 if available:
225 ID, NAME, TYPE, NULL, DEFAULT = range(5)
226 existing = set(column[NAME] for column in available)
228 if 'deleted' in existing:
229 cur.execute('SELECT id FROM %s WHERE deleted = ?' % self.TABLE_CHANNELS, (1,))
230 channel_ids = [id for (id,) in cur]
232 # Remove all deleted channels from the database
233 for id in channel_ids:
234 self.log('Removing deleted channel with ID %d', id)
235 cur.execute('DELETE FROM %s WHERE id = ?' % self.TABLE_CHANNELS, (id,))
236 cur.execute('DELETE FROM %s WHERE channel_id = ?' % self.TABLE_EPISODES, (id,))
237 self.lock.release()
239 def __check_schema(self):
241 Creates all necessary tables and indexes that don't exist.
243 self.log('Setting up tables and views')
245 cur = self.cursor(lock=True)
247 # If a "deleted" column exists in the channel table, remove all
248 # corresponding channels and their episodes and remove it
249 self._remove_deleted_channels()
251 # Create tables and possibly add newly-added columns
252 self.upgrade_table(self.TABLE_CHANNELS, self.SCHEMA_CHANNELS, self.INDEX_CHANNELS)
253 self.upgrade_table(self.TABLE_EPISODES, self.SCHEMA_EPISODES, self.INDEX_EPISODES)
255 # Make sure deleted episodes are played, to simplify querying statistics.
256 try:
257 cur.execute("UPDATE episodes SET played = 1 WHERE state = ?", (gpodder.STATE_DELETED,))
258 except OperationalError:
259 pass
261 cur.close()
262 self.lock.release()
264 def get_channel_count(self, id):
265 """Given a channel ID, returns the statistics for it
267 Returns a tuple (total, deleted, new, downloaded, unplayed)
269 total, deleted, new, downloaded, unplayed = 0, 0, 0, 0, 0
271 cur = self.cursor(lock=True)
272 cur.execute('SELECT COUNT(*), state, played FROM episodes WHERE channel_id = ? GROUP BY state, played', (id,))
273 for count, state, played in cur:
274 total += count
275 if state == gpodder.STATE_DELETED:
276 deleted += count
277 elif state == gpodder.STATE_NORMAL and not played:
278 new += count
279 elif state == gpodder.STATE_DOWNLOADED and not played:
280 downloaded += count
281 unplayed += count
282 elif state == gpodder.STATE_DOWNLOADED:
283 downloaded += count
285 cur.close()
286 self.lock.release()
288 return (total, deleted, new, downloaded, unplayed)
290 def get_total_count(self):
291 """Get statistics for episodes in all channels
293 Returns a tuple (total, deleted, new, downloaded, unplayed)
295 total, deleted, new, downloaded, unplayed = 0, 0, 0, 0, 0
297 cur = self.cursor(lock=True)
298 cur.execute('SELECT COUNT(*), state, played FROM episodes GROUP BY state, played')
299 for count, state, played in cur:
300 total += count
301 if state == gpodder.STATE_DELETED:
302 deleted += count
303 elif state == gpodder.STATE_NORMAL and not played:
304 new += count
305 elif state == gpodder.STATE_DOWNLOADED and not played:
306 downloaded += count
307 unplayed += count
308 elif state == gpodder.STATE_DOWNLOADED:
309 downloaded += count
311 cur.close()
312 self.lock.release()
314 return (total, deleted, new, downloaded, unplayed)
316 def load_channels(self, factory=None, url=None):
318 Returns channel descriptions as a list of dictionaries or objects,
319 returned by the factory() function, which receives the dictionary
320 as the only argument.
323 self.log("load_channels()")
325 cur = self.cursor(lock=True)
326 cur.execute('SELECT * FROM %s ORDER BY title COLLATE UNICODE' % self.TABLE_CHANNELS)
328 result = []
329 keys = list(desc[0] for desc in cur.description)
330 for row in cur:
331 channel = dict(zip(keys, row))
333 if url is None or url == channel['url']:
334 if factory is None:
335 result.append(channel)
336 else:
337 result.append(factory(channel, self))
339 cur.close()
340 self.lock.release()
342 return result
344 def save_channel(self, c):
345 self._save_object(c, self.TABLE_CHANNELS, self.SCHEMA_CHANNELS)
347 def delete_channel(self, channel):
348 assert channel.id is not None
350 cur = self.cursor(lock=True)
351 self.log("delete_channel(%d), %s", channel.id, channel.url)
353 cur.execute("DELETE FROM channels WHERE id = ?", (channel.id, ))
354 cur.execute("DELETE FROM episodes WHERE channel_id = ?", (channel.id, ))
356 cur.close()
357 # Commit changes
358 self.db.commit()
359 self.lock.release()
361 def __read_episodes(self, factory=None, where=None, params=None, commit=True):
362 sql = "SELECT * FROM episodes"
364 if where:
365 sql = "%s %s" % (sql, where)
367 if params is None:
368 params = ()
370 cur = self.cursor(lock=True)
371 cur.execute(sql, params)
373 result = []
374 keys = list(desc[0] for desc in cur.description)
375 for row in cur:
376 episode = dict(zip(keys, row))
377 if episode['state'] is None:
378 episode['state'] = gpodder.STATE_NORMAL
379 if factory is None:
380 result.append(episode)
381 else:
382 result.append(factory(episode, self))
384 cur.close()
385 self.lock.release()
386 return result
388 def load_episodes(self, channel, factory=None, limit=1000, state=None):
389 assert channel.id is not None
391 self.log("load_episodes((%d)%s)", channel.id, channel.url)
393 if state is None:
394 return self.__read_episodes(factory = factory, where = """
395 WHERE channel_id = ? AND state = ? OR id IN
396 (SELECT id FROM episodes WHERE channel_id = ?
397 ORDER BY pubDate DESC LIMIT ?)
398 ORDER BY pubDate DESC
399 """, params = (channel.id, gpodder.STATE_DOWNLOADED, channel.id, limit, ))
400 else:
401 return self.__read_episodes(factory = factory, where = " WHERE channel_id = ? AND state = ? ORDER BY pubDate DESC LIMIT ?", params = (channel.id, state, limit, ))
403 def load_episode(self, url, factory=None):
404 self.log("load_episode(%s)", url)
405 list = self.__read_episodes(factory=factory, where=' WHERE url=? LIMIT ?', params=(url, 1))
406 if list:
407 return list[0]
408 else:
409 return None
411 def save_episode(self, e):
412 assert e.channel_id
414 if not e.guid:
415 self.log('Refusing to save an episode without guid: %s', e)
416 return
418 self._save_object(e, self.TABLE_EPISODES, self.SCHEMA_EPISODES)
420 def _save_object(self, o, table, schema):
421 self.lock.acquire()
422 try:
423 cur = self.cursor()
424 columns = [name for name, typ in schema if name != 'id']
425 values = [getattr(o, name) for name in columns]
427 if o.id is None:
428 qmarks = ', '.join('?'*len(columns))
429 sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table, ', '.join(columns), qmarks)
430 cur.execute(sql, values)
431 o.id = cur.lastrowid
432 else:
433 qmarks = ', '.join('%s = ?' % name for name in columns)
434 values.append(o.id)
435 sql = 'UPDATE %s SET %s WHERE id = ?' % (table, qmarks)
436 cur.execute(sql, values)
437 except Exception, e:
438 log('Cannot save %s to %s: %s', o, table, e, sender=self, traceback=True)
440 cur.close()
441 self.lock.release()
443 def update_episode_state(self, episode):
444 assert episode.id is not None
446 cur = self.cursor(lock=True)
447 cur.execute('UPDATE episodes SET state = ?, played = ?, locked = ? WHERE id = ?', (episode.state, episode.is_played, episode.is_locked, episode.id))
448 cur.close()
449 self.lock.release()
451 def update_channel_lock(self, channel):
452 self.log("update_channel_lock(%s, locked=%s)", channel.url, channel.channel_is_locked)
454 cur = self.cursor(lock=True)
455 cur.execute("UPDATE channels SET channel_is_locked = ? WHERE url = ?", (channel.channel_is_locked, channel.url, ))
456 cur.close()
458 self.lock.release()
460 def __get__(self, sql, params=None):
462 Returns the first cell of a query result, useful for COUNT()s.
464 cur = self.cursor(lock=True)
466 self.log("__get__(): %s", sql)
468 if params is None:
469 cur.execute(sql)
470 else:
471 cur.execute(sql, params)
473 row = cur.fetchone()
474 cur.close()
475 self.lock.release()
477 if row is None:
478 return None
479 else:
480 return row[0]
482 def channel_foldername_exists(self, foldername):
484 Returns True if a foldername for a channel exists.
485 False otherwise.
487 return self.__get__("SELECT id FROM channels WHERE foldername = ?", (foldername,)) is not None
489 def remove_foldername_if_deleted_channel(self, foldername):
490 cur = self.cursor(lock=True)
491 self.log('Setting foldername=NULL for folder "%s"', foldername)
492 cur.execute('UPDATE channels SET foldername=NULL ' + \
493 'WHERE foldername=? AND deleted=1', (foldername,))
494 cur.close()
495 self.lock.release()
497 def episode_filename_exists(self, filename):
499 Returns True if a filename for an episode exists.
500 False otherwise.
502 return self.__get__("SELECT id FROM episodes WHERE filename = ?", (filename,)) is not None
504 def get_last_pubdate(self, channel):
506 Look up the highest "pubDate" value for
507 all episodes of the given podcast.
509 return self.__get__("""
510 SELECT MAX(pubDate) FROM episodes
511 WHERE channel_id = ?
512 """, (channel.id, ))
514 def force_last_new(self, channel):
515 old = self.__get__("""SELECT COUNT(*) FROM episodes WHERE channel_id = ?
516 AND state IN (?, ?)""", (channel.id, gpodder.STATE_DOWNLOADED,
517 gpodder.STATE_DELETED))
519 cur = self.cursor(lock=True)
521 self.log("force_last_new((%d)%s)", channel.id, channel.url)
523 if old > 0:
524 cur.execute("""
525 UPDATE episodes SET played = 1 WHERE channel_id = ?
526 AND played = 0 AND pubDate < (SELECT MAX(pubDate)
527 FROM episodes WHERE channel_id = ? AND state IN (?, ?))""",
528 (channel.id, channel.id, gpodder.STATE_DOWNLOADED,
529 gpodder.STATE_DELETED, ))
530 else:
531 cur.execute("""
532 UPDATE episodes SET played = 1 WHERE channel_id = ?
533 AND pubDate <> (SELECT MAX(pubDate) FROM episodes
534 WHERE channel_id = ?)""", (channel.id, channel.id, ))
536 cur.close()
537 self.lock.release()
539 def upgrade_table(self, table_name, fields, index_list):
541 Creates a table or adds fields to it.
543 cur = self.cursor(lock=True)
545 cur.execute("PRAGMA table_info(%s)" % table_name)
546 available = cur.fetchall()
548 if not available:
549 log('Creating table %s', table_name, sender=self)
550 columns = ', '.join(' '.join(f) for f in fields)
551 sql = "CREATE TABLE %s (%s)" % (table_name, columns)
552 cur.execute(sql)
553 else:
554 # Table info columns, as returned by SQLite
555 ID, NAME, TYPE, NULL, DEFAULT = range(5)
556 existing = set(column[NAME] for column in available)
558 for field_name, field_type in fields:
559 if field_name not in existing:
560 log('Adding column: %s.%s (%s)', table_name, field_name, field_type, sender=self)
561 cur.execute("ALTER TABLE %s ADD COLUMN %s %s" % (table_name, field_name, field_type))
563 for column, typ in index_list:
564 cur.execute('CREATE %s IF NOT EXISTS idx_%s ON %s (%s)' % (typ, column, table_name, column))
566 self.lock.release()
568 def delete_empty_episodes(self, channel_id):
570 Deletes episodes which haven't been downloaded.
571 Currently used when a channel URL is changed.
573 cur = self.cursor(lock=True)
574 log('Deleting old episodes from channel #%d' % channel_id, sender=self)
575 cur.execute("DELETE FROM episodes WHERE channel_id = ? AND state != ?", (channel_id, gpodder.STATE_DOWNLOADED, ))
576 self.lock.release()
578 def delete_episode_by_guid(self, guid, channel_id):
580 Deletes episodes that have a specific GUID for
581 a given channel. Used after feed updates for
582 episodes that have disappeared from the feed.
584 cur = self.cursor(lock=True)
585 cur.execute('DELETE FROM episodes WHERE channel_id = ? AND guid = ?', \
586 (channel_id, guid))
587 self.lock.release()