src/gpodder/dbsqlite.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20 #
  21 # dbsqlite.py -- SQLite persistence layer for gPodder
  22 #
  23 # 2008-06-13 Justin Forest <justin.forest@gmail.com>
  24 # 2010-04-24 Thomas Perl <thp@gpodder.org>
  25 #
  26
  27 import gpodder
  28 _ = gpodder.gettext
  29
  30 import sys
  31
  32 have_sqlite = True
  33
  34 try:
  35     from sqlite3 import dbapi2 as sqlite
  36     from sqlite3 import OperationalError
  37 except ImportError:
  38     try:
  39         from pysqlite2 import dbapi2 as sqlite
  40         from pysqlite2.dbapi2 import OperationalError
  41     except ImportError:
  42         have_sqlite = False
  43
  44 # TODO: show a message box
  45 if not have_sqlite:
  46     print >>sys.stderr, 'Please install pysqlite2 or Python 2.5.'
  47     sys.exit(1)
  48
  49 from gpodder.liblogger import log
  50
  51 import string
  52 import threading
  53 import re
  54
  55 class Database(object):
  56     UNICODE_TRANSLATE = {ord(u'ö'): u'o', ord(u'ä'): u'a', ord(u'ü'): u'u'}
  57
  58     # Column names and types for the channels table
  59     TABLE_CHANNELS = "channels"
  60     SCHEMA_CHANNELS = (
  61             ('id', 'INTEGER PRIMARY KEY'),
  62             ('url', 'TEXT'), # Feed (RSS/Atom) URL of the podcast
  63             ('title', 'TEXT'), # Podcast name
  64             ('override_title', 'TEXT'), # Podcast name if user-defined
  65             ('link', 'TEXT'), # Website URL for the podcast
  66             ('description', 'TEXT'), # Description of podcast contents
  67             ('image', 'TEXT'), # URL to cover art for the image
  68             ('pubDate', 'INTEGER'), # Date and time of last feed publication
  69             ('sync_to_devices', 'INTEGER'), # 1 if syncing to devices is enabled, 0 otherwise
  70             ('device_playlist_name', 'TEXT'), # Name of the playlist on the device for syncing
  71             ('username', 'TEXT'), # Username for HTTP authentication (feed update + downloads)
  72             ('password', 'TEXT'), # Password for HTTP authentication (feed update + downloads)
  73             ('last_modified', 'TEXT'), # Last-modified HTTP header from last update
  74             ('etag', 'TEXT'), # ETag HTTP header from last update
  75             ('channel_is_locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
  76             ('foldername', 'TEXT'), # Folder name (basename) to put downloaded episodes
  77             ('auto_foldername', 'INTEGER'), # 1 if the foldername was auto-generated, 0 otherwise
  78             ('release_expected', 'INTEGER'), # Statistic value for when a new release is expected
  79             ('release_deviation', 'INTEGER'), # Deviation of the release cycle differences
  80             ('updated_timestamp', 'INTEGER'), # Timestamp of the last feed update
  81     )
  82     INDEX_CHANNELS = (
  83             ('foldername', 'UNIQUE INDEX'),
  84             ('url', 'UNIQUE INDEX'),
  85             ('sync_to_devices', 'INDEX'),
  86             ('title', 'INDEX'),
  87     )
  88
  89     # Column names and types for the episodes table
  90     TABLE_EPISODES = 'episodes'
  91     SCHEMA_EPISODES = (
  92             ('id', 'INTEGER PRIMARY KEY'),
  93             ('channel_id', 'INTEGER'), # Foreign key: ID of the podcast of this episode
  94             ('url', 'TEXT'), # Download URL of the media file
  95             ('title', 'TEXT'), # Episode title
  96             ('length', 'INTEGER'), # File length of the media file in bytes
  97             ('mimetype', 'TEXT'), # Mime type of the media file
  98             ('guid', 'TEXT'), # GUID of the episode item
  99             ('description', 'TEXT'), # Longer text description
 100             ('link', 'TEXT'), # Website URL for the episode
 101             ('pubDate', 'INTEGER'), # Date and time of publication
 102             ('state', 'INTEGER'), # Download state (see gpodder.STATE_* constants)
 103             ('played', 'INTEGER'), # 1 if it's new or played, 0 otherwise
 104             ('locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
 105             ('filename', 'TEXT'), # Filename for the downloaded file (or NULL)
 106             ('auto_filename', 'INTEGER'), # 1 if the filename was auto-generated, 0 otherwise
 107             ('total_time', 'INTEGER'), # Length in seconds
 108             ('current_position', 'INTEGER'), # Current playback position
 109             ('current_position_updated', 'INTEGER'), # Set to NOW when updating current_position
 110     )
 111     INDEX_EPISODES = (
 112             ('guid', 'UNIQUE INDEX'),
 113             ('filename', 'UNIQUE INDEX'),
 114             ('channel_id', 'INDEX'),
 115             ('pubDate', 'INDEX'),
 116             ('state', 'INDEX'),
 117             ('played', 'INDEX'),
 118             ('locked', 'INDEX'),
 119     )
 120
 121     def __init__(self, filename):
 122         self.database_file = filename
 123         self._db = None
 124         self.lock = threading.RLock()
 125
 126     def close(self):
 127         self.commit()
 128
 129         cur = self.cursor(lock=True)
 130         log('Optimizing database for faster startup.', sender=self)
 131         cur.execute("VACUUM")
 132         cur.close()
 133         self.lock.release()
 134
 135         self._db.close()
 136         self._db = None
 137
 138     def log(self, message, *args, **kwargs):
 139         try:
 140             message = message % args
 141             log('%s', message, sender=self)
 142         except TypeError, e:
 143             log('Exception in log(): %s: %s', e, message, sender=self)
 144
 145     def purge(self, max_episodes, channel_id):
 146         """
 147         Deletes old episodes.  Should be called
 148         before adding new episodes to a channel.
 149         """
 150         cur = self.cursor(lock=True)
 151
 152         self.log("purge(%s)", channel_id)
 153         sql = """
 154             DELETE FROM episodes
 155             WHERE channel_id = ?
 156             AND state <> ?
 157             AND id NOT IN
 158             (SELECT id FROM episodes WHERE channel_id = ?
 159             ORDER BY pubDate DESC LIMIT ?)"""
 160         cur.execute(sql, (channel_id, gpodder.STATE_DOWNLOADED, channel_id, max_episodes))
 161
 162         cur.close()
 163         self.lock.release()
 164
 165     def db_sort_cmp(self, a, b):
 166         """
 167         Compare two strings for sorting, including removing
 168         a possible "The " prefix and converting umlauts to
 169         normal characters so they can be sorted correctly.
 170         (i.e. "Ö1" should not appear at the end of the list)
 171         """
 172         try:
 173             a = a.decode('utf-8', 'ignore').lower()
 174             a = re.sub('^the ', '', a)
 175             a = a.translate(self.UNICODE_TRANSLATE)
 176             b = b.decode('utf-8', 'ignore').lower()
 177             b = re.sub('^the ', '', b)
 178             b = b.translate(self.UNICODE_TRANSLATE)
 179             return cmp(a, b)
 180         except:
 181             log('Error while comparing "%s" and "%s"', a, b, sender=self, traceback=True)
 182             a = re.sub('^the ', '', a.lower())
 183             b = re.sub('^the ', '', b.lower())
 184             return cmp(a, b)
 185
 186     @property
 187     def db(self):
 188         if self._db is None:
 189             self._db = sqlite.connect(self.database_file, check_same_thread=False)
 190             self._db.text_factory = str
 191             self._db.create_collation("UNICODE", self.db_sort_cmp)
 192             self.log('Connected')
 193             self.__check_schema()
 194         return self._db
 195
 196     def cursor(self, lock=False):
 197         if lock:
 198             self.lock.acquire()
 199         return self.db.cursor()
 200
 201     def commit(self):
 202         self.lock.acquire()
 203         try:
 204             self.log("COMMIT")
 205             self.db.commit()
 206         except ProgrammingError, e:
 207             log('Error commiting changes: %s', e, sender=self, traceback=True)
 208         self.lock.release()
 209
 210     def _remove_deleted_channels(self):
 211         """Remove deleted podcasts and episodes (upgrade from gPodder <= 2.5)
 212
 213         If the database has been created with gPodder <= 2.5, it could
 214         be possible that podcasts have been deleted where metadata and
 215         episodes are kept.
 216
 217         We don't support this kind of "information keeping" anymore, so
 218         simply go ahead and remove all podcast marked as "deleted" and
 219         their corresponding episodes to slim down the database.
 220         """
 221         cur = self.cursor(lock=True)
 222         cur.execute("PRAGMA table_info(%s)" % self.TABLE_CHANNELS)
 223         available = cur.fetchall()
 224         if available:
 225             ID, NAME, TYPE, NULL, DEFAULT = range(5)
 226             existing = set(column[NAME] for column in available)
 227
 228             if 'deleted' in existing:
 229                 cur.execute('SELECT id FROM %s WHERE deleted = ?' % self.TABLE_CHANNELS, (1,))
 230                 channel_ids = [id for (id,) in cur]
 231
 232                 # Remove all deleted channels from the database
 233                 for id in channel_ids:
 234                     self.log('Removing deleted channel with ID %d', id)
 235                     cur.execute('DELETE FROM %s WHERE id = ?' % self.TABLE_CHANNELS, (id,))
 236                     cur.execute('DELETE FROM %s WHERE channel_id = ?' % self.TABLE_EPISODES, (id,))
 237         self.lock.release()
 238
 239     def __check_schema(self):
 240         """
 241         Creates all necessary tables and indexes that don't exist.
 242         """
 243         self.log('Setting up tables and views')
 244
 245         cur = self.cursor(lock=True)
 246
 247         # If a "deleted" column exists in the channel table, remove all
 248         # corresponding channels and their episodes and remove it
 249         self._remove_deleted_channels()
 250
 251         # Create tables and possibly add newly-added columns
 252         self.upgrade_table(self.TABLE_CHANNELS, self.SCHEMA_CHANNELS, self.INDEX_CHANNELS)
 253         self.upgrade_table(self.TABLE_EPISODES, self.SCHEMA_EPISODES, self.INDEX_EPISODES)
 254
 255         # Make sure deleted episodes are played, to simplify querying statistics.
 256         try:
 257             cur.execute("UPDATE episodes SET played = 1 WHERE state = ?", (gpodder.STATE_DELETED,))
 258         except OperationalError:
 259             pass
 260
 261         cur.close()
 262         self.lock.release()
 263
 264     def get_channel_count(self, id):
 265         """Given a channel ID, returns the statistics for it
 266
 267         Returns a tuple (total, deleted, new, downloaded, unplayed)
 268         """
 269         total, deleted, new, downloaded, unplayed = 0, 0, 0, 0, 0
 270
 271         cur = self.cursor(lock=True)
 272         cur.execute('SELECT COUNT(*), state, played FROM episodes WHERE channel_id = ? GROUP BY state, played', (id,))
 273         for count, state, played in cur:
 274             total += count
 275             if state == gpodder.STATE_DELETED:
 276                 deleted += count
 277             elif state == gpodder.STATE_NORMAL and not played:
 278                 new += count
 279             elif state == gpodder.STATE_DOWNLOADED and not played:
 280                 downloaded += count
 281                 unplayed += count
 282             elif state == gpodder.STATE_DOWNLOADED:
 283                 downloaded += count
 284
 285         cur.close()
 286         self.lock.release()
 287
 288         return (total, deleted, new, downloaded, unplayed)
 289
 290     def get_total_count(self):
 291         """Get statistics for episodes in all channels
 292
 293         Returns a tuple (total, deleted, new, downloaded, unplayed)
 294         """
 295         total, deleted, new, downloaded, unplayed = 0, 0, 0, 0, 0
 296
 297         cur = self.cursor(lock=True)
 298         cur.execute('SELECT COUNT(*), state, played FROM episodes GROUP BY state, played')
 299         for count, state, played in cur:
 300             total += count
 301             if state == gpodder.STATE_DELETED:
 302                 deleted += count
 303             elif state == gpodder.STATE_NORMAL and not played:
 304                 new += count
 305             elif state == gpodder.STATE_DOWNLOADED and not played:
 306                 downloaded += count
 307                 unplayed += count
 308             elif state == gpodder.STATE_DOWNLOADED:
 309                 downloaded += count
 310
 311         cur.close()
 312         self.lock.release()
 313
 314         return (total, deleted, new, downloaded, unplayed)
 315
 316     def load_channels(self, factory=None, url=None):
 317         """
 318         Returns channel descriptions as a list of dictionaries or objects,
 319         returned by the factory() function, which receives the dictionary
 320         as the only argument.
 321         """
 322
 323         self.log("load_channels()")
 324
 325         cur = self.cursor(lock=True)
 326         cur.execute('SELECT * FROM %s ORDER BY title COLLATE UNICODE' % self.TABLE_CHANNELS)
 327
 328         result = []
 329         keys = list(desc[0] for desc in cur.description)
 330         for row in cur:
 331             channel = dict(zip(keys, row))
 332
 333             if url is None or url == channel['url']:
 334                 if factory is None:
 335                     result.append(channel)
 336                 else:
 337                     result.append(factory(channel, self))
 338
 339         cur.close()
 340         self.lock.release()
 341
 342         return result
 343
 344     def save_channel(self, c):
 345         self._save_object(c, self.TABLE_CHANNELS, self.SCHEMA_CHANNELS)
 346
 347     def delete_channel(self, channel):
 348         assert channel.id is not None
 349
 350         cur = self.cursor(lock=True)
 351         self.log("delete_channel(%d), %s", channel.id, channel.url)
 352
 353         cur.execute("DELETE FROM channels WHERE id = ?", (channel.id, ))
 354         cur.execute("DELETE FROM episodes WHERE channel_id = ?", (channel.id, ))
 355
 356         cur.close()
 357         # Commit changes
 358         self.db.commit()
 359         self.lock.release()
 360
 361     def __read_episodes(self, factory=None, where=None, params=None, commit=True):
 362         sql = "SELECT * FROM episodes"
 363
 364         if where:
 365             sql = "%s %s" % (sql, where)
 366
 367         if params is None:
 368             params = ()
 369
 370         cur = self.cursor(lock=True)
 371         cur.execute(sql, params)
 372
 373         result = []
 374         keys = list(desc[0] for desc in cur.description)
 375         for row in cur:
 376             episode = dict(zip(keys, row))
 377             if episode['state'] is None:
 378                 episode['state'] = gpodder.STATE_NORMAL
 379             if factory is None:
 380                 result.append(episode)
 381             else:
 382                 result.append(factory(episode, self))
 383
 384         cur.close()
 385         self.lock.release()
 386         return result
 387
 388     def load_episodes(self, channel, factory=None, limit=1000, state=None):
 389         assert channel.id is not None
 390
 391         self.log("load_episodes((%d)%s)", channel.id, channel.url)
 392
 393         if state is None:
 394             return self.__read_episodes(factory = factory, where = """
 395                 WHERE channel_id = ? AND state = ? OR id IN
 396                 (SELECT id FROM episodes WHERE channel_id = ?
 397                 ORDER BY pubDate DESC LIMIT ?)
 398                 ORDER BY pubDate DESC
 399                 """, params = (channel.id, gpodder.STATE_DOWNLOADED, channel.id, limit, ))
 400         else:
 401             return self.__read_episodes(factory = factory, where = " WHERE channel_id = ? AND state = ? ORDER BY pubDate DESC LIMIT ?", params = (channel.id, state, limit, ))
 402
 403     def load_episode(self, url, factory=None):
 404         self.log("load_episode(%s)", url)
 405         list = self.__read_episodes(factory=factory, where=' WHERE url=? LIMIT ?', params=(url, 1))
 406         if list:
 407             return list[0]
 408         else:
 409             return None
 410
 411     def save_episode(self, e):
 412         assert e.channel_id
 413
 414         if not e.guid:
 415             self.log('Refusing to save an episode without guid: %s', e)
 416             return
 417
 418         self._save_object(e, self.TABLE_EPISODES, self.SCHEMA_EPISODES)
 419
 420     def _save_object(self, o, table, schema):
 421         self.lock.acquire()
 422         try:
 423             cur = self.cursor()
 424             columns = [name for name, typ in schema if name != 'id']
 425             values = [getattr(o, name) for name in columns]
 426
 427             if o.id is None:
 428                 qmarks = ', '.join('?'*len(columns))
 429                 sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table, ', '.join(columns), qmarks)
 430                 cur.execute(sql, values)
 431                 o.id = cur.lastrowid
 432             else:
 433                 qmarks = ', '.join('%s = ?' % name for name in columns)
 434                 values.append(o.id)
 435                 sql = 'UPDATE %s SET %s WHERE id = ?' % (table, qmarks)
 436                 cur.execute(sql, values)
 437         except Exception, e:
 438             log('Cannot save %s to %s: %s', o, table, e, sender=self, traceback=True)
 439
 440         cur.close()
 441         self.lock.release()
 442
 443     def update_episode_state(self, episode):
 444         assert episode.id is not None
 445
 446         cur = self.cursor(lock=True)
 447         cur.execute('UPDATE episodes SET state = ?, played = ?, locked = ? WHERE id = ?', (episode.state, episode.is_played, episode.is_locked, episode.id))
 448         cur.close()
 449         self.lock.release()
 450
 451     def update_channel_lock(self, channel):
 452         self.log("update_channel_lock(%s, locked=%s)", channel.url, channel.channel_is_locked)
 453
 454         cur = self.cursor(lock=True)
 455         cur.execute("UPDATE channels SET channel_is_locked = ? WHERE url = ?", (channel.channel_is_locked, channel.url, ))
 456         cur.close()
 457
 458         self.lock.release()
 459
 460     def __get__(self, sql, params=None):
 461         """
 462         Returns the first cell of a query result, useful for COUNT()s.
 463         """
 464         cur = self.cursor(lock=True)
 465
 466         self.log("__get__(): %s", sql)
 467
 468         if params is None:
 469             cur.execute(sql)
 470         else:
 471             cur.execute(sql, params)
 472
 473         row = cur.fetchone()
 474         cur.close()
 475         self.lock.release()
 476
 477         if row is None:
 478             return None
 479         else:
 480             return row[0]
 481
 482     def channel_foldername_exists(self, foldername):
 483         """
 484         Returns True if a foldername for a channel exists.
 485         False otherwise.
 486         """
 487         return self.__get__("SELECT id FROM channels WHERE foldername = ?", (foldername,)) is not None
 488
 489     def remove_foldername_if_deleted_channel(self, foldername):
 490         cur = self.cursor(lock=True)
 491         self.log('Setting foldername=NULL for folder "%s"', foldername)
 492         cur.execute('UPDATE channels SET foldername=NULL ' + \
 493                     'WHERE foldername=? AND deleted=1', (foldername,))
 494         cur.close()
 495         self.lock.release()
 496
 497     def episode_filename_exists(self, filename):
 498         """
 499         Returns True if a filename for an episode exists.
 500         False otherwise.
 501         """
 502         return self.__get__("SELECT id FROM episodes WHERE filename = ?", (filename,)) is not None
 503
 504     def get_last_pubdate(self, channel):
 505         """
 506         Look up the highest "pubDate" value for
 507         all episodes of the given podcast.
 508         """
 509         return self.__get__("""
 510                 SELECT MAX(pubDate) FROM episodes
 511                 WHERE channel_id = ?
 512                 """, (channel.id, ))
 513
 514     def force_last_new(self, channel):
 515         old = self.__get__("""SELECT COUNT(*) FROM episodes WHERE channel_id = ?
 516             AND state IN (?, ?)""", (channel.id, gpodder.STATE_DOWNLOADED,
 517             gpodder.STATE_DELETED))
 518
 519         cur = self.cursor(lock=True)
 520
 521         self.log("force_last_new((%d)%s)", channel.id, channel.url)
 522
 523         if old > 0:
 524             cur.execute("""
 525                 UPDATE episodes SET played = 1 WHERE channel_id = ?
 526                 AND played = 0 AND pubDate < (SELECT MAX(pubDate)
 527                 FROM episodes WHERE channel_id = ? AND state IN (?, ?))""",
 528                 (channel.id, channel.id, gpodder.STATE_DOWNLOADED,
 529                 gpodder.STATE_DELETED, ))
 530         else:
 531             cur.execute("""
 532                 UPDATE episodes SET played = 1 WHERE channel_id = ?
 533                 AND pubDate <> (SELECT MAX(pubDate) FROM episodes
 534                 WHERE channel_id = ?)""", (channel.id, channel.id, ))
 535
 536         cur.close()
 537         self.lock.release()
 538
 539     def upgrade_table(self, table_name, fields, index_list):
 540         """
 541         Creates a table or adds fields to it.
 542         """
 543         cur = self.cursor(lock=True)
 544
 545         cur.execute("PRAGMA table_info(%s)" % table_name)
 546         available = cur.fetchall()
 547
 548         if not available:
 549             log('Creating table %s', table_name, sender=self)
 550             columns = ', '.join(' '.join(f) for f in fields)
 551             sql = "CREATE TABLE %s (%s)" % (table_name, columns)
 552             cur.execute(sql)
 553         else:
 554             # Table info columns, as returned by SQLite
 555             ID, NAME, TYPE, NULL, DEFAULT = range(5)
 556             existing = set(column[NAME] for column in available)
 557
 558             for field_name, field_type in fields:
 559                 if field_name not in existing:
 560                     log('Adding column: %s.%s (%s)', table_name, field_name, field_type, sender=self)
 561                     cur.execute("ALTER TABLE %s ADD COLUMN %s %s" % (table_name, field_name, field_type))
 562
 563         for column, typ in index_list:
 564             cur.execute('CREATE %s IF NOT EXISTS idx_%s ON %s (%s)' % (typ, column, table_name, column))
 565
 566         self.lock.release()
 567
 568     def delete_empty_episodes(self, channel_id):
 569         """
 570         Deletes episodes which haven't been downloaded.
 571         Currently used when a channel URL is changed.
 572         """
 573         cur = self.cursor(lock=True)
 574         log('Deleting old episodes from channel #%d' % channel_id, sender=self)
 575         cur.execute("DELETE FROM episodes WHERE channel_id = ? AND state != ?", (channel_id, gpodder.STATE_DOWNLOADED, ))
 576         self.lock.release()
 577
 578     def delete_episode_by_guid(self, guid, channel_id):
 579         """
 580         Deletes episodes that have a specific GUID for
 581         a given channel. Used after feed updates for
 582         episodes that have disappeared from the feed.
 583         """
 584         cur = self.cursor(lock=True)
 585         cur.execute('DELETE FROM episodes WHERE channel_id = ? AND guid = ?', \
 586                 (channel_id, guid))
 587         self.lock.release()
 588