1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # dbsqlite.py -- SQLite persistence layer for gPodder
23 # 2008-06-13 Justin Forest <justin.forest@gmail.com>
24 # 2010-04-24 Thomas Perl <thp@gpodder.org>
35 from sqlite3
import dbapi2
as sqlite
36 from sqlite3
import OperationalError
39 from pysqlite2
import dbapi2
as sqlite
40 from pysqlite2
.dbapi2
import OperationalError
44 # TODO: show a message box
46 print >>sys
.stderr
, 'Please install pysqlite2 or Python 2.5.'
49 from gpodder
.liblogger
import log
54 class Database(object):
55 UNICODE_TRANSLATE
= {ord(u
'ö'): u
'o', ord(u
'ä'): u
'a', ord(u
'ü'): u
'u'}
57 # Column names, types, required and default values for the channels table
58 TABLE_CHANNELS
= "channels"
60 ('id', 'INTEGER PRIMARY KEY', True, '-1'),
61 ('url', 'TEXT', True, "''"), # Feed (RSS/Atom) URL of the podcast
62 ('title', 'TEXT', True, "''"), # Podcast name
63 ('override_title', 'TEXT', True, "''"), # Podcast name if user-defined
64 ('link', 'TEXT', True, "''"), # Website URL for the podcast
65 ('description', 'TEXT', False, None), # Description of podcast contents
66 ('image', 'TEXT', False, None), # URL to cover art for the image
67 ('pubDate', 'INTEGER', True, '0'), # Date and time of last feed publication
68 ('sync_to_devices', 'INTEGER', True, '1'), # 1 if syncing to devices is enabled, 0 otherwise
69 ('device_playlist_name', 'TEXT', True, "'gPodder'"), # Name of the playlist on the device for syncing
70 ('username', 'TEXT', True, "''"), # Username for HTTP authentication (feed update + downloads)
71 ('password', 'TEXT', True, "''"), # Password for HTTP authentication (feed update + downloads)
72 ('last_modified', 'TEXT', False, None), # Last-modified HTTP header from last update
73 ('etag', 'TEXT', False, None), # ETag HTTP header from last update
74 ('channel_is_locked', 'INTEGER', True, '0'), # 1 if deletion is prevented, 0 otherwise
75 ('foldername', 'TEXT', True, "''"), # Folder name (basename) to put downloaded episodes
76 ('auto_foldername', 'INTEGER', True, '1'), # 1 if the foldername was auto-generated, 0 otherwise
77 ('release_expected', 'INTEGER', True, '0'), # Statistic value for when a new release is expected
78 ('release_deviation', 'INTEGER', True, '0'), # Deviation of the release cycle differences
79 ('updated_timestamp', 'INTEGER', True, '0'), # Timestamp of the last feed update
82 ('foldername', 'UNIQUE INDEX'),
83 ('url', 'UNIQUE INDEX'),
84 ('sync_to_devices', 'INDEX'),
88 # Column names and types for the episodes table
89 TABLE_EPISODES
= 'episodes'
91 ('id', 'INTEGER PRIMARY KEY', True, '-1'),
92 ('channel_id', 'INTEGER', True, '-1'), # Foreign key: ID of the podcast of this episode
93 ('url', 'TEXT', True, "''"), # Download URL of the media file
94 ('title', 'TEXT', True, "''"), # Episode title
95 ('length', 'INTEGER', True, '0'), # File length of the media file in bytes
96 ('mimetype', 'TEXT', True, "''"), # Mime type of the media file
97 ('guid', 'TEXT', True, "''"), # GUID of the episode item
98 ('description', 'TEXT', True, "''"), # Longer text description
99 ('link', 'TEXT', True, "''"), # Website URL for the episode
100 ('pubDate', 'INTEGER', True, '0'), # Date and time of publication
101 ('state', 'INTEGER', True, '0'), # Download state (see gpodder.STATE_* constants)
102 ('played', 'INTEGER', True, '1'), # 1 if it's new or played, 0 otherwise
103 ('locked', 'INTEGER', True, '0'), # 1 if deletion is prevented, 0 otherwise
104 ('filename', 'TEXT', False, None), # Filename for the downloaded file (or NULL)
105 ('auto_filename', 'INTEGER', True, '0'), # 1 if the filename was auto-generated, 0 otherwise
106 ('total_time', 'INTEGER', True, '0'), # Length in seconds
107 ('current_position', 'INTEGER', True, '0'), # Current playback position
108 ('current_position_updated', 'INTEGER', True, '0'), # Set to NOW when updating current_position
111 ('guid', 'UNIQUE INDEX'),
112 ('filename', 'UNIQUE INDEX'),
113 ('channel_id', 'INDEX'),
114 ('pubDate', 'INDEX'),
120 def __init__(self
, filename
):
121 self
.database_file
= filename
123 self
.lock
= threading
.RLock()
128 cur
= self
.cursor(lock
=True)
129 log('Optimizing database for faster startup.', sender
=self
)
130 cur
.execute("VACUUM")
137 def log(self
, message
, *args
, **kwargs
):
139 message
= message
% args
140 log('%s', message
, sender
=self
)
142 log('Exception in log(): %s: %s', e
, message
, sender
=self
)
144 def purge(self
, max_episodes
, channel_id
):
146 Deletes old episodes. Should be called
147 before adding new episodes to a channel.
149 cur
= self
.cursor(lock
=True)
151 self
.log("purge(%s)", channel_id
)
157 (SELECT id FROM episodes WHERE channel_id = ?
158 ORDER BY pubDate DESC LIMIT ?)"""
159 cur
.execute(sql
, (channel_id
, gpodder
.STATE_DOWNLOADED
, channel_id
, max_episodes
))
164 def db_sort_cmp(self
, a
, b
):
166 Compare two strings for sorting, including removing
167 a possible "The " prefix and converting umlauts to
168 normal characters so they can be sorted correctly.
169 (i.e. "Ö1" should not appear at the end of the list)
172 a
= a
.decode('utf-8', 'ignore').lower()
173 a
= re
.sub('^the ', '', a
)
174 a
= a
.translate(self
.UNICODE_TRANSLATE
)
175 b
= b
.decode('utf-8', 'ignore').lower()
176 b
= re
.sub('^the ', '', b
)
177 b
= b
.translate(self
.UNICODE_TRANSLATE
)
180 log('Error while comparing "%s" and "%s"', a
, b
, sender
=self
, traceback
=True)
181 a
= re
.sub('^the ', '', a
.lower())
182 b
= re
.sub('^the ', '', b
.lower())
188 self
._db
= sqlite
.connect(self
.database_file
, check_same_thread
=False)
189 self
._db
.text_factory
= str
190 self
._db
.create_collation("UNICODE", self
.db_sort_cmp
)
191 self
.log('Connected')
192 self
.__check
_schema
()
195 def cursor(self
, lock
=False):
198 return self
.db
.cursor()
206 log('Error commiting changes: %s', e
, sender
=self
, traceback
=True)
209 def _remove_deleted_channels(self
):
210 """Remove deleted podcasts and episodes (upgrade from gPodder <= 2.5)
212 If the database has been created with gPodder <= 2.5, it could
213 be possible that podcasts have been deleted where metadata and
216 We don't support this kind of "information keeping" anymore, so
217 simply go ahead and remove all podcast marked as "deleted" and
218 their corresponding episodes to slim down the database.
220 cur
= self
.cursor(lock
=True)
221 cur
.execute("PRAGMA table_info(%s)" % self
.TABLE_CHANNELS
)
222 available
= cur
.fetchall()
224 ID
, NAME
, TYPE
, NOTNULL
, DEFAULT
= range(5)
225 existing
= set(column
[NAME
] for column
in available
)
227 if 'deleted' in existing
:
228 cur
.execute('SELECT id FROM %s WHERE deleted = ?' % self
.TABLE_CHANNELS
, (1,))
229 channel_ids
= [id for (id,) in cur
]
231 # Remove all deleted channels from the database
232 for id in channel_ids
:
233 self
.log('Removing deleted channel with ID %d', id)
234 cur
.execute('DELETE FROM %s WHERE id = ?' % self
.TABLE_CHANNELS
, (id,))
235 cur
.execute('DELETE FROM %s WHERE channel_id = ?' % self
.TABLE_EPISODES
, (id,))
238 def _remove_orphaned_episodes(self
):
239 """Remove episodes without a corresponding podcast
241 In some weird circumstances, it can happen that episodes are
242 left in the database that do not have a fitting podcast in the
243 database. This is an inconsistency. We simply delete the
244 episode information in this case, as we can't find a podcast.
246 cur
= self
.cursor(lock
=True)
247 sql
= 'DELETE FROM %s WHERE channel_id NOT IN ' + \
248 '(SELECT DISTINCT id FROM %s)'
249 cur
.execute(sql
% (self
.TABLE_EPISODES
, self
.TABLE_CHANNELS
,))
252 def __check_schema(self
):
254 Creates all necessary tables and indexes that don't exist.
256 self
.log('Setting up tables and views')
258 cur
= self
.cursor(lock
=True)
260 # If a "deleted" column exists in the channel table, remove all
261 # corresponding channels and their episodes and remove it
262 self
._remove
_deleted
_channels
()
264 # Create tables and possibly add newly-added columns
265 self
.upgrade_table(self
.TABLE_CHANNELS
, self
.SCHEMA_CHANNELS
, self
.INDEX_CHANNELS
)
266 self
.upgrade_table(self
.TABLE_EPISODES
, self
.SCHEMA_EPISODES
, self
.INDEX_EPISODES
)
268 # Remove orphaned episodes (episodes without a corresponding
269 # channel object) from the database to keep the DB clean
270 self
._remove
_orphaned
_episodes
()
272 # Make sure deleted episodes are played, to simplify querying statistics.
274 cur
.execute("UPDATE episodes SET played = 1 WHERE state = ?", (gpodder
.STATE_DELETED
,))
275 except OperationalError
:
281 def get_channel_count(self
, id):
282 """Given a channel ID, returns the statistics for it
284 Returns a tuple (total, deleted, new, downloaded, unplayed)
286 total
, deleted
, new
, downloaded
, unplayed
= 0, 0, 0, 0, 0
288 cur
= self
.cursor(lock
=True)
289 cur
.execute('SELECT COUNT(*), state, played FROM episodes WHERE channel_id = ? GROUP BY state, played', (id,))
290 for count
, state
, played
in cur
:
292 if state
== gpodder
.STATE_DELETED
:
294 elif state
== gpodder
.STATE_NORMAL
and not played
:
296 elif state
== gpodder
.STATE_DOWNLOADED
and not played
:
299 elif state
== gpodder
.STATE_DOWNLOADED
:
305 return (total
, deleted
, new
, downloaded
, unplayed
)
307 def get_total_count(self
):
308 """Get statistics for episodes in all channels
310 Returns a tuple (total, deleted, new, downloaded, unplayed)
312 total
, deleted
, new
, downloaded
, unplayed
= 0, 0, 0, 0, 0
314 cur
= self
.cursor(lock
=True)
315 cur
.execute('SELECT COUNT(*), state, played FROM episodes GROUP BY state, played')
316 for count
, state
, played
in cur
:
318 if state
== gpodder
.STATE_DELETED
:
320 elif state
== gpodder
.STATE_NORMAL
and not played
:
322 elif state
== gpodder
.STATE_DOWNLOADED
and not played
:
325 elif state
== gpodder
.STATE_DOWNLOADED
:
331 return (total
, deleted
, new
, downloaded
, unplayed
)
333 def load_channels(self
, factory
=None, url
=None):
335 Returns channel descriptions as a list of dictionaries or objects,
336 returned by the factory() function, which receives the dictionary
337 as the only argument.
340 self
.log("load_channels()")
342 cur
= self
.cursor(lock
=True)
343 cur
.execute('SELECT * FROM %s ORDER BY title COLLATE UNICODE' % self
.TABLE_CHANNELS
)
346 keys
= list(desc
[0] for desc
in cur
.description
)
348 channel
= dict(zip(keys
, row
))
350 if url
is None or url
== channel
['url']:
352 result
.append(channel
)
354 result
.append(factory(channel
, self
))
361 def save_channel(self
, c
):
362 self
._save
_object
(c
, self
.TABLE_CHANNELS
, self
.SCHEMA_CHANNELS
)
364 def delete_channel(self
, channel
):
365 assert channel
.id is not None
367 cur
= self
.cursor(lock
=True)
368 self
.log("delete_channel(%d), %s", channel
.id, channel
.url
)
370 cur
.execute("DELETE FROM channels WHERE id = ?", (channel
.id, ))
371 cur
.execute("DELETE FROM episodes WHERE channel_id = ?", (channel
.id, ))
378 def load_all_episodes(self
, channel_mapping
, limit
=10000):
379 self
.log('Loading all episodes from the database')
380 sql
= 'SELECT * FROM %s ORDER BY pubDate DESC LIMIT ?' % (self
.TABLE_EPISODES
,)
382 cur
= self
.cursor(lock
=True)
383 cur
.execute(sql
, args
)
384 keys
= [desc
[0] for desc
in cur
.description
]
385 id_index
= keys
.index('channel_id')
386 result
= map(lambda row
: channel_mapping
[row
[id_index
]].episode_factory(dict(zip(keys
, row
))), cur
)
391 def load_episodes(self
, channel
, factory
=lambda x
: x
, limit
=1000, state
=None):
392 assert channel
.id is not None
394 self
.log('Loading episodes for channel %d', channel
.id)
397 sql
= 'SELECT * FROM %s WHERE channel_id = ? ORDER BY pubDate DESC LIMIT ?' % (self
.TABLE_EPISODES
,)
398 args
= (channel
.id, limit
)
400 sql
= 'SELECT * FROM %s WHERE channel_id = ? AND state = ? ORDER BY pubDate DESC LIMIT ?' % (self
.TABLE_EPISODES
,)
401 args
= (channel
.id, state
, limit
)
403 cur
= self
.cursor(lock
=True)
404 cur
.execute(sql
, args
)
405 keys
= [desc
[0] for desc
in cur
.description
]
406 result
= map(lambda row
: factory(dict(zip(keys
, row
)), self
), cur
)
411 def load_single_episode(self
, channel
, factory
=lambda x
: x
, **kwargs
):
412 """Load one episode with keywords
414 Return an episode object (created by "factory") for a
415 given channel. You can use keyword arguments to specify
416 the attributes that the episode object should have.
419 db.load_single_episode(channel, url='x')
421 This will search all episodes belonging to "channel"
422 and return the first one where the "url" column is "x".
424 Returns None if the episode cannot be found.
426 assert channel
.id is not None
428 # Inject channel_id into query to reduce search space
429 kwargs
['channel_id'] = channel
.id
431 # We need to have the keys in the same order as the values, so
432 # we use items() and unzip the resulting list into two ordered lists
433 keys
, args
= zip(*kwargs
.items())
435 sql
= 'SELECT * FROM %s WHERE %s LIMIT 1' % (self
.TABLE_EPISODES
, \
436 ' AND '.join('%s=?' % k
for k
in keys
))
438 cur
= self
.cursor(lock
=True)
439 cur
.execute(sql
, args
)
440 keys
= [desc
[0] for desc
in cur
.description
]
443 result
= factory(dict(zip(keys
, row
)), self
)
451 def load_episode(self
, id):
452 """Load episode as dictionary by its id
454 This will return the data for an episode as
455 dictionary or None if it does not exist.
457 assert id is not None
459 cur
= self
.cursor(lock
=True)
460 cur
.execute('SELECT * from %s WHERE id = ? LIMIT 1' % (self
.TABLE_EPISODES
,), (id,))
462 d
= dict(zip((desc
[0] for desc
in cur
.description
), cur
.fetchone()))
464 self
.log('Loaded episode %d from DB', id)
472 def get_channel_id_from_episode_url(self
, url
):
473 """Return the (first) associated channel ID given an episode URL"""
474 assert url
is not None
476 cur
= self
.cursor(lock
=True)
477 cur
.execute('SELECT channel_id FROM %s WHERE url = ? LIMIT 1' % (self
.TABLE_EPISODES
,), (url
,))
481 self
.log('Found channel ID: %d', int(row
[0]), sender
=self
)
488 def save_episode(self
, e
):
492 self
.log('Refusing to save an episode without guid: %s', e
)
495 self
._save
_object
(e
, self
.TABLE_EPISODES
, self
.SCHEMA_EPISODES
)
497 def _save_object(self
, o
, table
, schema
):
501 columns
= [name
for name
, typ
, required
, default
in schema
if name
!= 'id']
502 values
= [getattr(o
, name
) for name
in columns
]
505 qmarks
= ', '.join('?'*len(columns
))
506 sql
= 'INSERT INTO %s (%s) VALUES (%s)' % (table
, ', '.join(columns
), qmarks
)
507 cur
.execute(sql
, values
)
510 qmarks
= ', '.join('%s = ?' % name
for name
in columns
)
512 sql
= 'UPDATE %s SET %s WHERE id = ?' % (table
, qmarks
)
513 cur
.execute(sql
, values
)
515 log('Cannot save %s to %s: %s', o
, table
, e
, sender
=self
, traceback
=True)
520 def save_downloaded_episode(self
, episode
):
521 assert episode
.id is not None
523 cur
= self
.cursor(lock
=True)
524 cur
.execute('UPDATE episodes SET state = ?, played = ?, length = ? WHERE id = ?', \
525 (episode
.state
, episode
.is_played
, episode
.length
, episode
.id))
529 def update_episode_state(self
, episode
):
530 assert episode
.id is not None
532 cur
= self
.cursor(lock
=True)
533 cur
.execute('UPDATE episodes SET state = ?, played = ?, locked = ? WHERE id = ?', (episode
.state
, episode
.is_played
, episode
.is_locked
, episode
.id))
537 def update_channel_lock(self
, channel
):
538 assert channel
.id is not None
539 self
.log("update_channel_lock(%s, locked=%s)", channel
.url
, channel
.channel_is_locked
)
541 cur
= self
.cursor(lock
=True)
542 cur
.execute("UPDATE channels SET channel_is_locked = ? WHERE id = ?", (channel
.channel_is_locked
, channel
.id, ))
547 def __get__(self
, sql
, params
=None):
549 Returns the first cell of a query result, useful for COUNT()s.
551 cur
= self
.cursor(lock
=True)
553 self
.log("__get__(): %s", sql
)
558 cur
.execute(sql
, params
)
569 def channel_foldername_exists(self
, foldername
):
571 Returns True if a foldername for a channel exists.
574 return self
.__get
__("SELECT id FROM channels WHERE foldername = ?", (foldername
,)) is not None
576 def episode_filename_exists(self
, filename
):
578 Returns True if a filename for an episode exists.
581 return self
.__get
__("SELECT id FROM episodes WHERE filename = ?", (filename
,)) is not None
583 def get_last_pubdate(self
, channel
):
585 Look up the highest "pubDate" value for
586 all episodes of the given podcast.
588 return self
.__get
__('SELECT MAX(pubDate) FROM episodes WHERE channel_id = ?', (channel
.id,))
590 def force_last_new(self
, channel
):
592 Only set the most-recent episode as "new"; this
593 should be called when a new podcast is added.
595 cur
= self
.cursor(lock
=True)
600 WHERE channel_id = ? AND
601 pubDate < (SELECT MAX(pubDate)
603 WHERE channel_id = ?)
604 """, (True, channel
.id, channel
.id))
609 def recreate_table(self
, cur
, table_name
, fields
, index_list
):
610 log('Rename table %s', table_name
, sender
=self
)
611 new_table_name
= table_name
+ "_save"
612 cur
.execute("ALTER TABLE %s RENAME TO %s" % (table_name
, new_table_name
))
613 #log("ALTER TABLE %s RENAME TO %s" % (table_name, new_table_name))
615 log('Delete existing indices', sender
=self
)
616 for column
, typ
in index_list
:
617 cur
.execute('DROP INDEX IF EXISTS idx_%s' % (column
))
619 self
.create_table(cur
, table_name
, fields
)
621 log('Correct NULL values in the existing data', sender
=self
)
622 columns
= set((column
, default
) for column
, typ
, required
, default
in fields
if required
)
623 for column
, default
in columns
:
624 cur
.execute('UPDATE %s SET %s = %s where %s IS NULL' % (new_table_name
, column
, default
, column
))
626 log('Copy data from table %s to table %s' % (new_table_name
, table_name
), sender
=self
)
627 columns
= ', '.join(f
[0] for f
in fields
)
628 cur
.execute("INSERT INTO %(tab)s (%(col)s) SELECT %(col)s FROM %(new_tab)s" %
629 {'tab': table_name
, 'col': columns
, 'new_tab': new_table_name
})
631 def create_table(self
, cur
, table_name
, fields
):
632 log('Creating table %s', table_name
, sender
=self
)
634 for column
, typ
, required
, default
in fields
:
636 columns
+= '\n %s %s NOT NULL DEFAULT %s,' % (column
, typ
, default
)
638 columns
+= '\n %s %s,' % (column
, typ
)
639 columns
= columns
.rstrip(',')
640 sql
= "CREATE TABLE %s (%s)" % (table_name
, columns
)
643 def upgrade_table(self
, table_name
, fields
, index_list
):
645 Creates a table or adds fields to it.
647 cur
= self
.cursor(lock
=True)
649 cur
.execute("PRAGMA table_info(%s)" % table_name
)
650 available
= cur
.fetchall()
653 self
.create_table(cur
, table_name
, fields
)
656 # Table info columns, as returned by SQLite
657 ID
, NAME
, TYPE
, NOTNULL
, DEFAULT
= range(5)
658 exists_notnull_column
= any(bool(column
[NOTNULL
]) for column
in available
)
660 if not exists_notnull_column
:
661 self
.recreate_table(cur
, table_name
, fields
, index_list
)
664 existing
= set(column
[NAME
] for column
in available
)
665 for field_name
, field_type
, field_required
, field_default
in fields
:
666 if field_name
not in existing
:
667 log('Adding column: %s.%s (%s)', table_name
, field_name
, field_type
, sender
=self
)
668 sql
= "ALTER TABLE %s ADD COLUMN %s %s" % (table_name
, field_name
, field_type
)
670 sql
+= " NOT NULL DEFAULT %s" % (field_default
)
673 for column
, typ
in index_list
:
674 cur
.execute('CREATE %s IF NOT EXISTS idx_%s ON %s (%s)' % (typ
, column
, table_name
, column
))
678 def delete_episode_by_guid(self
, guid
, channel_id
):
680 Deletes episodes that have a specific GUID for
681 a given channel. Used after feed updates for
682 episodes that have disappeared from the feed.
684 cur
= self
.cursor(lock
=True)
685 cur
.execute('DELETE FROM episodes WHERE channel_id = ? AND guid = ?', \