1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # dbsqlite.py -- SQLite persistence layer for gPodder
23 # 2008-06-13 Justin Forest <justin.forest@gmail.com>
24 # 2010-04-24 Thomas Perl <thp@gpodder.org>
35 from sqlite3
import dbapi2
as sqlite
36 from sqlite3
import OperationalError
39 from pysqlite2
import dbapi2
as sqlite
40 from pysqlite2
.dbapi2
import OperationalError
44 # TODO: show a message box
46 print >>sys
.stderr
, 'Please install pysqlite2 or Python 2.5.'
49 from gpodder
.liblogger
import log
55 class Database(object):
56 UNICODE_TRANSLATE
= {ord(u
'ö'): u
'o', ord(u
'ä'): u
'a', ord(u
'ü'): u
'u'}
58 # Column names and types for the channels table
59 TABLE_CHANNELS
= "channels"
61 ('id', 'INTEGER PRIMARY KEY'),
62 ('url', 'TEXT'), # Feed (RSS/Atom) URL of the podcast
63 ('title', 'TEXT'), # Podcast name
64 ('override_title', 'TEXT'), # Podcast name if user-defined
65 ('link', 'TEXT'), # Website URL for the podcast
66 ('description', 'TEXT'), # Description of podcast contents
67 ('image', 'TEXT'), # URL to cover art for the image
68 ('pubDate', 'INTEGER'), # Date and time of last feed publication
69 ('sync_to_devices', 'INTEGER'), # 1 if syncing to devices is enabled, 0 otherwise
70 ('device_playlist_name', 'TEXT'), # Name of the playlist on the device for syncing
71 ('username', 'TEXT'), # Username for HTTP authentication (feed update + downloads)
72 ('password', 'TEXT'), # Password for HTTP authentication (feed update + downloads)
73 ('last_modified', 'TEXT'), # Last-modified HTTP header from last update
74 ('etag', 'TEXT'), # ETag HTTP header from last update
75 ('channel_is_locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
76 ('foldername', 'TEXT'), # Folder name (basename) to put downloaded episodes
77 ('auto_foldername', 'INTEGER'), # 1 if the foldername was auto-generated, 0 otherwise
78 ('release_expected', 'INTEGER'), # Statistic value for when a new release is expected
79 ('release_deviation', 'INTEGER'), # Deviation of the release cycle differences
80 ('updated_timestamp', 'INTEGER'), # Timestamp of the last feed update
83 ('foldername', 'UNIQUE INDEX'),
84 ('url', 'UNIQUE INDEX'),
85 ('sync_to_devices', 'INDEX'),
89 # Column names and types for the episodes table
90 TABLE_EPISODES
= 'episodes'
92 ('id', 'INTEGER PRIMARY KEY'),
93 ('channel_id', 'INTEGER'), # Foreign key: ID of the podcast of this episode
94 ('url', 'TEXT'), # Download URL of the media file
95 ('title', 'TEXT'), # Episode title
96 ('length', 'INTEGER'), # File length of the media file in bytes
97 ('mimetype', 'TEXT'), # Mime type of the media file
98 ('guid', 'TEXT'), # GUID of the episode item
99 ('description', 'TEXT'), # Longer text description
100 ('link', 'TEXT'), # Website URL for the episode
101 ('pubDate', 'INTEGER'), # Date and time of publication
102 ('state', 'INTEGER'), # Download state (see gpodder.STATE_* constants)
103 ('played', 'INTEGER'), # 1 if it's new or played, 0 otherwise
104 ('locked', 'INTEGER'), # 1 if deletion is prevented, 0 otherwise
105 ('filename', 'TEXT'), # Filename for the downloaded file (or NULL)
106 ('auto_filename', 'INTEGER'), # 1 if the filename was auto-generated, 0 otherwise
107 ('total_time', 'INTEGER'), # Length in seconds
108 ('current_position', 'INTEGER'), # Current playback position
109 ('current_position_updated', 'INTEGER'), # Set to NOW when updating current_position
112 ('guid', 'UNIQUE INDEX'),
113 ('filename', 'UNIQUE INDEX'),
114 ('channel_id', 'INDEX'),
115 ('pubDate', 'INDEX'),
121 def __init__(self
, filename
):
122 self
.database_file
= filename
124 self
.lock
= threading
.RLock()
129 cur
= self
.cursor(lock
=True)
130 log('Optimizing database for faster startup.', sender
=self
)
131 cur
.execute("VACUUM")
138 def log(self
, message
, *args
, **kwargs
):
140 message
= message
% args
141 log('%s', message
, sender
=self
)
143 log('Exception in log(): %s: %s', e
, message
, sender
=self
)
145 def purge(self
, max_episodes
, channel_id
):
147 Deletes old episodes. Should be called
148 before adding new episodes to a channel.
150 cur
= self
.cursor(lock
=True)
152 self
.log("purge(%s)", channel_id
)
158 (SELECT id FROM episodes WHERE channel_id = ?
159 ORDER BY pubDate DESC LIMIT ?)"""
160 cur
.execute(sql
, (channel_id
, gpodder
.STATE_DOWNLOADED
, channel_id
, max_episodes
))
165 def db_sort_cmp(self
, a
, b
):
167 Compare two strings for sorting, including removing
168 a possible "The " prefix and converting umlauts to
169 normal characters so they can be sorted correctly.
170 (i.e. "Ö1" should not appear at the end of the list)
173 a
= a
.decode('utf-8', 'ignore').lower()
174 a
= re
.sub('^the ', '', a
)
175 a
= a
.translate(self
.UNICODE_TRANSLATE
)
176 b
= b
.decode('utf-8', 'ignore').lower()
177 b
= re
.sub('^the ', '', b
)
178 b
= b
.translate(self
.UNICODE_TRANSLATE
)
181 log('Error while comparing "%s" and "%s"', a
, b
, sender
=self
, traceback
=True)
182 a
= re
.sub('^the ', '', a
.lower())
183 b
= re
.sub('^the ', '', b
.lower())
189 self
._db
= sqlite
.connect(self
.database_file
, check_same_thread
=False)
190 self
._db
.text_factory
= str
191 self
._db
.create_collation("UNICODE", self
.db_sort_cmp
)
192 self
.log('Connected')
193 self
.__check
_schema
()
196 def cursor(self
, lock
=False):
199 return self
.db
.cursor()
206 except ProgrammingError
, e
:
207 log('Error commiting changes: %s', e
, sender
=self
, traceback
=True)
210 def _remove_deleted_channels(self
):
211 """Remove deleted podcasts and episodes (upgrade from gPodder <= 2.5)
213 If the database has been created with gPodder <= 2.5, it could
214 be possible that podcasts have been deleted where metadata and
217 We don't support this kind of "information keeping" anymore, so
218 simply go ahead and remove all podcast marked as "deleted" and
219 their corresponding episodes to slim down the database.
221 cur
= self
.cursor(lock
=True)
222 cur
.execute("PRAGMA table_info(%s)" % self
.TABLE_CHANNELS
)
223 available
= cur
.fetchall()
225 ID
, NAME
, TYPE
, NULL
, DEFAULT
= range(5)
226 existing
= set(column
[NAME
] for column
in available
)
228 if 'deleted' in existing
:
229 cur
.execute('SELECT id FROM %s WHERE deleted = ?' % self
.TABLE_CHANNELS
, (1,))
230 channel_ids
= [id for (id,) in cur
]
232 # Remove all deleted channels from the database
233 for id in channel_ids
:
234 self
.log('Removing deleted channel with ID %d', id)
235 cur
.execute('DELETE FROM %s WHERE id = ?' % self
.TABLE_CHANNELS
, (id,))
236 cur
.execute('DELETE FROM %s WHERE channel_id = ?' % self
.TABLE_EPISODES
, (id,))
239 def __check_schema(self
):
241 Creates all necessary tables and indexes that don't exist.
243 self
.log('Setting up tables and views')
245 cur
= self
.cursor(lock
=True)
247 # If a "deleted" column exists in the channel table, remove all
248 # corresponding channels and their episodes and remove it
249 self
._remove
_deleted
_channels
()
251 # Create tables and possibly add newly-added columns
252 self
.upgrade_table(self
.TABLE_CHANNELS
, self
.SCHEMA_CHANNELS
, self
.INDEX_CHANNELS
)
253 self
.upgrade_table(self
.TABLE_EPISODES
, self
.SCHEMA_EPISODES
, self
.INDEX_EPISODES
)
255 # Make sure deleted episodes are played, to simplify querying statistics.
257 cur
.execute("UPDATE episodes SET played = 1 WHERE state = ?", (gpodder
.STATE_DELETED
,))
258 except OperationalError
:
264 def get_channel_count(self
, id):
265 """Given a channel ID, returns the statistics for it
267 Returns a tuple (total, deleted, new, downloaded, unplayed)
269 total
, deleted
, new
, downloaded
, unplayed
= 0, 0, 0, 0, 0
271 cur
= self
.cursor(lock
=True)
272 cur
.execute('SELECT COUNT(*), state, played FROM episodes WHERE channel_id = ? GROUP BY state, played', (id,))
273 for count
, state
, played
in cur
:
275 if state
== gpodder
.STATE_DELETED
:
277 elif state
== gpodder
.STATE_NORMAL
and not played
:
279 elif state
== gpodder
.STATE_DOWNLOADED
and not played
:
282 elif state
== gpodder
.STATE_DOWNLOADED
:
288 return (total
, deleted
, new
, downloaded
, unplayed
)
290 def get_total_count(self
):
291 """Get statistics for episodes in all channels
293 Returns a tuple (total, deleted, new, downloaded, unplayed)
295 total
, deleted
, new
, downloaded
, unplayed
= 0, 0, 0, 0, 0
297 cur
= self
.cursor(lock
=True)
298 cur
.execute('SELECT COUNT(*), state, played FROM episodes GROUP BY state, played')
299 for count
, state
, played
in cur
:
301 if state
== gpodder
.STATE_DELETED
:
303 elif state
== gpodder
.STATE_NORMAL
and not played
:
305 elif state
== gpodder
.STATE_DOWNLOADED
and not played
:
308 elif state
== gpodder
.STATE_DOWNLOADED
:
314 return (total
, deleted
, new
, downloaded
, unplayed
)
316 def load_channels(self
, factory
=None, url
=None):
318 Returns channel descriptions as a list of dictionaries or objects,
319 returned by the factory() function, which receives the dictionary
320 as the only argument.
323 self
.log("load_channels()")
325 cur
= self
.cursor(lock
=True)
326 cur
.execute('SELECT * FROM %s ORDER BY title COLLATE UNICODE' % self
.TABLE_CHANNELS
)
329 keys
= list(desc
[0] for desc
in cur
.description
)
331 channel
= dict(zip(keys
, row
))
333 if url
is None or url
== channel
['url']:
335 result
.append(channel
)
337 result
.append(factory(channel
, self
))
344 def save_channel(self
, c
):
345 self
._save
_object
(c
, self
.TABLE_CHANNELS
, self
.SCHEMA_CHANNELS
)
347 def delete_channel(self
, channel
):
348 assert channel
.id is not None
350 cur
= self
.cursor(lock
=True)
351 self
.log("delete_channel(%d), %s", channel
.id, channel
.url
)
353 cur
.execute("DELETE FROM channels WHERE id = ?", (channel
.id, ))
354 cur
.execute("DELETE FROM episodes WHERE channel_id = ?", (channel
.id, ))
361 def __read_episodes(self
, factory
=None, where
=None, params
=None, commit
=True):
362 sql
= "SELECT * FROM episodes"
365 sql
= "%s %s" % (sql
, where
)
370 cur
= self
.cursor(lock
=True)
371 cur
.execute(sql
, params
)
374 keys
= list(desc
[0] for desc
in cur
.description
)
376 episode
= dict(zip(keys
, row
))
377 if episode
['state'] is None:
378 episode
['state'] = gpodder
.STATE_NORMAL
380 result
.append(episode
)
382 result
.append(factory(episode
, self
))
388 def load_episodes(self
, channel
, factory
=None, limit
=1000, state
=None):
389 assert channel
.id is not None
391 self
.log("load_episodes((%d)%s)", channel
.id, channel
.url
)
394 return self
.__read
_episodes
(factory
= factory
, where
= """
395 WHERE channel_id = ? AND state = ? OR id IN
396 (SELECT id FROM episodes WHERE channel_id = ?
397 ORDER BY pubDate DESC LIMIT ?)
398 ORDER BY pubDate DESC
399 """, params
= (channel
.id, gpodder
.STATE_DOWNLOADED
, channel
.id, limit
, ))
401 return self
.__read
_episodes
(factory
= factory
, where
= " WHERE channel_id = ? AND state = ? ORDER BY pubDate DESC LIMIT ?", params
= (channel
.id, state
, limit
, ))
403 def load_episode(self
, url
, factory
=None):
404 self
.log("load_episode(%s)", url
)
405 list = self
.__read
_episodes
(factory
=factory
, where
=' WHERE url=? LIMIT ?', params
=(url
, 1))
411 def save_episode(self
, e
):
415 self
.log('Refusing to save an episode without guid: %s', e
)
418 self
._save
_object
(e
, self
.TABLE_EPISODES
, self
.SCHEMA_EPISODES
)
420 def _save_object(self
, o
, table
, schema
):
424 columns
= [name
for name
, typ
in schema
if name
!= 'id']
425 values
= [getattr(o
, name
) for name
in columns
]
428 qmarks
= ', '.join('?'*len(columns
))
429 sql
= 'INSERT INTO %s (%s) VALUES (%s)' % (table
, ', '.join(columns
), qmarks
)
430 cur
.execute(sql
, values
)
433 qmarks
= ', '.join('%s = ?' % name
for name
in columns
)
435 sql
= 'UPDATE %s SET %s WHERE id = ?' % (table
, qmarks
)
436 cur
.execute(sql
, values
)
438 log('Cannot save %s to %s: %s', o
, table
, e
, sender
=self
, traceback
=True)
443 def update_episode_state(self
, episode
):
444 assert episode
.id is not None
446 cur
= self
.cursor(lock
=True)
447 cur
.execute('UPDATE episodes SET state = ?, played = ?, locked = ? WHERE id = ?', (episode
.state
, episode
.is_played
, episode
.is_locked
, episode
.id))
451 def update_channel_lock(self
, channel
):
452 self
.log("update_channel_lock(%s, locked=%s)", channel
.url
, channel
.channel_is_locked
)
454 cur
= self
.cursor(lock
=True)
455 cur
.execute("UPDATE channels SET channel_is_locked = ? WHERE url = ?", (channel
.channel_is_locked
, channel
.url
, ))
460 def __get__(self
, sql
, params
=None):
462 Returns the first cell of a query result, useful for COUNT()s.
464 cur
= self
.cursor(lock
=True)
466 self
.log("__get__(): %s", sql
)
471 cur
.execute(sql
, params
)
482 def channel_foldername_exists(self
, foldername
):
484 Returns True if a foldername for a channel exists.
487 return self
.__get
__("SELECT id FROM channels WHERE foldername = ?", (foldername
,)) is not None
489 def remove_foldername_if_deleted_channel(self
, foldername
):
490 cur
= self
.cursor(lock
=True)
491 self
.log('Setting foldername=NULL for folder "%s"', foldername
)
492 cur
.execute('UPDATE channels SET foldername=NULL ' + \
493 'WHERE foldername=? AND deleted=1', (foldername
,))
497 def episode_filename_exists(self
, filename
):
499 Returns True if a filename for an episode exists.
502 return self
.__get
__("SELECT id FROM episodes WHERE filename = ?", (filename
,)) is not None
504 def get_last_pubdate(self
, channel
):
506 Look up the highest "pubDate" value for
507 all episodes of the given podcast.
509 return self
.__get
__("""
510 SELECT MAX(pubDate) FROM episodes
514 def force_last_new(self
, channel
):
515 old
= self
.__get
__("""SELECT COUNT(*) FROM episodes WHERE channel_id = ?
516 AND state IN (?, ?)""", (channel
.id, gpodder
.STATE_DOWNLOADED
,
517 gpodder
.STATE_DELETED
))
519 cur
= self
.cursor(lock
=True)
521 self
.log("force_last_new((%d)%s)", channel
.id, channel
.url
)
525 UPDATE episodes SET played = 1 WHERE channel_id = ?
526 AND played = 0 AND pubDate < (SELECT MAX(pubDate)
527 FROM episodes WHERE channel_id = ? AND state IN (?, ?))""",
528 (channel
.id, channel
.id, gpodder
.STATE_DOWNLOADED
,
529 gpodder
.STATE_DELETED
, ))
532 UPDATE episodes SET played = 1 WHERE channel_id = ?
533 AND pubDate <> (SELECT MAX(pubDate) FROM episodes
534 WHERE channel_id = ?)""", (channel
.id, channel
.id, ))
539 def upgrade_table(self
, table_name
, fields
, index_list
):
541 Creates a table or adds fields to it.
543 cur
= self
.cursor(lock
=True)
545 cur
.execute("PRAGMA table_info(%s)" % table_name
)
546 available
= cur
.fetchall()
549 log('Creating table %s', table_name
, sender
=self
)
550 columns
= ', '.join(' '.join(f
) for f
in fields
)
551 sql
= "CREATE TABLE %s (%s)" % (table_name
, columns
)
554 # Table info columns, as returned by SQLite
555 ID
, NAME
, TYPE
, NULL
, DEFAULT
= range(5)
556 existing
= set(column
[NAME
] for column
in available
)
558 for field_name
, field_type
in fields
:
559 if field_name
not in existing
:
560 log('Adding column: %s.%s (%s)', table_name
, field_name
, field_type
, sender
=self
)
561 cur
.execute("ALTER TABLE %s ADD COLUMN %s %s" % (table_name
, field_name
, field_type
))
563 for column
, typ
in index_list
:
564 cur
.execute('CREATE %s IF NOT EXISTS idx_%s ON %s (%s)' % (typ
, column
, table_name
, column
))
568 def delete_empty_episodes(self
, channel_id
):
570 Deletes episodes which haven't been downloaded.
571 Currently used when a channel URL is changed.
573 cur
= self
.cursor(lock
=True)
574 log('Deleting old episodes from channel #%d' % channel_id
, sender
=self
)
575 cur
.execute("DELETE FROM episodes WHERE channel_id = ? AND state != ?", (channel_id
, gpodder
.STATE_DOWNLOADED
, ))
578 def delete_episode_by_guid(self
, guid
, channel_id
):
580 Deletes episodes that have a specific GUID for
581 a given channel. Used after feed updates for
582 episodes that have disappeared from the feed.
584 cur
= self
.cursor(lock
=True)
585 cur
.execute('DELETE FROM episodes WHERE channel_id = ? AND guid = ?', \