[DB] Add index for Episode.listeners
[mygpo.git] / mygpo / podcasts / models.py
blobbc207fcbd667e26b0ad337f2b2c9652247e65fc6
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.db import models, connection, transaction, IntegrityError
7 from django.contrib.contenttypes.models import ContentType
8 from django.contrib.contenttypes.fields import GenericRelation
9 from django.contrib.contenttypes import generic
11 from uuidfield import UUIDField
13 import logging
logger = logging.getLogger(__name__)


# All update intervals below are given in hours.

# Update interval that is used for podcasts by default (one week)
DEFAULT_UPDATE_INTERVAL = 24 * 7

# Minimum podcast update interval
MIN_UPDATE_INTERVAL = 5

# Maximum podcast update interval; every podcast should be updated at least
# once a month
MAX_UPDATE_INTERVAL = 30 * 24
class UUIDModel(models.Model):
    """ Abstract base for models that use a UUID as their primary key """

    id = UUIDField(primary_key=True)

    class Meta:
        abstract = True

    def get_id(self):
        """ Returns the object's ID in its hexadecimal string form """
        hex_id = self.id.hex
        return hex_id
class TitleModel(models.Model):
    """ Model that has a title (and a subtitle) """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        """ ASCII-encoded title; other characters are replaced """
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        """ The unmodified title """
        # This method used to be spelled "__unicode" (without the trailing
        # underscores). Such a name is mangled by Python and is therefore
        # never used as the unicode() hook of the model.
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a free-text description """

    # may be empty, but is never NULL in the database
    description = models.TextField(null=False, blank=True)

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base for models that have an (optional) link """

    # NULL if the object has no link
    link = models.URLField(null=True, max_length=1000)

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base for models that have an (optional) language """

    # indexed, because objects are filtered by language (eg in toplists)
    language = models.CharField(max_length=10, null=True, blank=False,
                                db_index=True)

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base that records when an object was last updated from
    its source """

    # Point in time of the last update from the object's source (eg a
    # podcast feed). A value of None indicates a stub, ie an object that
    # has been created without any information from the source.
    last_update = models.DateTimeField(null=True)

    class Meta:
        abstract = True
class UpdateInfoModel(models.Model):
    """ Abstract base that tracks creation and modification timestamps """

    # "auto_now_add=True" is deliberately not used here: migrated data has
    # to keep its original creation timestamp. It can be switched on once
    # the migration is complete.
    created = models.DateTimeField(default=datetime.utcnow)

    # refreshed automatically every time the object is saved
    modified = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base for models that can refer to a license """

    # The URL of a license (usually Creative Commons); NULL if unknown
    license = models.CharField(max_length=100, null=True, blank=False,
                               db_index=True)

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base for models that can provide Flattr information """

    # The Flattr payment URL; NULL if there is none
    flattr_url = models.URLField(null=True, blank=False, max_length=1000,
                                 db_index=True)

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base for models that list their content types """

    # The content types as comma-separated values, for example 'audio,video'
    content_types = models.CharField(max_length=20, null=False, blank=True)

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base class that does not declare any fields itself """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    # objects are not outdated unless they are flagged explicitly
    outdated = models.BooleanField(default=False, db_index=True)

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base for models that have an (optional) author """

    # NULL or empty if the author is not known
    author = models.CharField(max_length=350, null=True, blank=True)

    class Meta:
        abstract = True
class GenericManager(models.Manager):
    """ Generic manager methods """

    def count_fast(self):
        """ Fast approximate count of all model instances

        PostgreSQL is slow when counting records without an index. This is a
        workaround which only gives approximate results. see:
        http://wiki.postgresql.org/wiki/Slow_Counting """

        # The table name is handed over as a query parameter instead of being
        # interpolated into the SQL string, so that quoting is left to the
        # database driver. The cursor is closed when the block is left.
        with connection.cursor() as cursor:
            cursor.execute("select reltuples from pg_class where relname=%s;",
                           [self.model._meta.db_table])
            row = cursor.fetchone()

        return int(row[0])
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model

        Returns None if the object does not have any URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        'new_urls' is an iterable of URL strings. URLs that the object already
        has are skipped, as are repetitions within 'new_urls'. The order of
        existing URLs is not changed; new URLs are appended after them. """
        existing = list(self.urls.all())

        # -1 makes the numbering start at 0 if there are no URLs yet
        next_order = max([-1] + [u.order for u in existing]) + 1
        known_urls = set(u.url for u in existing)

        for url in new_urls:
            if url in known_urls:
                continue

            # the URL is attached to this object (the previous code passed
            # the undefined name "obj" here)
            URL.objects.create(url=url,
                               order=next_order,
                               scope=self.scope,
                               content_object=self,
                               )

            # remember the URL so that a repetition is not inserted twice
            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the podcast

        Returns None if the object does not have any slugs.

        TODO: should be retrieved from a (materialized) view """

        # We could also use self.slugs.first() here, but this would result in a
        # different query and would render a .prefetch_related('slugs') useless
        # The assumption is that we will never have loads of slugs, so
        # fetching all won't hurt
        slugs = list(self.slugs.all())
        slug = slugs[0].slug if slugs else None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-cannonical) slug

        Does nothing if the object already has the slug. Raises a ValueError
        if 'slug' is empty. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        existing_slugs = self.slugs.all()

        # check if slug already exists
        if slug in [s.slug for s in existing_slugs]:
            return

        # the new slug is appended after all existing ones
        max_order = max([-1] + [s.order for s in existing_slugs])
        next_order = max_order + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The slug is moved (or added) to the front of the object's slugs """

        slugs = [s.slug for s in self.slugs.all()]
        if slug in slugs:
            slugs.remove(slug)

        slugs.insert(0, slug)
        self.set_slugs(slugs)

    def remove_slug(self, slug):
        """ Removes a slug """
        Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        ).delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        existing = {s.slug: s for s in self.slugs.all()}
        logger.info('%d existing slugs', len(existing))

        logger.info('%d new slugs', len(slugs))

        # Move all existing slugs out of the way first, so that the order
        # values 0..len(slugs)-1 can be assigned below without two slugs of
        # the object sharing one order value
        with transaction.atomic():
            max_order = max([s.order for s in existing.values()] + [len(slugs)])
            logger.info('Renumbering slugs starting from %d', max_order+1)
            for n, slug in enumerate(existing.values(), max_order+1):
                slug.order = n
                slug.save()

        logger.info('%d existing slugs', len(existing))

        for n, slug in enumerate(slugs):
            try:
                s = existing.pop(slug)
                logger.info('Updating new slug %d: %s', n, slug)
                s.order = n
                s.save()
            except KeyError:
                logger.info('Creating new slug %d: %s', n, slug)
                try:
                    # The nested atomic block acts as a savepoint if this
                    # method is called within a transaction; without it a
                    # failed insert would leave that transaction unusable
                    with transaction.atomic():
                        Slug.objects.create(slug=slug,
                                            content_object=self,
                                            order=n,
                                            scope=self.scope,
                                            )
                except IntegrityError as ie:
                    # best effort: a slug that can not be created is skipped
                    logger.warning('Could not create Slug for %s: %s', self, ie)

        # whatever is left in 'existing' has not been requested anymore
        with transaction.atomic():
            delete = [s.pk for s in existing.values()]
            logger.info('Deleting %d slugs', len(delete))
            Slug.objects.filter(id__in=delete).delete()
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first; the merged UUIDs are only
        queried if that lookup does not find anything. """
        # TODO: should this be done in the model?
        not_found = self.model.DoesNotExist
        try:
            result = self.get(id=id)
        except not_found:
            result = self.get(merged_uuids__uuid=id)
        return result
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    tags = GenericRelation('Tag', related_query_name='tags')

    class Meta:
        abstract = True
class OrderedModel(models.Model):
    """ Abstract base for models that are sorted by an 'order' field

    It is up to the implementing Model to make sure that 'order' is
    sufficiently unique
    """

    order = models.PositiveSmallIntegerField()

    class Meta:
        abstract = True
        # instances are returned sorted by their order value by default
        ordering = ['order']
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ A group that combines multiple podcasts """

    @property
    def scope(self):
        """ Podcast groups always live in the global scope

        The global scope is represented by the empty string """
        global_scope = ''
        return global_scope
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Excludes podcasts with missing title to guarantee some
        minimum quality of the results """

        # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
        # random uuid and query podcasts with a higher ID
        # This returns podcasts in order of their ID, but the assumption is
        # that usually only one podcast will be required anyway
        import uuid

        # uuid4() is actually random; the previously used uuid1() is derived
        # from the current time and the host, so calls in quick succession
        # produced nearly identical thresholds
        ruuid = uuid.uuid4()
        return self.exclude(title='').filter(id__gt=ruuid)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license

        If 'license_url' is given, only podcasts with exactly this license
        are returned; otherwise all podcasts that have a license """
        if license_url:
            return self.filter(license=license_url)
        else:
            return self.exclude(license__isnull=True)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        # the next update is due 'update_interval' hours after the last one;
        # it is calculated by the database and exposed as 'next_update'
        NEXTUPDATE = "last_update + (update_interval || ' hours')::INTERVAL"
        q = self.extra(select={'next_update': NEXTUPDATE})
        return q.order_by('next_update')

    def toplist(self, language=None):
        """ Podcasts sorted by descending number of subscribers

        If 'language' is given, only podcasts in this language are
        included """
        toplist = self
        if language:
            toplist = toplist.filter(language=language)

        return toplist.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if necessary

        'defaults' is an optional dict of field values that are used if the
        podcast has to be created. The dict is not modified. A newly created
        podcast gets 'url' assigned as its first URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: the previous mutable default argument ({}) was
        # shared between calls, and a dict passed in by the caller was
        # modified in place
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            # podcast URLs are always in the global scope ('')
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                               )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)
    twitter = models.CharField(max_length=15, null=True, blank=False)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
                                                       default=DEFAULT_UPDATE_INTERVAL)

    objects = PodcastManager()

    def subscriber_count(self):
        # TODO: implement
        return 0

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        Returns the group that both podcasts are in afterwards, or None if
        they have already been in the same group. 'grouptitle' is only used
        if a new group has to be created. Raises a ValueError if the
        podcasts already are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def subscribe_targets(self, user):
        """
        returns all Devices and SyncGroups on which this podcast can be subscribed. This excludes all
        devices/syncgroups on which the podcast is already subscribed
        """
        targets = []

        subscriptions_by_devices = user.get_subscriptions_by_device()
        podcast_id = self.get_id()

        for group in user.get_grouped_devices():

            if group.is_synced:
                # only the first device of a synced group is checked; the
                # whole list of devices is added as one target
                dev = group.devices[0]

                if podcast_id not in subscriptions_by_devices[dev.id]:
                    targets.append(group.devices)

            else:
                for device in group.devices:
                    if podcast_id not in subscriptions_by_devices[device.id]:
                        targets.append(device)

        return targets

    def get_common_episode_title(self, num_episodes=100):
        """ The part of the title that the podcast's episodes have in common

        A stored 'common_episode_title' takes precedence. Otherwise the
        title is calculated from at most 'num_episodes' episodes. Returns
        None if no common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles; this is a list (not the result of
        # filter()) so that len() can be used on it on Python 3 as well
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        # NOTE(review): 'utils' is not imported in the visible part of this
        # module -- confirm that it is available here
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The episode that has been released directly before 'episode'

        Returns None if 'episode' has no release date. Raises DoesNotExist
        if the podcast has no earlier episode. """
        if not episode.released:
            return None

        # the field has to be given explicitly, because Episode does not
        # declare 'get_latest_by' in its Meta class
        earlier = self.episode_set.filter(released__lt=episode.released)
        return earlier.latest('released')

    def get_episode_after(self, episode):
        """ The episode that has been released directly after 'episode'

        Returns None if 'episode' has no release date, or if the podcast
        has no later episode. """
        if not episode.released:
            return None

        # Episodes are sorted by '-released' by default, so first() would
        # return the most recent episode instead of the next one; sort by
        # ascending release date to get the direct successor
        later = self.episode_set.filter(released__gt=episode.released)
        return later.order_by('released').first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Episodes """

    def toplist(self, language=None):
        """ Episodes sorted by descending number of listeners

        If 'language' is given, only episodes in this language are
        included """
        episodes = self.filter(language=language) if language else self
        return episodes.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL

        The episode is created if it does not exist. 'defaults' is an
        optional dict of field values that are used in that case. The dict
        is not modified. A newly created episode gets 'url' assigned as its
        first URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: the previous mutable default argument ({}) was
        # shared between calls, and a dict passed in by the caller was
        # modified in place
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        episode, created = self.get_or_create(podcast=podcast,
                                              urls__url=url,
                                              defaults=defaults,
                                              )

        if created:
            # episode URLs are scoped by the ID of their podcast
            URL.objects.create(url=url,
                               order=0,
                               scope=podcast.get_id(),
                               content_object=episode,
                               )
        return episode
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.PositiveIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        # most recently released episodes come first
        ordering = ['-released']

    @property
    def scope(self):
        """ Episodes are scoped by their podcast

        The scope is the hexadecimal form of the podcast's ID """
        return self.podcast_id.hex

    def get_short_title(self, common_title):
        """ The episode's title for use in the context of its podcast

        'common_title' is removed from the title, as are leading non-word
        characters and digits. Returns None if either title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ The number of the episode

        The number is taken from the start of the title, after
        'common_title' has been removed from it. Returns None if either
        title is empty, or if no number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        number = re.search(r'^\W*(\d+)', remainder)
        return int(number.group(1)) if number else None
class ScopedModel(models.Model):
    """ Abstract base for models that belong to a scope

    Scopes are usually used to limit uniqueness. NULL is not allowed as a
    scope: in SQL NULL is not equal to NULL, so such a value could not be
    used in unique constraints. """

    # Slugs / URLs are unique within their scope: podcasts share the scope
    # '', so no two podcasts can have the same URL; episodes use the ID of
    # their podcast as scope, so no two episodes of the same podcast can
    # have the same URL
    scope = models.CharField(max_length=32, null=False, blank=True,
                             db_index=True)

    class Meta:
        abstract = True
class URL(OrderedModel, ScopedModel):
    """ A URL of a Podcast or an Episode

    An object can have multiple URLs. They are ordered, and the first URL is
    considered the canonical one. """

    url = models.URLField(max_length=2048)

    # generic relation to the object that the URL belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # within one scope, every URL can exist only once
            ('url', 'scope'),

            # the URLs of an object are ordered; no two URLs of the same
            # object can have the same order key
            ('content_type', 'object_id', 'order'),
        )

        verbose_name = 'URL'
        verbose_name_plural = 'URLs'
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # the possible sources of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)
    #user = models.ForeignKey(null=True)

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # one source can assign a tag to an item only once
            # TODO: add user to tuple
            ('tag', 'source', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def __repr__(self):
        """ Debug representation: class name, slug, order and the object
        that the slug belongs to """
        # the closing parenthesis was missing from the format string
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ The UUID of an object that has been merged into another one

    When objects are merged, their UUIDs are stored for later reference;
    see also :class:`MergedUUIDsMixin`
    """

    # a merged UUID can refer to one object only
    uuid = UUIDField(unique=True)

    # generic relation to the object that the UUID has been merged into, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name = 'Merged UUID'
        verbose_name_plural = 'Merged UUIDs'