[Models] add stub Podcast.display_title
[mygpo.git] / mygpo / podcasts / models.py
blobbece3580b5d19964cc5418cd2efe38735ccb0a23
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.db import models, connection, transaction, IntegrityError
7 from django.contrib.contenttypes.models import ContentType
8 from django.contrib.contenttypes.fields import GenericRelation
9 from django.contrib.contenttypes import generic
11 from uuidfield import UUIDField
13 from mygpo import utils
15 import logging
16 logger = logging.getLogger(__name__)
# default podcast update interval in hours (7 * 24, ie one week)
DEFAULT_UPDATE_INTERVAL = 7 * 24

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# maximum podcast update interval in hours; every podcast should be updated
# at least once a month (24 * 30, ie 30 days)
MAX_UPDATE_INTERVAL = 24 * 30
class UUIDModel(models.Model):
    """ Abstract base for models whose primary key is a UUID """

    id = UUIDField(primary_key=True)

    class Meta:
        abstract = True

    def get_id(self):
        """ Returns the string (hex) representation of the primary key """
        hex_id = self.id.hex
        return hex_id
class TitleModel(models.Model):
    """ Model that has a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        """ ASCII-encoded title; characters outside of ASCII are replaced """
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        """ The unmodified title

        This method used to be spelled '__unicode' (without the trailing
        underscores); that name is mangled by Python and was therefore never
        picked up as the unicode conversion hook. """
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract model that contributes a free-text 'description' field """

    # may be blank, but is never NULL in the database
    description = models.TextField(
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract model that contributes a 'link' URL field """

    # NULL is allowed for objects without a link
    link = models.URLField(
        null=True,
        max_length=1000,
    )

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract model that contributes an indexed 'language' field """

    # NULL is allowed; the column is indexed
    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract model that remembers when it was last refreshed from its
    source """

    # Point in time at which the object was last updated from its source
    # (eg a podcast feed). A value of None marks an object that was created
    # as a stub, ie without any information from the source.
    last_update = models.DateTimeField(
        null=True,
    )

    class Meta:
        abstract = True
class UpdateInfoModel(models.Model):
    """ Abstract model with 'created' and 'modified' timestamps """

    # "auto_now_add=True" is intentionally not used here: data can then be
    # migrated while keeping its original creation timestamp. It can be
    # switched on once the migration is complete.
    created = models.DateTimeField(
        default=datetime.utcnow,
    )

    # refreshed automatically (auto_now)
    modified = models.DateTimeField(
        auto_now=True,
    )

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract model that contributes a 'license' field """

    # URL to a license (usually Creative Commons)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract model that contributes a 'flattr_url' field """

    # A Flattr payment URL
    flattr_url = models.URLField(
        null=True,
        blank=False,
        max_length=1000,
        db_index=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract model that contributes a 'content_types' field """

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(
        max_length=20,
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract model that currently declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract model that contributes an indexed 'outdated' flag """

    # defaults to False
    outdated = models.BooleanField(
        default=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract model that contributes an 'author' field """

    # both NULL and blank values are permitted
    author = models.CharField(
        max_length=350,
        null=True,
        blank=True,
    )

    class Meta:
        abstract = True
class GenericManager(models.Manager):
    """ Generic manager methods """

    def count_fast(self):
        """ Fast approximate count of all model instances

        PostgreSQL is slow when counting records without an index. This is a
        workaround which only gives approximate results. see:
        http://wiki.postgresql.org/wiki/Slow_Counting

        Returns the 'reltuples' value that pg_class holds for the model's
        table, converted to int. """
        cursor = connection.cursor()

        # the table name is handed over as a query parameter so that the
        # database driver takes care of quoting it
        cursor.execute("select reltuples from pg_class where relname=%s;",
                       [self.model._meta.db_table])
        row = cursor.fetchone()
        return int(row[0])
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model

        This is the 'url' of the first entry of self.urls.all(), or None if
        the object has no URLs. """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that the object already
        has, and repeated entries within new_urls, are skipped. """
        existing_urls = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing_urls]) + 1
        known_urls = set(u.url for u in existing_urls)

        for url in new_urls:
            if url in known_urls:
                continue

            # the new URL belongs to this object; the previous code passed
            # the undefined name 'obj' here, which raised a NameError
            URL.objects.create(url=url,
                               order=next_order,
                               scope=self.scope,
                               content_object=self,
                              )

            # remember the URL so that a repeated entry in new_urls is not
            # created a second time
            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects

    The methods below read self.scope, which the concrete model has to
    provide. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the podcast

        This is the 'slug' of the first entry of self.slugs.all(), or None if
        the object has no slugs.

        TODO: should be retrieved from a (materialized) view """

        # We could also use self.slugs.first() here, but this would result in a
        # different query and would render a .prefetch_related('slugs') useless
        # The assumption is that we will never have loads of slugs, so
        # fetching all won't hurt
        slugs = list(self.slugs.all())
        slug = slugs[0].slug if slugs else None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug


    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        The slug is appended after the highest existing order value. Nothing
        happens if the object already has this slug.

        Raises ValueError if 'slug' is empty / falsy. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        existing_slugs = self.slugs.all()

        # check if slug already exists
        if slug in [s.slug for s in existing_slugs]:
            return

        # -1 is the starting point for objects without slugs, so that the
        # first slug gets order 0
        max_order = max([-1] + [s.order for s in existing_slugs])
        next_order = max_order + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                           )


    def set_slug(self, slug):
        """ Sets the canonical slug

        Moves 'slug' to the front of the object's current slug list and hands
        the resulting list to set_slugs(). """

        slugs = [s.slug for s in self.slugs.all()]
        if slug in slugs:
            slugs.remove(slug)

        slugs.insert(0, slug)
        self.set_slugs(slugs)


    def remove_slug(self, slug):
        """ Removes a slug

        Deletes the Slug rows with the given slug text that point at this
        object (matched by content type and object ID). """
        Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        ).delete()


    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. The position in the list becomes the slug's order. """
        # maps slug text -> Slug object
        existing = {s.slug: s for s in self.slugs.all()}
        logger.info('%d existing slugs', len(existing))

        logger.info('%d new slugs', len(slugs))

        with transaction.atomic():
            # Move all existing slugs to order values above both the highest
            # existing order and len(slugs); the positions 0..len(slugs)-1
            # that are assigned below are then unused. Presumably this avoids
            # collisions on the per-object 'order' uniqueness of Slug.
            max_order = max([s.order for s in existing.values()] + [len(slugs)])
            logger.info('Renumbering slugs starting from %d', max_order+1)
            # NOTE: 'slug' is a Slug object in this loop, but a string in the
            # loop further down
            for n, slug in enumerate(existing.values(), max_order+1):
                slug.order = n
                slug.save()

        logger.info('%d existing slugs', len(existing))

        for n, slug in enumerate(slugs):
            try:
                # slugs that are kept are removed from 'existing'; whatever
                # remains in the dict afterwards is deleted at the end
                s = existing.pop(slug)
                logger.info('Updating new slug %d: %s', n, slug)
                s.order = n
                s.save()
            except KeyError:
                logger.info('Creating new slug %d: %s', n, slug)
                try:
                    Slug.objects.create(slug=slug,
                                        content_object=self,
                                        order=n,
                                        scope=self.scope,
                                       )
                except IntegrityError as ie:
                    # best effort: the failure is logged and the remaining
                    # slugs are still processed
                    logger.warn('Could not create Slug for %s: %s', self, ie)

        with transaction.atomic():
            delete = [s.pk for s in existing.values()]
            logger.info('Deleting %d slugs', len(delete))
            Slug.objects.filter(id__in=delete).delete()
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that keep track of merged UUIDs """

    def get_by_any_id(self, id):
        """ Looks up an object by its own ID, falling back to merged IDs

        The object's own ID is tried first. Only if that lookup raises
        DoesNotExist is the object searched by merged_uuids__uuid. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )

    class Meta:
        abstract = True
class OrderedModel(models.Model):
    """ Abstract model whose instances are sorted by an 'order' field

    Models that build on this class are responsible for keeping 'order'
    sufficiently unique.
    """

    order = models.PositiveSmallIntegerField()

    class Meta:
        abstract = True
        ordering = ['order']
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ A group that ties several podcasts together """

    @property
    def scope(self):
        """ Podcast groups always live in the global scope (empty string) """
        global_scope = ''
        return global_scope
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Excludes podcasts with missing title to guarantee some
        minimum quality of the results """

        # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
        # random uuid and query podcasts with a higher ID
        # This returns podcasts in order of their ID, but the assumption is
        # that usually only one podcast will be required anyway
        import uuid

        # uuid4() is random. The previously used uuid1() is derived from the
        # current time, so calls made shortly after each other got almost the
        # same pivot and therefore almost the same podcasts.
        ruuid = uuid.uuid4()
        return self.exclude(title='').filter(id__gt=ruuid)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license

        Without 'license_url', all podcasts that have a license are
        returned. """
        if license_url:
            return self.filter(license=license_url)
        else:
            return self.exclude(license__isnull=True)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update

        The next update is computed in SQL as last_update plus
        update_interval hours. """
        NEXTUPDATE = "last_update + (update_interval || ' hours')::INTERVAL"
        q = self.extra(select={'next_update': NEXTUPDATE})
        return q.order_by('next_update')

    def toplist(self, language=None):
        """ Podcasts sorted by descending subscriber count

        If 'language' is given, only podcasts in that language are
        included. """
        toplist = self
        if language:
            toplist = toplist.filter(language=language)

        return toplist.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Podcasts are queried through a PodcastQuerySet """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast for the URL; creates it if it is missing

        'defaults' are the field values used when a new podcast is created.
        A generated 'id' is always added to them. A newly created podcast
        gets 'url' as its first URL (order 0) in the global scope. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: the previous signature used a mutable default
        # argument ({}) and updated it in place, which modified both the
        # dict shared between calls and any dict passed by the caller.
        defaults = dict(defaults) if defaults else {}
        defaults['id'] = uuid.uuid1().hex

        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                              )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)
    twitter = models.CharField(max_length=15, null=True, blank=False)

    # in hours, see DEFAULT_UPDATE_INTERVAL
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Stub, always returns 0 """
        # TODO: implement
        return 0

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        Depending on which of the two podcasts already is in a group, either
        a new group titled 'grouptitle' is created, or the ungrouped podcast
        joins the group of the other one. 'myname' and 'othername' become
        the group_member_name of self and other, respectively.

        Returns the group, or None if both podcasts already were in the same
        group. Raises ValueError if they are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2


    def subscribe_targets(self, user):
        """
        returns all Devices and SyncGroups on which this podcast can be
        subscribed. This excludes all devices/syncgroups on which the podcast
        is already subscribed

        For a synced group, the list of its devices is added as one entry;
        devices of other groups are added individually.
        """
        targets = []

        subscriptions_by_devices = user.get_subscriptions_by_device()

        for group in user.get_grouped_devices():

            if group.is_synced:

                # only the first device of a synced group is checked
                dev = group.devices[0]

                if self.get_id() not in subscriptions_by_devices[dev.id]:
                    targets.append(group.devices)

            else:
                for device in group.devices:
                    if self.get_id() not in subscriptions_by_devices[device.id]:
                        targets.append(device)

        return targets


    def get_common_episode_title(self, num_episodes=100):
        """ The title part that the podcast's episodes have in common

        Returns the stored common_episode_title if there is one. Otherwise
        it is derived from the titles of up to 'num_episodes' episodes.
        Returns None if no common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. This has to be a list, because its
        # length is needed below (filter() does not return a list on
        # Python 3)
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title


    def get_episode_before(self, episode):
        """ The episode that was released directly before the given one

        Returns None if 'episode' has no release date. As before, the
        DoesNotExist exception of QuerySet.latest() is raised if no earlier
        episode exists. """
        if not episode.released:
            return None

        # The field is named explicitly: latest() without an argument needs
        # 'get_latest_by' in the model's Meta, which Episode does not define.
        earlier = self.episode_set.filter(released__lt=episode.released)
        return earlier.latest('released')

    def get_episode_after(self, episode):
        """ The episode that was released directly after the given one

        Returns None if 'episode' has no release date, or if no later
        episode exists. """
        if not episode.released:
            return None

        # Episodes are sorted by '-released' by default, so first() alone
        # would pick the most recent episode instead of the next one. The
        # later episodes are therefore sorted in ascending order explicitly.
        later = self.episode_set.filter(released__gt=episode.released)
        return later.order_by('released').first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def display_title(self):
        """ Stub, currently the same as the title """
        # TODO
        return self.title
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Episodes """

    def toplist(self, language=None):
        """ Episodes sorted by descending listener count

        If 'language' is given, only episodes in that language are
        included. """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')

    def by_released(self):
        """ Sorts by release date, newest first

        Episodes that have no release date are placed after all episodes
        that have one. """
        # 'has_released' is computed in SQL; sorting by it (descending) first
        # keeps episodes with a release date ahead of those without
        flagged = self.extra(select={'has_released': 'released IS NOT NULL'})
        return flagged.order_by('-has_released', '-released')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Episodes are queried through an EpisodeQuerySet """
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode for the URL; creates it if missing

        'defaults' are the field values used when a new episode is created.
        A generated 'id' is always added to them. A newly created episode
        gets 'url' as its first URL (order 0), scoped to the podcast's ID. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: the previous signature used a mutable default
        # argument ({}) and updated it in place, which modified both the
        # dict shared between calls and any dict passed by the caller.
        defaults = dict(defaults) if defaults else {}
        defaults['id'] = uuid.uuid1().hex

        episode, created = self.get_or_create(podcast=podcast,
                                              urls__url=url,
                                              defaults=defaults,
                                             )

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope=podcast.get_id(),
                               content_object=episode,
                              )
        return episode
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.PositiveIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        # most recently released episodes first
        ordering = ['-released']

    @property
    def scope(self):
        """ Episodes are scoped to their podcast, ie to the podcast's ID """
        podcast_uuid = self.podcast_id
        return podcast_uuid.hex

    def get_short_title(self, common_title):
        """ The episode's title as shown in the context of its podcast

        'common_title' is removed from the title, as well as any leading
        non-word characters and digits of the remainder. Returns None if
        either the title or 'common_title' is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)


    def get_episode_number(self, common_title):
        """ The episode's number, taken from its title

        The number is expected at the start of what remains of the title
        once 'common_title' is removed (leading non-word characters are
        skipped). Returns None if there is no such number, or if either the
        title or 'common_title' is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ Abstract model for objects that are unique within some scope

    NULL is not permitted for the scope: in SQL, null does not equal null,
    so a nullable scope could not take part in unique constraints. """

    # Slugs and URLs are unique within their scope: no two podcasts
    # (scope '') can share a URL, and neither can two episodes of the same
    # podcast (scope = podcast ID)
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    class Meta:
        abstract = True
class URL(OrderedModel, ScopedModel):
    """ A URL of a Podcast or an Episode

    An object can have several URLs. They are ordered, and the first one is
    considered the canonical one. """

    url = models.URLField(max_length=2048)

    # generic relation to the object that owns the URL, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # within a scope, every URL exists only once
            ('url', 'scope'),

            # the URLs of one object are ordered, ie no two of them share
            # the same order key
            ('content_type', 'object_id', 'order'),
        )
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # possible values of 'source'
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)
    #user = models.ForeignKey(null=True)

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # one source can assign a given tag to an item only once
            # TODO: add user to tuple
            ('tag', 'source', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def __repr__(self):
        """ Debug representation with slug, order and the related object """
        # the closing parenthesis was missing from the format string, which
        # left the output unbalanced
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ Keeps the UUID of an object that was merged into another one

    When objects are merged, their UUIDs are stored for later reference.
    See also :class:`MergedUUIDsMixin`
    """

    uuid = UUIDField(unique=True)

    # generic relation to the object that the UUID now refers to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'