[Podcasts] make Episode.duration a BigInteger
[mygpo.git] / mygpo / podcasts / models.py
blob8ecf149e96d3734a3941b5bc8651c3f411d429d2
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.utils.translation import ugettext as _
9 from django.contrib.contenttypes.models import ContentType
10 from django.contrib.contenttypes.fields import GenericRelation
11 from django.contrib.contenttypes import generic
13 from uuidfield import UUIDField
15 from mygpo import utils
16 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
17 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
19 import logging
20 logger = logging.getLogger(__name__)
# default podcast update interval in hours (one week)
DEFAULT_UPDATE_INTERVAL = 7 * 24

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# maximum podcast update interval in hours: every podcast should be updated
# at least once a month (30 days)
MAX_UPDATE_INTERVAL = 24 * 30
class TitleModel(models.Model):
    """ Model that has a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        # ASCII-only representation; characters that cannot be encoded are
        # replaced instead of raising an encoding error
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        # Was misspelled "__unicode": inside a class body that name is
        # mangled to "_TitleModel__unicode", so it was never picked up as
        # the unicode() hook.
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base that adds a free-text description """

    description = models.TextField(
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base that adds an optional link (URL) """

    link = models.URLField(
        null=True,
        max_length=1000,
    )

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base that adds an indexed, nullable language code """

    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base storing when the object was last updated from its
    source """

    # Timestamp of the most recent update from the source (eg a podcast
    # feed). A value of None marks an object that was created as a stub,
    # ie without any information from the source.
    last_update = models.DateTimeField(
        null=True,
    )

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base that adds a license reference """

    # URL of a license (usually a Creative Commons one)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base that adds Flattr payment information """

    # Flattr payment URL of the object
    flattr_url = models.URLField(
        null=True,
        blank=False,
        max_length=1000,
        db_index=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base that records the content types of an object """

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(
        max_length=20,
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base without fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base that adds an indexed 'outdated' flag """

    outdated = models.BooleanField(
        default=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base that adds an optional author name """

    author = models.CharField(
        max_length=350,
        null=True,
        blank=True,
    )

    class Meta:
        abstract = True
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLs, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLs from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that can not be stored
        because of an IntegrityError are logged and skipped. """
        existing_urls = self.urls.all()
        next_order = max([-1] + [u.order for u in existing_urls]) + 1

        # set of URLs the object is known to have; extended as URLs are
        # added, so a URL repeated within new_urls is only inserted once
        # instead of triggering a failing INSERT
        known_urls = set(u.url for u in existing_urls)

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                URL.objects.create(url=url,
                                   order=next_order,
                                   scope=self.scope,
                                   content_object=self,
                                   )
            except IntegrityError as ie:
                logger.warning('Could not add URL: {err}'.format(err=ie))
                continue

            # only reached if the URL has actually been created
            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Helpers for objects that own an ordered list of Slug objects """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main (first) slug of the object, or None if it has none

        TODO: should be retrieved from a (materialized) view """

        # self.slugs.first() would issue a different query and thereby make
        # a .prefetch_related('slugs') useless. Objects are not expected to
        # have many slugs, so all of them are fetched.
        all_slugs = list(self.slugs.all())
        if all_slugs:
            main_slug = all_slugs[0].slug
        else:
            main_slug = None
        logger.debug('Found slugs %r, picking %r', all_slugs, main_slug)
        return main_slug

    def add_slug(self, slug):
        """ Appends a (non-canonical) slug unless the object already has it

        Raises ValueError if the given slug is empty. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        current = self.slugs.all()

        # shorten the slug to the maximum length of the Slug.slug field
        slug = utils.to_maxlength(Slug, 'slug', slug)

        # nothing to do if the object already has this slug
        known = [entry.slug for entry in current]
        if slug in known:
            return

        # the new slug is ordered after all existing ones
        orders = [-1] + [entry.order for entry in current]
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=max(orders) + 1,
                            )

    def set_slug(self, slug):
        """ Makes the given slug the canonical (first) one """

        remaining = [entry.slug for entry in self.slugs.all()]
        if slug in remaining:
            remaining.remove(slug)

        remaining.insert(0, slug)
        self.set_slugs(remaining)

    def remove_slug(self, slug):
        """ Deletes the given slug of this object """
        content_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=content_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Replaces the object's slugs by the given list of strings

        Each slug is shortened to the maximum length of the Slug.slug
        field; the resulting list and the currently existing Slug objects
        are handed to utils.set_ordered_entries. """
        shortened = []
        for slug in slugs:
            shortened.append(utils.to_maxlength(Slug, 'slug', slug))

        by_slug = dict((entry.slug, entry) for entry in self.slugs.all())
        utils.set_ordered_entries(self, shortened, by_slug, Slug, 'slug',
                                  'content_object')
class MergedUUIDsMixin(models.Model):
    """ Adds the relation to the MergedUUID objects of a model """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by one of its merged IDs

        The object's own ID is tried first; the merged UUIDs are only
        queried if that lookup raises DoesNotExist. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Adds the relation to the Tag objects of a model """

    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )

    class Meta:
        abstract = True
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group

        Returns 0 for a group without podcasts. """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcasts. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None

        # the original evaluated podcast.logo_url without returning it, so
        # the property always yielded None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out to guarantee some minimum
        quality of the results """

        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and podcasts with a higher ID are selected. The results
        # come in order of their ID, but the assumption is that usually
        # only one podcast will be required anyway.
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            return self.exclude(license__isnull=True)
        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        return self.extra(
            select={'next_update': next_update_sql},
        ).order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = (
            "(last_update + (update_interval || ' hours')::INTERVAL) "
            "BETWEEN %s AND %s"
        )
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally of one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet so its custom queries are available """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if missing

        url is shortened to the maximum length of the URL.url field.
        defaults are the values used when a new podcast is created; a fresh
        'id' is always set and overrides any 'id' given in defaults. The
        passed dict is not modified. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: the original updated the (shared, mutable) default
        # argument -- and any dict passed by the caller -- in place.
        create_defaults = dict(defaults or {})
        create_defaults['id'] = uuid.uuid1().hex

        url = utils.to_maxlength(URL, 'url', url)
        podcast, created = self.get_or_create(urls__url=url,
                                              defaults=create_defaults)

        if created:
            # a newly created podcast does not have its URL object yet
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                               )

        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
              TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Returns the value of the subscribers field """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled grouptitle is
        created for both. If exactly one of them is in a group, the other
        one is added to it. myname / othername become the group member name
        of self / other when they are added to a group.

        Returns the group, or None if both already are in the same group.
        Raises ValueError if both are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ Title part that is common to the podcast's episodes

        Returns the stored common_episode_title if it is set. Otherwise the
        title is derived from the titles of up to num_episodes episodes.
        Returns None if fewer than two episodes have a title, or if the
        common part has fewer than two non-whitespace-trimmed characters. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. A list (instead of filter()) is
        # built because len() is needed below, and filter() only returns a
        # list on Python 2.
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ Episode of this podcast released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): latest() without a field name relies on the model's
        # get_latest_by option, which is not visible here -- confirm
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ Episode of this podcast released after the given one

        Returns None if the given episode has no release date, or if no
        later episode exists. """
        if not episode.released:
            return None
        # NOTE(review): first() follows the queryset's ordering; with a
        # newest-first default ordering this yields the newest later
        # episode, not the directly following one -- confirm intent
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic title (mentioning the domain of the
        podcast's URL, if it has one) when the podcast has no title. """
        if self.title:
            return self.title

        if not self.url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                           .format(podcast_id=self.id.hex))
            return _('Unknown Podcast')

        # Translate the template first and fill in the domain afterwards;
        # formatting before the lookup (as was done previously) passes an
        # already-filled string to gettext, which can never match a
        # catalogue entry.
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(self.url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally of one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')

    def by_released(self):
        """ Sorts by release date, sorting missing release date last

        Episodes with the most recent release date are listed first;
        episodes that do not have a release date follow at the end. """
        # 'has_released' is true for episodes with a release date; sorting
        # by it in descending order moves those in front of the others
        flagged = self.extra(select={
            'has_released': 'released IS NOT NULL',
        })
        return flagged.order_by('-has_released', '-released')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet so its custom queries are available """
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL

        The URL is looked up in the scope of the podcast. If it does not
        exist, a new episode (with a fresh ID and the field values given in
        defaults) is created together with its URL object. """
        # TODO: where to specify how uuid is created?
        import uuid

        try:
            existing = URL.objects.get(url=url, scope=podcast.as_scope)

        except URL.DoesNotExist:
            # defaults=None replaces the former mutable default argument
            episode = Episode.objects.create(podcast=podcast,
                                             id=uuid.uuid1().hex,
                                             **(defaults or {})
                                             )

            URL.objects.create(url=url,
                               order=0,
                               scope=episode.scope,
                               content_object=episode,
                               )
            return episode

        else:
            return existing.content_object
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
              OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),
        ]

    @property
    def scope(self):
        """ The scope of an episode is (the hex ID of) its podcast """
        return self.podcast_id.hex

    @property
    def display_title(self):
        """ Title for display purposes; currently the plain title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        Removes common_title from the title and strips leading non-word
        characters and digits. Returns None if the episode has no title or
        no common_title is given. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        The number is taken from the digits at the beginning of the title
        (after removing common_title). Returns None if the episode has no
        title, no common_title is given or no number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        number = re.search(r'^\W*(\d+)', remainder)
        if number is None:
            return None

        return int(number.group(1))
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    The scope may not be null: null is not equal to null in SQL, so a
    nullable scope could not be used in unique constraints. """

    # A slug / URL is unique within a scope; no two podcasts can have the same
    # URL (scope ''), and no two episodes of the same podcast (scope =
    # podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    class Meta:
        abstract = True

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be implemented by subclasses; this base implementation
        raises NotImplementedError. """
        cls_name = self.__class__.__name__
        message = '{cls} should implement get_default_scope'.format(
            cls=cls_name)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # generic relation to the object the URL belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(
        ContentType,
        on_delete=models.PROTECT,
    )
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

    def get_default_scope(self):
        """ The default scope is the scope of the object the URL belongs to """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ Tags any kind of Model

    See also :class:`TagsMixin`
    """

    # possible sources of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # indicates where the tag came from
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag (if it was created by a user,
    # null otherwise)
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(
        ContentType,
        on_delete=models.PROTECT,
    )
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # a tag can only be assigned once from one source to one item
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per scope; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation with slug, order and owning object """
        # the format string previously lacked the closing parenthesis
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ If objects are merged their UUIDs are stored for later reference

    see also :class:`MergedUUIDsMixin`
    """

    uuid = UUIDField(unique=True)

    # generic relation to the object the UUID has been merged into, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(
        ContentType,
        on_delete=models.PROTECT,
    )
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name = 'Merged UUID'
        verbose_name_plural = 'Merged UUIDs'