Update to Django 1.9.1
[mygpo.git] / mygpo / podcasts / models.py
blob6726e6bff88025acc50cc4091fec48fd58bf6fb8
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.db.models import F
9 from django.utils.translation import ugettext as _
10 from django.contrib.contenttypes.models import ContentType
11 from django.contrib.contenttypes.fields import (GenericRelation,
12 GenericForeignKey)
14 from mygpo import utils
15 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
16 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
18 import logging
19 logger = logging.getLogger(__name__)
# All update intervals below are expressed in hours.
_HOURS_PER_DAY = 24

# default podcast update interval: once a week
DEFAULT_UPDATE_INTERVAL = 7 * _HOURS_PER_DAY

# minimum podcast update interval
MIN_UPDATE_INTERVAL = 5

# every podcast should be updated at least once a month (30 days)
MAX_UPDATE_INTERVAL = _HOURS_PER_DAY * 30
class TitleModel(models.Model):
    """ Abstract base for models that carry a title and a subtitle """

    # both fields may be left empty, but are never NULL in the database
    title = models.CharField(
        max_length=1000,
        db_index=True,
        blank=True,
        null=False,
    )
    subtitle = models.TextField(blank=True, null=False)

    class Meta:
        abstract = True

    def __str__(self):
        """ The string representation of the object is its title """
        return self.title
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a free-text description """

    # may be empty, but is never NULL in the database
    description = models.TextField(blank=True, null=False)

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base for models that carry a link """

    # NULL if no link is known
    link = models.URLField(max_length=1000, null=True)

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base for models that carry a language """

    # NULL if no language is known; indexed so objects can be filtered by it
    language = models.CharField(
        max_length=10,
        db_index=True,
        blank=False,
        null=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base that records when an object was last updated from
    its source """

    # Point in time of the most recent update from the object's source (eg
    # a podcast feed). A value of None marks an object that was created as
    # a stub, ie without any information from the source.
    last_update = models.DateTimeField(null=True)

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base for models that reference a license """

    # URL to a license (usually Creative Commons); NULL if none is known
    license = models.CharField(
        max_length=100,
        db_index=True,
        blank=False,
        null=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base for models that can carry a Flattr payment URL """

    # A Flattr payment URL; NULL if the object provides none
    flattr_url = models.URLField(
        max_length=1000,
        db_index=True,
        blank=False,
        null=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base for models that list their content types """

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base that declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    # indexed flag; objects are not outdated unless marked explicitly
    outdated = models.BooleanField(db_index=True, default=False)

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base for models that name an author """

    # optional; may be NULL or empty
    author = models.CharField(max_length=350, blank=True, null=True)

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for Models inheriting from MergedUUID """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by one of its merged IDs

        The object's own ID is tried first. Only if no object has that ID
        are the merged UUIDs searched; that second lookup propagates any
        exception raised by ``get()``. """
        # TODO: should this be done in the model?
        not_found = self.model.DoesNotExist
        try:
            match = self.get(id=id)
        except not_found:
            match = self.get(merged_uuids__uuid=id)
        return match
class TagsMixin(models.Model):
    """ Mixin that gives a model a generic relation to Tag objects """

    # 'Tag' is referenced by name; it is defined further down in the module
    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )

    class Meta:
        abstract = True
class ScopedModel(models.Model):
    """ A model that lives within some scope, usually to limit uniqueness

    The scope must not be null: in SQL null does not equal null, so a
    nullable column could not take part in unique constraints. """

    # Slugs / URLs are unique within one scope: two podcasts can not share
    # a URL (scope ''), and neither can two episodes of the same podcast
    # (scope = podcast-ID)
    scope = models.CharField(
        max_length=32,
        db_index=True,
        blank=True,
        null=False,
    )

    class Meta:
        abstract = True

    def get_default_scope(self):
        """ Returns the default scope of the object

        Subclasses are expected to override this; the base implementation
        always raises NotImplementedError. """
        class_name = self.__class__.__name__
        message = '{cls} should implement get_default_scope'.format(
            cls=class_name)
        raise NotImplementedError(message)
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation showing slug, order and target object """
        # the closing parenthesis was missing from the format string, which
        # produced an unbalanced representation
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class SlugsMixin(models.Model):
    """ Mixin with helpers for managing the Slug objects of a model """

    slugs = GenericRelation(Slug, related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The canonical (first) slug of the object, or None if it has none

        TODO: should be retrieved from a (materialized) view """

        # self.slugs.first() would issue a different query and thereby make
        # a .prefetch_related('slugs') useless. Objects are assumed to have
        # only a few slugs, so loading all of them is cheap.
        all_slugs = list(self.slugs.all())
        if all_slugs:
            canonical = all_slugs[0].slug
        else:
            canonical = None
        logger.debug('Found slugs %r, picking %r', all_slugs, canonical)
        return canonical

    def add_slug(self, slug):
        """ Appends a (non-canonical) slug after all existing ones

        Raises ValueError for an empty slug. Does nothing if the object
        already has the (length-limited) slug. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        # cut slug to the maximum allowed length
        slug = utils.to_maxlength(Slug, 'slug', slug)

        current = list(self.slugs.all())

        # nothing to do if the slug is already assigned
        if slug in [entry.slug for entry in current]:
            return

        # the new slug is placed behind the highest existing order key
        highest = max([-1] + [entry.order for entry in current])
        Slug.objects.create(
            scope=self.scope,
            slug=slug,
            content_object=self,
            order=highest + 1,
        )

    def set_slug(self, slug):
        """ Makes the given slug the canonical one, keeping the others """

        remaining = [entry.slug for entry in self.slugs.all()]
        try:
            remaining.remove(slug)
        except ValueError:
            # the slug is new for this object
            pass

        self.set_slugs([slug] + remaining)

    def remove_slug(self, slug):
        """ Deletes the given slug from this object """
        own_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=own_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Replaces the object's slugs with the given list

        'slugs' should be a list of strings. Each one is first cut to the
        maximum slug length; the actual synchronisation is delegated to
        utils.set_ordered_entries. """
        shortened = [utils.to_maxlength(Slug, 'slug', s) for s in slugs]
        by_slug = dict((entry.slug, entry) for entry in self.slugs.all())
        utils.set_ordered_entries(self, shortened, by_slug, Slug, 'slug',
                                  'content_object')
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group

        Returns 0 for a group without podcasts. """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcast. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None

        # the original evaluated this expression without returning it, so
        # the property always yielded None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out to guarantee some minimum
        quality of the results """

        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and podcasts with a higher ID are queried. The result
        # is ordered by ID, but the assumption is that usually only one
        # podcast is needed anyway.
        # NOTE(review): uuid1() is time-based rather than random -- confirm
        # that this yields a sufficiently random selection.
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            return self.exclude(license__isnull=True)
        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = (
            "(last_update + (update_interval || ' hours')::INTERVAL) "
            "BETWEEN %s AND %s"
        )
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts ordered by descending subscriber count

        If a language is given, only podcasts in it are included. """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet so its custom queries are available """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast for the given URL, creating it if necessary

        url: the podcast's URL; it is cut to the maximum URL length
        defaults: optional field values used when a podcast is created. The
            mapping is not modified; any 'id' it contains is replaced by a
            newly generated UUID.

        Raises ValueError if the URL is empty. """

        if not url:
            raise ValueError('The URL must not be empty')

        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: updating the argument in place would modify the
        # caller's dict, or the shared default when none was passed
        create_kwargs = dict(defaults) if defaults else {}
        create_kwargs['id'] = uuid.uuid1()

        url = utils.to_maxlength(URL, 'url', url)
        try:
            # try to fetch the podcast
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )

        except Podcast.DoesNotExist:
            # podcast did not exist, try to create it
            try:
                with transaction.atomic():
                    podcast = Podcast.objects.create(**create_kwargs)
                    # not bound to 'url', which must stay a string for the
                    # lookup in the IntegrityError handler below
                    URL.objects.create(url=url,
                                       order=0,
                                       scope='',
                                       content_object=podcast,
                                       )
                    return podcast

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Podcast.objects.get(urls__url=url,
                                           urls__scope='',
                                           )
class URL(OrderedModel, ScopedModel):
    """ A URL of a Podcast or an Episode; each of them can have several

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # within one scope, every URL exists only once
            ('url', 'scope'),

            # the URLs of one object are ordered; no two of them may share
            # the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ The default scope is the scope of the object the URL points to """
        target = self.content_object
        return target.scope
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation(URL, related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after them. URLs that the object already has, and URLs repeated
        within new_urls, are skipped. A URL that can not be created because
        of an IntegrityError is logged and skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1
        known_urls = set(u.url for u in existing)

        for url in new_urls:
            if url in known_urls:
                continue

            # Remember the URL even if creating it fails below; a repeated
            # entry would only run into the same unique constraint again
            known_urls.add(url)

            try:
                URL.objects.create(url=url,
                                   order=next_order,
                                   scope=self.scope,
                                   content_object=self,
                                   )
            except IntegrityError as ie:
                # Logger.warn is deprecated in favour of Logger.warning
                logger.warning('Could not add URL: %s', ie)
                continue

            # the order key is only used up by successfully created URLs
            next_order += 1

    def set_url(self, url):
        """ Sets the canonical URL """

        urls = [u.url for u in self.urls.all()]
        if url in urls:
            urls.remove(url)

        urls.insert(0, url)
        self.set_urls(urls)

    def set_urls(self, urls):
        """ Update the object's URLS to the given list

        'urls' should be a list of strings. Each one is first cut to the
        maximum URL length; the actual synchronisation is delegated to
        utils.set_ordered_entries. """
        urls = [utils.to_maxlength(URL, 'url', url) for url in urls]
        existing = {u.url: u for u in self.urls.all()}
        utils.set_ordered_entries(self, urls, existing, URL, 'url',
                                  'content_object')
class MergedUUID(models.Model):
    """ Keeps the UUID of an object that has been merged into another one,
    so that it can still be referenced later

    see also :class:`MergedUUIDsMixin`
    """

    # a merged UUID can only be recorded once
    uuid = models.UUIDField(unique=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'
class MergedUUIDsMixin(models.Model):
    """ Mixin that gives a model a generic relation to MergedUUID objects """

    merged_uuids = GenericRelation(
        MergedUUID,
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
        TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Returns the stored number of subscribers """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled 'grouptitle'
        is created for both. If only one is grouped, the other one joins
        that group. 'myname' and 'othername' become the group member name
        of whichever podcast is (newly) added to a group.

        Returns the group, or None if both already share a group. Raises
        ValueError if the podcasts are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ Title part that the podcast's episodes have in common

        A stored common_episode_title takes precedence. Otherwise the
        title is derived from up to 'num_episodes' episodes. Returns None
        if no common title of at least two characters can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ An episode of this podcast released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): latest() without a field name depends on the model's
        # 'get_latest_by' option, which is not visible here -- confirm
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ An episode of this podcast released after the given one

        Returns None if the given episode has no release date, or if no
        later episode exists. """
        if not episode.released:
            return None
        # NOTE(review): first() follows the model's default ordering; verify
        # that this selects the directly following episode
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic, translated title (mentioning the domain of
        the podcast's URL, if it has one) when the podcast has no title. """
        if self.title:
            return self.title

        # self.url runs a query on each access, so fetch it only once
        url = self.url

        if not url:
            logger.warning('Podcast with ID %s does not have a URL', self.id)
            return _('Unknown Podcast')

        # Translate the template first and fill in the domain afterwards.
        # Formatting before the lookup produces a string that never matches
        # a catalog entry.
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes ordered by descending listener count

        If a language is given, only episodes in it are included. """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet so its custom queries are available """
        return EpisodeQuerySet(self.model, using=self._db)

    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Create an Episode for a given URL

        This is the only place where new episodes are created

        podcast: the podcast the episode belongs to
        url: the episode's URL; it is cut to the maximum URL length
        defaults: optional field values used when an episode is created

        Returns the existing or newly created episode. Raises ValueError if
        the URL is empty. """

        if not url:
            raise ValueError('The URL must not be empty')

        # TODO: where to specify how uuid is created?
        import uuid

        # avoid a mutable default argument; None stands for "no defaults"
        create_kwargs = defaults or {}

        url = utils.to_maxlength(URL, 'url', url)

        try:
            # try to fetch the episode
            return Episode.objects.get(urls__url=url,
                                       urls__scope=podcast.as_scope,
                                       )

        except Episode.DoesNotExist:
            # episode did not exist, try to create it
            try:
                with transaction.atomic():
                    episode = Episode.objects.create(podcast=podcast,
                                                     id=uuid.uuid1(),
                                                     **create_kwargs)

                    # not bound to 'url', which must stay a string for the
                    # lookup in the IntegrityError handler below
                    URL.objects.create(url=url,
                                       order=0,
                                       scope=episode.scope,
                                       content_object=episode,
                                       )

                    # Keep episode_count up to date here; it is not
                    # recalculated when updating the podcast because counting
                    # episodes can be very slow for podcasts with many episodes
                    Podcast.objects.filter(pk=podcast.pk)\
                        .update(episode_count=F('episode_count')+1)

                    return episode

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Episode.objects.get(urls__url=url,
                                           urls__scope=podcast.as_scope,
                                           )
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
        OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(null=True, max_length=200)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # supports the typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ An episode is scoped by the hex ID of its podcast """
        owner = self.podcast
        return owner.id.hex

    @property
    def display_title(self):
        """ Title for display purposes; currently just the title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        The common title is removed, and so are any leading non-word
        characters and digits. Returns None if the episode's title or the
        common title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        The number is the first run of digits found at the start of the
        title once the common title has been removed; leading non-word
        characters are ignored. Returns None if the episode's title or the
        common title is empty, or if no number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
796 class Tag(models.Model):
797 """ Tags any kind of Model
799 See also :class:`TagsMixin`
802 FEED = 1
803 DELICIOUS = 2
804 USER = 4
806 SOURCE_CHOICES = (
807 (FEED, 'Feed'),
808 (DELICIOUS, 'delicious'),
809 (USER, 'User'),
812 tag = models.SlugField()
814 # indicates where the tag came from
815 source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)
817 # the user that created the tag (if it was created by a user,
818 # null otherwise)
819 user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True,
820 on_delete=models.CASCADE)
822 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
823 content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
824 object_id = models.UUIDField()
825 content_object = GenericForeignKey('content_type', 'object_id')
827 class Meta:
828 unique_together = (
829 # a tag can only be assigned once from one source to one item
830 ('tag', 'source', 'user', 'content_type', 'object_id'),