Merge branch 'master' into py3
[mygpo.git] / mygpo / podcasts / models.py
blob8eb9fd02705a9cec767f33bc2ef432d5b30062e2
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.db.models import F
9 from django.utils.translation import ugettext as _
10 from django.contrib.contenttypes.models import ContentType
11 from django.contrib.contenttypes.fields import GenericRelation
12 from django.contrib.contenttypes import generic
14 from mygpo import utils
15 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
16 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
18 import logging
19 logger = logging.getLogger(__name__)
# default podcast update interval in hours
DEFAULT_UPDATE_INTERVAL = 7 * 24

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# upper bound in hours: every podcast should be updated at least once a month
MAX_UPDATE_INTERVAL = 24 * 30
class TitleModel(models.Model):
    """ Abstract base for models that carry a title and a subtitle """

    title = models.CharField(
        max_length=1000,
        null=False,
        blank=True,
        db_index=True,
    )
    subtitle = models.TextField(null=False, blank=True)

    class Meta:
        abstract = True

    def __str__(self):
        # the title doubles as the textual representation of the object
        return self.title
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a free-text description """

    description = models.TextField(blank=True, null=False)

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base for models that carry an (optional) link """

    link = models.URLField(max_length=1000, null=True)

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base for models that carry a language code """

    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base storing when the object was last refreshed from its
    source """

    # Date and time of the most recent update from the source (eg a podcast
    # feed). None marks an object that was created as a stub, i.e. without
    # any information from the source.
    last_update = models.DateTimeField(null=True)

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base for models that reference a license """

    # URL to a license (usually Creative Commons)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base for models that can have a Flattr payment URL """

    # A Flattr payment URL
    flattr_url = models.URLField(
        max_length=1000,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base for models that record their content types """

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base that declares no fields in the code visible here """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base adding an indexed 'outdated' flag (False by default) """

    outdated = models.BooleanField(db_index=True, default=False)

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base for models that carry an (optional) author name """

    author = models.CharField(max_length=350, blank=True, null=True)

    class Meta:
        abstract = True
class UrlsMixin(models.Model):
    """ Methods for working with URL objects

    Classes using this mixin are expected to provide a ``scope``
    attribute, which is stored on newly created URL objects. """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that can not be stored
        because of an IntegrityError are logged and skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1
        known_urls = {u.url for u in existing}

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                URL.objects.create(url=url,
                                   order=next_order,
                                   scope=self.scope,
                                   content_object=self,
                                   )
            except IntegrityError as ie:
                # str() already yields text on Python 3; there is nothing
                # to decode
                logger.warning('Could not add URL: %s', ie)
                continue

            next_order += 1
            # remember the URL so that a duplicate within new_urls does not
            # trigger a second (failing) INSERT
            known_urls.add(url)

    def set_url(self, url):
        """ Sets the canonical URL

        The given URL is moved (or added) to the front of the object's
        URL list; the remaining URLs keep their relative order. """
        urls = [u.url for u in self.urls.all()]
        if url in urls:
            urls.remove(url)

        urls.insert(0, url)
        self.set_urls(urls)

    def set_urls(self, urls):
        """ Update the object's URLS to the given list

        'urls' should be a list of strings. URLs that do not exist are
        created. Existing urls that are not in the 'urls' list are
        deleted. """
        # cut every URL to the maximum length of the URL.url field
        urls = [utils.to_maxlength(URL, 'url', url) for url in urls]
        existing = {u.url: u for u in self.urls.all()}
        utils.set_ordered_entries(self, urls, existing, URL, 'url',
                                  'content_object')
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects

    Classes using this mixin are expected to provide a ``scope``
    attribute, which is stored on newly created Slug objects. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the object, or None if it has no slugs

        TODO: should be retrieved from a (materialized) view """
        # self.slugs.first() would issue a different query and thereby
        # defeat a .prefetch_related('slugs'). As objects are assumed to
        # never have many slugs, fetching all of them is cheap enough.
        all_slugs = list(self.slugs.all())
        if all_slugs:
            main_slug = all_slugs[0].slug
        else:
            main_slug = None
        logger.debug('Found slugs %r, picking %r', all_slugs, main_slug)
        return main_slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        Raises ValueError for an empty slug. Nothing happens if the
        (length-limited) slug is already assigned to the object. """
        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        current = self.slugs.all()

        # cut slug to the maximum allowed length
        slug = utils.to_maxlength(Slug, 'slug', slug)

        # nothing to do if the slug already exists
        for entry in current:
            if entry.slug == slug:
                return

        # append after the highest order value in use (0 if there is none)
        highest = max([-1] + [entry.order for entry in current])
        Slug.objects.create(
            scope=self.scope,
            slug=slug,
            content_object=self,
            order=highest + 1,
        )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The slug is moved (or added) to the front of the slug list. """
        ordered = [entry.slug for entry in self.slugs.all()]
        if slug in ordered:
            ordered.remove(slug)

        ordered.insert(0, slug)
        self.set_slugs(ordered)

    def remove_slug(self, slug):
        """ Removes a slug from this object """
        own_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=own_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        # cut every slug to the maximum length of the Slug.slug field
        slugs = [utils.to_maxlength(Slug, 'slug', slug) for slug in slugs]
        by_slug = {entry.slug: entry for entry in self.slugs.all()}
        utils.set_ordered_entries(self, slugs, by_slug, Slug, 'slug',
                                  'content_object')
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for Models inheriting from MergedUUID """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first; only if no object has that ID
        the merged UUIDs are searched. A DoesNotExist from that second
        lookup is passed on to the caller. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    tags = GenericRelation('Tag', related_query_name='tags')

    class Meta:
        abstract = True
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group

        Returns 0 for a group without podcasts. """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcast. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None
        # the value has to be returned explicitly; without the return the
        # property would always evaluate to None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out to guarantee some minimum
        quality of the results """
        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and podcasts with a higher ID are queried. The result is
        # ordered by ID, but the assumption is that usually only one podcast
        # will be required anyway
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            # no specific license requested: everything that has a license
            return self.exclude(license__isnull=True)
        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = ("(last_update + (update_interval || ' hours')::INTERVAL) "
                     "BETWEEN %s AND %s")
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscriber count

        If a language is given, only podcasts in that language are
        considered. """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet for all queries issued via this manager """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if necessary

        'defaults' holds field values used only when a new podcast is
        created. An 'id' entry is always replaced by a freshly generated
        UUID. The passed dict is not modified. """
        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy: neither the caller's dict nor a shared default
        # argument may be mutated
        attrs = dict(defaults) if defaults else {}
        attrs['id'] = uuid.uuid1()

        url = utils.to_maxlength(URL, 'url', url)
        try:
            # try to fetch the podcast
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )
        except Podcast.DoesNotExist:
            # podcast did not exist, try to create it
            try:
                # the inner atomic block acts as a savepoint, so a failed
                # insert is rolled back before querying again below
                with transaction.atomic():
                    podcast = Podcast.objects.create(**attrs)
                    URL.objects.create(url=url,
                                       order=0,
                                       scope='',
                                       content_object=podcast,
                                       )
                    return podcast

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Podcast.objects.get(urls__url=url,
                                           urls__scope='',
                                           )
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
        TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours (see DEFAULT_UPDATE_INTERVAL)
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Number of subscribers, as stored in the 'subscribers' field """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        Returns the group both podcasts belong to afterwards, or None if
        they already shared a group. Raises ValueError if both podcasts
        already are members of different groups. 'grouptitle' is only used
        when a new group has to be created. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ Title part shared by the podcast's episodes

        Returns the stored common_episode_title if there is one. Otherwise
        the value is derived from the titles of up to num_episodes
        episodes. Returns None if no usable common title is found. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        # (the pattern also matches the empty string, so search() never
        # returns None here)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ An episode of this podcast released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): latest() without a field name relies on
        # Meta.get_latest_by, which the Episode.Meta visible in this file
        # does not set -- confirm
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ An episode of this podcast released after the given one

        Returns None if the given episode has no release date, or if there
        is no later episode. """
        if not episode.released:
            return None
        # NOTE(review): first() follows Episode's default ordering
        # ('-order', '-released'), so this looks like it picks the newest
        # later episode rather than the directly following one -- confirm
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic, translated title if the podcast does not
        have a title of its own. """
        if self.title:
            return self.title

        if not self.url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                .format(podcast_id=self.id))
            return _('Unknown Podcast')

        # translate the template first and fill in the domain afterwards;
        # formatting before the lookup would produce a message id that is
        # not part of any translation catalog
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(self.url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listener count

        If a language is given, only episodes in that language are
        considered. """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet for all queries issued via this manager """
        return EpisodeQuerySet(self.model, using=self._db)

    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Create an Episode for a given URL

        Returns the episode of 'podcast' that has the given URL; if there
        is none, it is created with the field values from 'defaults' and a
        freshly generated UUID as ID.

        This is the only place where new episodes are created """

        # TODO: where to specify how uuid is created?
        import uuid

        # avoid a shared mutable default argument
        attrs = dict(defaults) if defaults else {}

        url = utils.to_maxlength(URL, 'url', url)

        try:
            # try to fetch the episode
            return Episode.objects.get(urls__url=url,
                                       urls__scope=podcast.as_scope,
                                       )
        except Episode.DoesNotExist:
            # episode did not exist, try to create it
            try:
                # episode, URL and counter update succeed or fail together
                with transaction.atomic():
                    episode = Episode.objects.create(podcast=podcast,
                                                     id=uuid.uuid1(),
                                                     **attrs)

                    URL.objects.create(url=url,
                                       order=0,
                                       scope=episode.scope,
                                       content_object=episode,
                                       )

                    # Keep episode_count up to date here; it is not
                    # recalculated when updating the podcast because counting
                    # episodes can be very slow for podcasts with many episodes
                    Podcast.objects.filter(pk=podcast.pk)\
                        .update(episode_count=F('episode_count')+1)

                    return episode

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Episode.objects.get(urls__url=url,
                                           urls__scope=podcast.as_scope,
                                           )
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
        OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(null=True, max_length=200)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ An episode's scope is its podcast (the hex form of its ID) """
        return self.podcast.id.hex

    @property
    def display_title(self):
        """ Title for display purposes; currently just the title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        Returns None if the episode title or common_title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        # strip leading non-word characters and digits from what is left
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        The number is taken from the start of the title, once common_title
        has been removed from it. Returns None if the episode title or
        common_title is empty, or if no number is found there. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        number = re.search(r'^\W*(\d+)', remainder)
        return int(number.group(1)) if number else None
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    The scope can not be null: in SQL null does not equal null, so a
    nullable scope could not take part in unique constraints. """

    # A slug / URL is unique within a scope; no two podcasts can have the same
    # URL (scope ''), and no two episodes of the same podcast (scope =
    # podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    class Meta:
        abstract = True

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be provided by subclasses; this base implementation always
        raises NotImplementedError. """
        class_name = self.__class__.__name__
        raise NotImplementedError('{cls} should implement get_default_scope'
                                  .format(cls=class_name))
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # generic relation to the object the URL belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ By default a URL uses the scope of the object it belongs to """
        return self.content_object.scope
class Tag(models.Model):
    """ Tags any kind of Model

    See also :class:`TagsMixin`
    """

    # possible origins of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # indicates where the tag came from
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag (if it was created by a user,
    # null otherwise)
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # a tag can only be assigned once from one source to one item
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # generic relation to the object the slug belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation with slug, order and owning object """
        # the format string now closes the parenthesis it opens
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ If objects are merged their UUIDs are stored for later reference

    see also :class:`MergedUUIDsMixin`
    """

    uuid = models.UUIDField(unique=True)

    # generic relation to the object that carries the merged UUID, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'