[Episodes] trim URL when creating episode
[mygpo.git] / mygpo / podcasts / models.py
blob11ee20e76b5c7d02c50272d86cfa1c4c44ea64e3
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.db.models import F
9 from django.utils.translation import ugettext as _
10 from django.contrib.contenttypes.models import ContentType
11 from django.contrib.contenttypes.fields import GenericRelation
12 from django.contrib.contenttypes import generic
14 from mygpo import utils
15 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
16 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
18 import logging
19 logger = logging.getLogger(__name__)
22 # default podcast update interval in hours
23 DEFAULT_UPDATE_INTERVAL = 7 * 24
25 # minimum podcast update interval in hours
26 MIN_UPDATE_INTERVAL = 5
28 # every podcast should be updated at least once a month
29 MAX_UPDATE_INTERVAL = 24 * 30
class TitleModel(models.Model):
    """ Abstract model that provides a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        # byte-string representation; characters that can not be encoded
        # as ASCII are replaced instead of raising an encoding error
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        # This hook was previously spelled "__unicode" (without trailing
        # underscores). That name is mangled into a private attribute, so
        # unicode(obj) never reached it.
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a free-text description """

    # may be left empty, but is never stored as NULL
    description = models.TextField(blank=True, null=False)

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base for models that carry a (nullable) link """

    link = models.URLField(max_length=1000, null=True)

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base for models that are assigned a language """

    # indexed, nullable language code of at most 10 characters
    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base that records when an object was last updated
    from its source """

    # Date and time of the most recent update from the object's source
    # (eg a podcast feed). A value of None marks a stub: the object has
    # been created without any information from the source.
    last_update = models.DateTimeField(null=True)

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base for models that reference a license """

    # URL of the license (usually a Creative Commons one)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base for models that can be paid for via Flattr """

    # the Flattr payment URL
    flattr_url = models.URLField(
        max_length=1000,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base for models that list their content types """

    # comma-separated content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base class that declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    # indexed flag; objects are not outdated unless marked otherwise
    outdated = models.BooleanField(db_index=True, default=False)

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base for models that name an (optional) author """

    author = models.CharField(max_length=350, blank=True, null=True)

    class Meta:
        abstract = True
class UrlsMixin(models.Model):
    """ Helpers for models that own an ordered list of URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The object's main URL, or None if it has no URL at all """
        # self.urls.first() would issue a different query and thereby make
        # a .prefetch_related('urls') useless. Objects are expected to have
        # only a few URLs, so loading all of them does not hurt.
        all_urls = list(self.urls.all())
        if not all_urls:
            return None
        return all_urls[0].url

    def add_missing_urls(self, new_urls):
        """ Appends those entries of new_urls which the object lacks

        Existing URLs keep their order; new URLs are ordered after them.
        A URL whose creation raises an IntegrityError is logged and
        skipped. """
        current = self.urls.all()
        next_order = max([-1] + [entry.order for entry in current]) + 1
        known = [entry.url for entry in current]

        for candidate in new_urls:
            if candidate in known:
                continue

            try:
                URL.objects.create(
                    url=candidate,
                    order=next_order,
                    scope=self.scope,
                    content_object=self,
                )
            except IntegrityError as ie:
                logger.warn('Could not add URL: {err}'.format(err=ie))
            else:
                # the order key is only consumed by a successful insert
                next_order += 1

    def set_url(self, url):
        """ Makes url the canonical (ie first) URL of the object """
        remaining = [entry.url for entry in self.urls.all()]
        if url in remaining:
            remaining.remove(url)

        self.set_urls([url] + remaining)

    def set_urls(self, urls):
        """ Replaces the object's URLs by the given list

        'urls' should be a list of strings. URLs that do not exist yet are
        created; existing URLs that are not part of 'urls' are deleted. """
        # cut every URL to the maximum length of the URL.url field
        trimmed = [utils.to_maxlength(URL, 'url', item) for item in urls]
        by_url = dict((entry.url, entry) for entry in self.urls.all())
        utils.set_ordered_entries(
            self, trimmed, by_url, URL, 'url', 'content_object')
class SlugsMixin(models.Model):
    """ Helpers for models that own an ordered list of Slug objects """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The object's main slug, or None if it has no slug at all

        TODO: should be retrieved from a (materialized) view """
        # self.slugs.first() would issue a different query and thereby make
        # a .prefetch_related('slugs') useless. Objects are expected to have
        # only a few slugs, so loading all of them does not hurt.
        slugs = list(self.slugs.all())
        if slugs:
            slug = slugs[0].slug
        else:
            slug = None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug after all existing ones

        Raises ValueError for an empty slug. Nothing happens if the
        (trimmed) slug is already assigned to the object. """
        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        # cut the slug to the maximum length of the Slug.slug field
        slug = utils.to_maxlength(Slug, 'slug', slug)

        current = self.slugs.all()
        if slug in [entry.slug for entry in current]:
            # the slug is already there
            return

        highest = max([-1] + [entry.order for entry in current])
        Slug.objects.create(
            scope=self.scope,
            slug=slug,
            content_object=self,
            order=highest + 1,
        )

    def set_slug(self, slug):
        """ Makes slug the canonical (ie first) slug of the object """
        remaining = [entry.slug for entry in self.slugs.all()]
        if slug in remaining:
            remaining.remove(slug)

        self.set_slugs([slug] + remaining)

    def remove_slug(self, slug):
        """ Deletes the given slug of this object """
        own_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=own_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Replaces the object's slugs by the given list

        'slugs' should be a list of strings. Slugs that do not exist yet
        are created; existing slugs that are not part of 'slugs' are
        deleted. """
        # cut every slug to the maximum length of the Slug.slug field
        trimmed = [utils.to_maxlength(Slug, 'slug', item) for item in slugs]
        by_slug = dict((entry.slug, entry) for entry in self.slugs.all())
        utils.set_ordered_entries(
            self, trimmed, by_slug, Slug, 'slug', 'content_object')
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that keep track of merged UUIDs """

    def get_by_any_id(self, id):
        """ Finds an object by its own ID or by one of its merged IDs

        The object's own ID is tried first. If the lookup by merged UUID
        fails as well, the exception of that second .get() propagates. """
        # TODO: should this be done in the model?
        not_found = self.model.DoesNotExist
        try:
            obj = self.get(id=id)
        except not_found:
            obj = self.get(merged_uuids__uuid=id)
        return obj
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )

    class Meta:
        abstract = True
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group

        Returns 0 for a group without podcasts. """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcast. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None

        # the result was previously computed but never returned
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Podcast-specific queries """

    def random(self):
        """ Podcasts starting at an arbitrary position of the ID range

        Podcasts without a title are left out to guarantee some minimum
        quality of the results """
        import uuid

        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and the podcasts with a higher ID are selected. The
        # results come in order of their ID, but the assumption is that
        # usually only one podcast is required anyway.
        pivot = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=pivot)

    def flattr(self):
        """ Podcasts which provide Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with the given license, or with any license if none
        is given """
        if not license_url:
            return self.exclude(license__isnull=True)

        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sorts podcasts by the time of their next scheduled update """
        # next update = last update + update interval (in hours)
        next_update_sql = (
            "last_update + (update_interval || ' hours')::INTERVAL"
        )
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = (
            "(last_update + (update_interval || ' hours')::INTERVAL) "
            "BETWEEN %s AND %s"
        )
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally of one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Uses PodcastQuerySet so its custom queries are available """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast for the given URL, creating it if necessary

        'url' is cut to the maximum length of the URL.url field before it
        is looked up. 'defaults' may contain field values for a newly
        created podcast; its 'id' is always replaced by a new UUID. The
        passed dict is not modified. """
        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy, so that neither the caller's dict nor a shared
        # default value is mutated
        podcast_attrs = dict(defaults or {})
        podcast_attrs['id'] = uuid.uuid1()

        url = utils.to_maxlength(URL, 'url', url)
        try:
            # try to fetch the podcast
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )

        except Podcast.DoesNotExist:
            # podcast did not exist, try to create it
            try:
                with transaction.atomic():
                    podcast = Podcast.objects.create(**podcast_attrs)
                    # 'url' is not rebound to the created object, because
                    # the except-branch below needs the string
                    URL.objects.create(url=url,
                                       order=0,
                                       scope='',
                                       content_object=podcast,
                                       )
                    return podcast

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Podcast.objects.get(urls__url=url,
                                           urls__scope='',
                                           )
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
              TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False,
                                            blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(
        null=False, default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Number of subscribers, as stored in the 'subscribers' field """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled 'grouptitle'
        is created. If only one of them is in a group, the other one is
        added to it. 'myname' and 'othername' become the group member
        names of the podcasts that are (newly) added to a group.

        Returns the group, or None if both podcasts already share a group.
        Raises ValueError if they are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(),
                                                title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ The title part that the podcast's episodes have in common

        Returns the stored common_episode_title if there is one. Otherwise
        it is derived from the titles of up to 'num_episodes' episodes.
        Returns None if no usable common title is found. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. This has to be a real list, because
        # its length is needed (filter() does not always return one).
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The episode that was released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): .latest() is called without a field name -- confirm
        # that Episode provides a get_latest_by, and that callers handle the
        # exception raised when there is no earlier episode
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ An episode that was released after the given one

        Returns None if the given episode has no release date, or if no
        later episode exists. """
        if not episode.released:
            return None
        # NOTE(review): .first() follows the default ordering of Episode --
        # confirm that this picks the intended (directly following) episode
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic title (including the domain of the URL, if
        the podcast has one) when the podcast has no title. """
        if self.title:
            return self.title

        # the url property queries the podcast's URLs; fetch it only once
        url = self.url

        if not url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                           .format(podcast_id=self.id.hex))
            return _('Unknown Podcast')

        # Translate the template first and insert the domain afterwards.
        # Formatting before the lookup would hand a string to gettext that
        # can not match any message in the translation catalog.
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ Episode-specific queries """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally of one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Uses EpisodeQuerySet so its custom queries are available """
        return EpisodeQuerySet(self.model, using=self._db)

    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Create an Episode for a given URL

        Returns the episode of 'podcast' which has the given URL, and
        creates it if it does not exist yet. 'url' is cut to the maximum
        length of the URL.url field before it is looked up. 'defaults' may
        contain field values for a newly created episode.

        This is the only place where new episodes are created """

        # TODO: where to specify how uuid is created?
        import uuid

        # None instead of a mutable {} as default value
        episode_attrs = defaults or {}

        url = utils.to_maxlength(URL, 'url', url)

        try:
            # try to fetch the episode
            return Episode.objects.get(urls__url=url,
                                       urls__scope=podcast.as_scope,
                                       )

        except Episode.DoesNotExist:
            # episode did not exist, try to create it
            try:
                with transaction.atomic():
                    episode = Episode.objects.create(podcast=podcast,
                                                     id=uuid.uuid1(),
                                                     **episode_attrs)

                    # 'url' is not rebound to the created object, because
                    # the except-branch below needs the string
                    URL.objects.create(url=url,
                                       order=0,
                                       scope=episode.scope,
                                       content_object=episode,
                                       )

                    # Keep episode_count up to date here; it is not
                    # recalculated when updating the podcast because counting
                    # episodes can be very slow for podcasts with many episodes
                    Podcast.objects.filter(pk=podcast.pk)\
                        .update(episode_count=F('episode_count')+1)

                    return episode

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Episode.objects.get(urls__url=url,
                                           urls__scope=podcast.as_scope,
                                           )
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
              OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(null=True, max_length=200)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ The scope of an episode is (the hex ID of) its podcast """
        return self.podcast.id.hex

    @property
    def display_title(self):
        """ The title for display purposes; currently the plain title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ The title as used within the context of the podcast

        The common title and any leading non-word characters and digits
        are removed. Returns None if either title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ The number of the episode, taken from its title

        Returns the number that follows the common title, or None if there
        is none or if either title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ A model that lives in some scope, usually for limited uniqueness

    The scope can not be null: in SQL null does not equal null, so a
    nullable column could not be used in unique constraints. """

    # Slugs / URLs are unique within their scope: no two podcasts can have
    # the same URL (scope ''), and no two episodes of the same podcast
    # (scope = podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    class Meta:
        abstract = True

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be provided by subclasses. """
        cls_name = self.__class__.__name__
        message = '{cls} should implement get_default_scope'.format(
            cls=cls_name)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ A URL of a Podcast or an Episode; both can have several

    URLs are ordered, and the first URL is considered the canonical one.
    See also :class:`UrlsMixin` """

    url = models.URLField(max_length=2048)

    # generic relation to the owning object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # within a scope every URL exists only once
            ('url', 'scope'),

            # the URLs of an object are ordered, so no two URLs of one
            # object can share an order key
            ('content_type', 'object_id', 'order'),
        )

        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

    def get_default_scope(self):
        """ By default a URL is in the scope of the object it belongs to """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # possible origins of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # where the tag came from (one of SOURCE_CHOICES)
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user who created the tag; null if it was not created by a user
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # one source can assign a tag to an item only once
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation with slug, order and owning object """
        # the closing parenthesis of the representation used to be missing
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ Keeps the UUIDs of merged objects for later reference

    see also :class:`MergedUUIDsMixin`
    """

    # every merged UUID is stored only once
    uuid = models.UUIDField(unique=True)

    # generic relation to the object that the UUID now refers to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name = 'Merged UUID'
        verbose_name_plural = 'Merged UUIDs'