[Requirements] update to latest Pillow=3.0.0
[mygpo.git] / mygpo / podcasts / models.py
blob104a32dbb64465391d87bc05614e912ff37f4483
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.db.models import F
9 from django.utils.translation import ugettext as _
10 from django.contrib.contenttypes.models import ContentType
11 from django.contrib.contenttypes.fields import (GenericRelation,
12 GenericForeignKey)
14 from mygpo import utils
15 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
16 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
18 import logging
19 logger = logging.getLogger(__name__)
# Default number of hours between two updates of a podcast (one week)
DEFAULT_UPDATE_INTERVAL = 24 * 7

# Minimum number of hours between two updates of a podcast
MIN_UPDATE_INTERVAL = 5

# Maximum number of hours between two updates of a podcast; every podcast
# should be updated at least once a month (30 days)
MAX_UPDATE_INTERVAL = 30 * 24
class TitleModel(models.Model):
    """ Abstract model providing a title and a subtitle """

    class Meta:
        abstract = True

    # both fields may be empty, but are never NULL in the database
    title = models.CharField(
        max_length=1000,
        blank=True,
        null=False,
        db_index=True,
    )
    subtitle = models.TextField(blank=True, null=False)

    def __str__(self):
        # the title doubles as the string representation
        return self.title
class DescriptionModel(models.Model):
    """ Abstract model providing a free-text description """

    class Meta:
        abstract = True

    # may be empty, but is never NULL in the database
    description = models.TextField(blank=True, null=False)
class LinkModel(models.Model):
    """ Abstract model providing an (optional) link """

    class Meta:
        abstract = True

    # NULL is stored if there is no link
    link = models.URLField(max_length=1000, null=True)
class LanguageModel(models.Model):
    """ Abstract model providing an (optional) language """

    class Meta:
        abstract = True

    # NULL is stored if the language is not known
    language = models.CharField(
        max_length=10,
        blank=False,
        null=True,
        db_index=True,
    )
class LastUpdateModel(models.Model):
    """ Abstract model storing when the object was last updated from its
    source """

    class Meta:
        abstract = True

    # Date and time of the last update from the object's source (eg a
    # podcast feed). None indicates a stub that has been created without
    # any information from the source.
    last_update = models.DateTimeField(null=True)
class LicenseModel(models.Model):
    """ Abstract model providing an (optional) license """

    class Meta:
        abstract = True

    # URL to a license (usually Creative Commons); NULL if there is none
    license = models.CharField(
        max_length=100,
        blank=False,
        null=True,
        db_index=True,
    )
class FlattrModel(models.Model):
    """ Abstract model providing an (optional) Flattr payment URL """

    class Meta:
        abstract = True

    # A Flattr payment URL; NULL if there is none
    flattr_url = models.URLField(
        max_length=1000,
        blank=False,
        null=True,
        db_index=True,
    )
class ContentTypesModel(models.Model):
    """ Abstract model storing the content types of an object """

    class Meta:
        abstract = True

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)
class MergedIdsModel(models.Model):
    """ Abstract model without any fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract model providing an indexed "outdated" flag """

    class Meta:
        abstract = True

    # objects are not outdated unless flagged explicitly
    outdated = models.BooleanField(db_index=True, default=False)
class AuthorModel(models.Model):
    """ Abstract model providing an (optional) author """

    class Meta:
        abstract = True

    # may be empty or NULL
    author = models.CharField(max_length=350, blank=True, null=True)
class UrlsMixin(models.Model):
    """ Methods for working with URL objects

    add_missing_urls() reads ``self.scope``, so classes using this mixin
    have to provide that attribute. """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, None if it does not have any URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLs from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that can not be stored
        because of a database constraint are logged and skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1

        # set of the URLs that are already known; also tracks the URLs added
        # below, so that duplicates within new_urls are only stored once
        known_urls = {u.url for u in existing}

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                # The nested atomic block creates a savepoint, so that a
                # failed INSERT does not break an enclosing transaction
                with transaction.atomic():
                    URL.objects.create(url=url,
                                       order=next_order,
                                       scope=self.scope,
                                       content_object=self,
                                       )
            except IntegrityError as ie:
                logger.warning('Could not add URL: %s', ie)
                continue

            known_urls.add(url)
            next_order += 1

    def set_url(self, url):
        """ Sets the canonical URL

        The given URL is moved (or added) to the front of the object's
        URLs; all other URLs are kept in their current order. """
        urls = [u.url for u in self.urls.all()]
        if url in urls:
            urls.remove(url)

        urls.insert(0, url)
        self.set_urls(urls)

    def set_urls(self, urls):
        """ Update the object's URLs to the given list

        'urls' should be a list of strings. URLs that do not exist are
        created. Existing URLs that are not in the 'urls' list are
        deleted. """
        urls = [utils.to_maxlength(URL, 'url', url) for url in urls]
        existing = {u.url: u for u in self.urls.all()}
        utils.set_ordered_entries(self, urls, existing, URL, 'url',
                                  'content_object')
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects

    add_slug() reads ``self.scope``, so classes using this mixin have to
    provide that attribute. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the object, None if it does not have any slugs

        TODO: should be retrieved from a (materialized) view """
        # self.slugs.first() is avoided on purpose: it would issue a
        # different query and thereby render .prefetch_related('slugs')
        # useless. We assume that objects never have loads of slugs, so
        # fetching all of them won't hurt.
        slugs = list(self.slugs.all())
        if slugs:
            slug = slugs[0].slug
        else:
            slug = None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        Raises a ValueError for an empty slug. Nothing happens if the
        object already has the (shortened) slug. """
        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        # cut slug to the maximum allowed length
        slug = utils.to_maxlength(Slug, 'slug', slug)

        current = list(self.slugs.all())

        # nothing to do if the slug already exists
        if any(entry.slug == slug for entry in current):
            return

        # append the new slug after the last existing one
        highest_order = max([-1] + [entry.order for entry in current])
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=highest_order + 1,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The given slug is moved (or added) to the front of the object's
        slugs; all other slugs are kept in their current order. """
        remaining = [s.slug for s in self.slugs.all()]
        try:
            remaining.remove(slug)
        except ValueError:
            # the slug is new for this object
            pass

        self.set_slugs([slug] + remaining)

    def remove_slug(self, slug):
        """ Removes a slug """
        matching = Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        shortened = [utils.to_maxlength(Slug, 'slug', slug) for slug in slugs]
        by_slug = {entry.slug: entry for entry in self.slugs.all()}
        utils.set_ordered_entries(self, shortened, by_slug, Slug, 'slug',
                                  'content_object')
class MergedUUIDsMixin(models.Model):
    """ Mixin that relates a model to its MergedUUID objects """

    class Meta:
        abstract = True

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first; the merged UUIDs are only
        queried if no object has the given ID. """
        # TODO: should this be done in the model?
        not_found = self.model.DoesNotExist
        try:
            obj = self.get(id=id)
        except not_found:
            obj = self.get(merged_uuids__uuid=id)
        return obj
class TagsMixin(models.Model):
    """ Mixin that relates a model to its Tag objects """

    class Meta:
        abstract = True

    tags = GenericRelation('Tag', related_query_name='tags')
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group

        Returns 0 for a group without podcasts. """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcasts. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None

        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts with a missing title are excluded to guarantee some
        minimum quality of the results """
        import uuid

        # PostgreSQL's RANDOM() is very expensive. Instead a new UUID is
        # generated and podcasts with a higher ID are queried. The results
        # are therefore ordered by ID, but the assumption is that usually
        # only one podcast will be required anyway.
        pivot = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=pivot)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            # no specific license requested: all podcasts that have one
            return self.exclude(license__isnull=True)

        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        # the next update is due update_interval hours after the last one
        next_update_sql = (
            "last_update + (update_interval || ' hours')::INTERVAL"
        )
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = ("(last_update + (update_interval || "
                     "' hours')::INTERVAL) BETWEEN %s AND %s")
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally of one language """
        podcasts = self.filter(language=language) if language else self
        return podcasts.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet for all queries of this manager """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast for the given URL, creating it if necessary

        'defaults' is an optional dict of field values that are used when
        the podcast is created; an 'id' entry is always replaced by a newly
        generated UUID. The passed dict is not modified.

        Raises a ValueError if the URL is empty. """

        if not url:
            raise ValueError('The URL must not be empty')

        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy, so that neither the caller's dict nor a shared
        # default value is modified
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1()

        url = utils.to_maxlength(URL, 'url', url)
        try:
            # try to fetch the podcast
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )
        except Podcast.DoesNotExist:
            # podcast did not exist, try to create it
            try:
                # nested atomic block (savepoint), so that a failed INSERT
                # can be recovered from in the except clause below
                with transaction.atomic():
                    podcast = Podcast.objects.create(**defaults)
                    URL.objects.create(url=url,
                                       order=0,
                                       scope='',
                                       content_object=podcast,
                                       )
                    return podcast

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Podcast.objects.get(urls__url=url,
                                           urls__scope='',
                                           )
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
              TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Number of subscribers, as stored in the subscribers field """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled 'grouptitle'
        is created for both. If only one of them is in a group, the other
        one is added to that group. 'myname' and 'othername' are used as
        group member names of self and other, respectively.

        Returns the group, or None if both podcasts already are in the
        same group. Raises a ValueError if they are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ The part of the title that the podcast's episodes have in common

        Returns the stored common_episode_title if it is set. Otherwise the
        title is derived from the titles of up to 'num_episodes' episodes.
        Returns None if no common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ An episode of this podcast released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): latest() without a field name depends on the
        # get_latest_by option of the Episode model -- confirm it is set
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ An episode of this podcast released after the given one

        Returns None if the given episode has no release date, or if there
        is no later episode. """
        if not episode.released:
            return None
        # NOTE(review): first() follows the default ordering of the Episode
        # model -- confirm that this selects the intended episode
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic (translated) title if the podcast does not
        have a title of its own. """
        if self.title:
            return self.title

        # fetch the URL only once, each access queries the podcast's URLs
        url = self.url
        if not url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                           .format(podcast_id=self.id))
            return _('Unknown Podcast')

        # The message has to be translated *before* the domain is inserted;
        # otherwise the formatted string is looked up in the translations
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally of one language """
        episodes = self.filter(language=language) if language else self
        return episodes.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet for all queries of this manager """
        return EpisodeQuerySet(self.model, using=self._db)

    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Create an Episode for a given URL

        Returns the episode of 'podcast' that has the given URL; it is
        created if it does not exist yet. 'defaults' is an optional dict of
        field values that are used when the episode is created.

        Raises a ValueError if the URL is empty.

        This is the only place where new episodes are created """

        if not url:
            raise ValueError('The URL must not be empty')

        if defaults is None:
            defaults = {}

        # TODO: where to specify how uuid is created?
        import uuid

        url = utils.to_maxlength(URL, 'url', url)

        try:
            # try to fetch the episode
            return Episode.objects.get(urls__url=url,
                                       urls__scope=podcast.as_scope,
                                       )
        except Episode.DoesNotExist:
            # episode did not exist, try to create it
            try:
                # nested atomic block (savepoint), so that a failed INSERT
                # can be recovered from in the except clause below
                with transaction.atomic():
                    episode = Episode.objects.create(podcast=podcast,
                                                     id=uuid.uuid1(),
                                                     **defaults)

                    URL.objects.create(url=url,
                                       order=0,
                                       scope=episode.scope,
                                       content_object=episode,
                                       )

                    # Keep episode_count up to date here; it is not
                    # recalculated when updating the podcast because counting
                    # episodes can be very slow for podcasts with many episodes
                    Podcast.objects.filter(pk=podcast.pk)\
                        .update(episode_count=F('episode_count')+1)

                    return episode

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Episode.objects.get(urls__url=url,
                                           urls__scope=podcast.as_scope,
                                           )
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
              OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(null=True, max_length=200)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ An episode's scope is its podcast """
        return self.podcast.id.hex

    @property
    def display_title(self):
        """ A title for display purposes; currently just the title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        The common title is removed from the episode's title, as are
        leading non-word characters and digits. Returns None if the
        episode's title or the common title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        The number is expected at the start of the title, after the common
        title has been removed. Returns None if the episode's title or the
        common title is empty, or if no number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        number = re.search(r'^\W*(\d+)', remainder)
        if number is None:
            return None

        return int(number.group(1))
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    The scope does not allow null values: null is not equal to null in SQL,
    so a nullable scope could not be used in unique constraints. """

    class Meta:
        abstract = True

    # A slug / URL is unique within a scope; no two podcasts can have the same
    # URL (scope ''), and no two episodes of the same podcast (scope =
    # podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        blank=True,
        null=False,
        db_index=True,
    )

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be implemented by subclasses; this implementation always
        raises a NotImplementedError. """
        message = '{cls} should implement get_default_scope'.format(
            cls=type(self).__name__)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # Generic relation to the object that the URL belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ A URL's default scope is the scope of the object it belongs to """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ Tags any kind of Model

    See also :class:`TagsMixin`
    """

    # possible sources of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # indicates where the tag came from
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag (if it was created by a user,
    # null otherwise)
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # Generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # a tag can only be assigned once from one source to one item
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # Generic relation to the object that the slug belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Representation showing the slug, its order and its object """
        # the closing parenthesis was missing from the format string
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ If objects are merged their UUIDs are stored for later reference

    see also :class:`MergedUUIDsMixin`
    """

    # a UUID can only be stored once
    uuid = models.UUIDField(unique=True)

    # Generic relation to the object that the UUID now refers to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = models.UUIDField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'