[Directory] remove unused imports
[mygpo.git] / mygpo / podcasts / models.py
blob79ff29657c4adce4ebb5dd33e0f88fd761560d53
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.db.models import F
9 from django.utils.translation import ugettext as _
10 from django.contrib.contenttypes.models import ContentType
11 from django.contrib.contenttypes.fields import GenericRelation
12 from django.contrib.contenttypes import generic
14 from uuidfield import UUIDField
16 from mygpo import utils
17 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
18 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
20 import logging
21 logger = logging.getLogger(__name__)
# All update intervals below are expressed in hours.

# default podcast update interval: one week
DEFAULT_UPDATE_INTERVAL = 24 * 7

# minimum podcast update interval
MIN_UPDATE_INTERVAL = 5

# maximum podcast update interval: every podcast should be updated at least
# once a month (30 days)
MAX_UPDATE_INTERVAL = 30 * 24
class TitleModel(models.Model):
    """ Abstract model that has a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    class Meta:
        abstract = True

    def __str__(self):
        """ ASCII representation of the title

        Characters that can not be encoded as ASCII are replaced. """
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        """ The unmodified title

        This method used to be spelled ``__unicode`` (without the trailing
        underscores). Inside a class body that name is mangled to
        ``_TitleModel__unicode``, so it was never picked up as the
        ``__unicode__`` hook. """
        return self.title
class DescriptionModel(models.Model):
    """ Abstract model that adds a free-text ``description`` """

    class Meta:
        abstract = True

    # may be left blank, but is never stored as NULL
    description = models.TextField(blank=True, null=False)
class LinkModel(models.Model):
    """ Abstract model that adds a ``link`` (a URL) """

    class Meta:
        abstract = True

    # NULL is allowed for objects without a link
    link = models.URLField(max_length=1000, null=True)
class LanguageModel(models.Model):
    """ Abstract model that adds an indexed ``language`` field """

    class Meta:
        abstract = True

    # NULL is allowed; an empty string is not accepted by validation
    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )
class LastUpdateModel(models.Model):
    """ Abstract model that records when it was last updated from its source """

    class Meta:
        abstract = True

    # Timestamp of the last update from the object's source (eg a podcast
    # feed). A value of None marks a stub: the object has been created
    # without any information from the source.
    last_update = models.DateTimeField(null=True)
class LicenseModel(models.Model):
    """ Abstract model that adds an indexed ``license`` field """

    class Meta:
        abstract = True

    # URL of the license (usually a Creative Commons license)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )
class FlattrModel(models.Model):
    """ Abstract model that adds an indexed Flattr payment URL """

    class Meta:
        abstract = True

    # the Flattr payment URL; NULL if there is none
    flattr_url = models.URLField(
        max_length=1000,
        null=True,
        blank=False,
        db_index=True,
    )
class ContentTypesModel(models.Model):
    """ Abstract model that stores the content types of an object """

    class Meta:
        abstract = True

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)
class MergedIdsModel(models.Model):
    """ Abstract model that does not declare any fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract model that adds an indexed ``outdated`` flag """

    class Meta:
        abstract = True

    # objects are not outdated unless explicitly flagged
    outdated = models.BooleanField(db_index=True, default=False)
class AuthorModel(models.Model):
    """ Abstract model that adds an optional ``author`` """

    class Meta:
        abstract = True

    # both NULL and the empty string are accepted
    author = models.CharField(max_length=350, blank=True, null=True)
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that can not be created
        because of an IntegrityError are logged and skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1

        # URLs that are already attached to this object. URLs added by this
        # call are recorded too, so that a duplicate within new_urls is
        # skipped instead of triggering an avoidable IntegrityError.
        known_urls = set(u.url for u in existing)

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                URL.objects.create(url=url,
                                   order=next_order,
                                   scope=self.scope,
                                   content_object=self,
                                   )
            except IntegrityError as ie:
                logger.warning('Could not add URL: %s', ie)
                continue

            known_urls.add(url)
            next_order += 1

    def set_url(self, url):
        """ Sets the canonical URL

        The given URL is moved (or added) to the front of the object's
        URLs; the other URLs keep their relative order. """
        urls = [u.url for u in self.urls.all()]
        if url in urls:
            urls.remove(url)

        urls.insert(0, url)
        self.set_urls(urls)

    def set_urls(self, urls):
        """ Update the object's URLS to the given list

        'urls' should be a list of strings. URLs that do not exist are
        created. Existing urls that are not in the 'urls' list are
        deleted. """
        # cut each URL to the maximum length allowed by the URL model
        urls = [utils.to_maxlength(URL, 'url', url) for url in urls]
        existing = {u.url: u for u in self.urls.all()}
        utils.set_ordered_entries(self, urls, existing, URL, 'url',
                                  'content_object')
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the object, or None if it has no slugs

        TODO: should be retrieved from a (materialized) view """
        # self.slugs.first() would also work, but it issues a different
        # query and thereby defeats a .prefetch_related('slugs'). We assume
        # that objects never have loads of slugs, so fetching all is fine.
        all_slugs = list(self.slugs.all())
        if all_slugs:
            main_slug = all_slugs[0].slug
        else:
            main_slug = None
        logger.debug('Found slugs %r, picking %r', all_slugs, main_slug)
        return main_slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        Raises a ValueError for an empty slug. Nothing happens if the object
        already has the (possibly shortened) slug. """
        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        current = self.slugs.all()

        # shorten the slug to the maximum length the Slug model allows
        slug = utils.to_maxlength(Slug, 'slug', slug)

        # nothing to do if the object already has this slug
        taken = [s.slug for s in current]
        if slug in taken:
            return

        # the new slug is ordered after all existing ones
        orders = [s.order for s in current]
        next_order = max([-1] + orders) + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The slug is moved (or added) to the front of the object's slugs. """
        others = [s.slug for s in self.slugs.all()]
        try:
            others.remove(slug)
        except ValueError:
            # the slug is new for this object
            pass

        self.set_slugs([slug] + others)

    def remove_slug(self, slug):
        """ Removes a slug from this object """
        own_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=own_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        shortened = [utils.to_maxlength(Slug, 'slug', slug) for slug in slugs]
        by_slug = dict((s.slug, s) for s in self.slugs.all())
        utils.set_ordered_entries(self, shortened, by_slug, Slug, 'slug',
                                  'content_object')
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    class Meta:
        abstract = True

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have a ``merged_uuids`` relation """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first. The second lookup is not
        guarded, so its exceptions propagate to the caller. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            # not an object's own ID; maybe it is the ID of a merged object
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    class Meta:
        abstract = True

    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcasts. The
        property used to lack a ``return`` statement and therefore always
        evaluated to None (or raised an AttributeError for empty groups). """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out, to guarantee some minimum
        quality of the results """
        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and only podcasts with a higher ID are returned. The
        # results are therefore not shuffled, but the assumption is that
        # callers usually need just one podcast anyway.
        import uuid
        pivot = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=pivot)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            # no specific license requested: any license will do
            return self.exclude(license__isnull=True)
        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        # next update = last update + update interval (in hours)
        NEXTUPDATE = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': NEXTUPDATE})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        NEXTUPDATE_BETWEEN = (
            "(last_update + (update_interval || ' hours')::INTERVAL) "
            "BETWEEN %s AND %s"
        )
        return self.extra(where=[NEXTUPDATE_BETWEEN], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally for one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet for all queries of this manager """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast for the given URL, creating it if necessary

        'defaults' contains field values for a newly created podcast. Its
        'id' is always replaced by a freshly generated UUID. The passed
        dict itself is not modified. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy. The previous implementation updated the argument
        # in place, which leaked the generated ID into the caller's dict and
        # into the shared default value of the parameter.
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        # cut the URL to the maximum length allowed by the URL model
        url = utils.to_maxlength(URL, 'url', url)
        try:
            # try to fetch the podcast
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )
        except Podcast.DoesNotExist:
            # podcast did not exist, try to create it
            try:
                # nested atomic block: a failing insert does not break the
                # outer transaction, so the fallback query can still run
                with transaction.atomic():
                    podcast = Podcast.objects.create(**defaults)
                    URL.objects.create(url=url,
                                       order=0,
                                       scope='',
                                       content_object=podcast,
                                       )
                    return podcast

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Podcast.objects.get(urls__url=url,
                                           urls__scope='',
                                           )
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
        TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Number of subscribers of the podcast """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        Returns the group both podcasts belong to afterwards, or None if
        they already were in the same group. Raises a ValueError if the
        podcasts are in two different groups. 'grouptitle' is only used
        when a new group has to be created. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ The title part that the podcast's episodes have in common

        A stored 'common_episode_title' takes precedence. Otherwise the
        titles of up to 'num_episodes' episodes are compared. Returns None
        if no usable common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. This has to be a real list, because
        # its length is needed below (filter() is lazy on Python 3).
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The episode released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        # NOTE(review): latest() is called without a field name, so it
        # relies on Meta.get_latest_by of the Episode model, and it raises
        # DoesNotExist (instead of returning None) if there is no earlier
        # episode -- confirm that both are intended
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ The first episode with a later release date than the given one

        "First" refers to the default ordering of the episodes. Returns None
        if the given episode has no release date or no such episode exists. """
        if not episode.released:
            return None
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic title (mentioning the domain of the
        podcast's URL, if there is one) for podcasts without a title. """
        if self.title:
            return self.title

        # every access to self.url fetches the URLs, so do it only once
        url = self.url
        if not url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                           .format(podcast_id=self.id.hex))
            return _('Unknown Podcast')

        # translate the template first and fill in the domain afterwards;
        # a string that already contains the domain can not be found in the
        # translation catalog
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally for one language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet for all queries of this manager """
        return EpisodeQuerySet(self.model, using=self._db)

    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Create an Episode for a given URL

        Returns the episode of 'podcast' that has the given URL. If there is
        none, it is created with the field values from 'defaults' and a
        freshly generated ID.

        This is the only place where new episodes are created """

        # TODO: where to specify how uuid is created?
        import uuid

        if defaults is None:
            defaults = {}

        # NOTE(review): unlike PodcastManager.get_or_create_for_url, the URL
        # is not cut to its maximum length here -- confirm this is intended

        try:
            # try to fetch the episode
            return Episode.objects.get(urls__url=url,
                                       urls__scope=podcast.as_scope,
                                       )
        except Episode.DoesNotExist:
            # episode did not exist, try to create it
            try:
                with transaction.atomic():
                    episode = Episode.objects.create(podcast=podcast,
                                                     id=uuid.uuid1().hex,
                                                     **defaults)

                    # The result is deliberately not assigned to 'url': the
                    # fallback query below needs the URL string, not the
                    # URL object.
                    URL.objects.create(url=url,
                                       order=0,
                                       scope=episode.scope,
                                       content_object=episode,
                                       )

                    # Keep episode_count up to date here; it is not
                    # recalculated when updating the podcast because counting
                    # episodes can be very slow for podcasts with many episodes
                    Podcast.objects.filter(pk=podcast.pk)\
                        .update(episode_count=F('episode_count')+1)

                    return episode

            # URL could not be created, so it was created since the first get
            except IntegrityError:
                return Episode.objects.get(urls__url=url,
                                           urls__scope=podcast.as_scope,
                                           )
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
        OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(null=True, max_length=200)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    # a podcast can not be deleted as long as it has episodes
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ An episode's scope is its podcast """
        return self.podcast_id.hex

    @property
    def display_title(self):
        """ a title for display purposes; currently just the title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        The common title is removed from the episode's title, as are
        leading non-word characters and digits. Returns None if either
        title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        This is the number the title starts with once the common title has
        been removed. Returns None if either title is empty or no such
        number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    NULL is not allowed as scope: in SQL, NULL is not equal to NULL, so the
    scope could not be used in unique constraints otherwise. """

    class Meta:
        abstract = True

    # Slugs / URLs are unique within their scope: no two podcasts (scope '')
    # can share a URL, and neither can two episodes of the same podcast
    # (scope = podcast-ID)
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be provided by subclasses; this implementation only raises
        a NotImplementedError. """
        message = '{cls} should implement get_default_scope'.format(
            cls=self.__class__.__name__)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one.
    See also :class:`UrlsMixin` """

    url = models.URLField(max_length=2048)

    # generic relation to the object the URL belongs to, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ A URL is in the same scope as the object it belongs to """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # possible sources of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # where the tag came from (one of SOURCE_CHOICES)
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag; null if the tag was not created by a
    # user
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # one source can assign a tag to an item only once
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per scope
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation with slug, order and owning object """
        # the format string used to lack its closing parenthesis
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ Keeps the UUIDs of objects that have been merged into others

    The UUIDs are stored for later reference.
    See also :class:`MergedUUIDsMixin`
    """

    # a merged UUID can be stored only once
    uuid = UUIDField(unique=True)

    # generic relation to the object the UUID is stored for, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'