[Directory] migrate example podcasts to Django ORM
[mygpo.git] / mygpo / podcasts / models.py
blob062c69ee947b17cabbafa0e8ba8b4b9853ac2f70
from __future__ import unicode_literals

import logging
import re
from datetime import datetime

from django.conf import settings
from django.db import models, transaction, IntegrityError
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes.fields import (GenericForeignKey,
                                                GenericRelation)
from django.contrib.contenttypes import generic

from uuidfield import UUIDField

from mygpo import utils
from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
                               UpdateInfoModel, OrderedModel)
19 logger = logging.getLogger(__name__)
# Default interval between two updates of a podcast, in hours (one week).
DEFAULT_UPDATE_INTERVAL = 24 * 7

# Minimum interval between two updates of a podcast, in hours.
MIN_UPDATE_INTERVAL = 5

# Maximum interval in hours: every podcast should be updated at least once
# a month (30 days).
MAX_UPDATE_INTERVAL = 30 * 24
class TitleModel(models.Model):
    """ Model that has a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        # encoded representation of the title; characters that can not be
        # represented in ASCII are replaced
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        # This used to be spelled ``__unicode`` (without the trailing
        # underscores); that name is mangled to ``_TitleModel__unicode`` and
        # is never picked up as the unicode conversion hook.
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a free-text description """

    class Meta:
        abstract = True

    # may be blank, but is never NULL
    description = models.TextField(blank=True, null=False)
class LinkModel(models.Model):
    """ Abstract base for models that carry a (nullable) link URL """

    class Meta:
        abstract = True

    link = models.URLField(max_length=1000, null=True)
class LanguageModel(models.Model):
    """ Abstract base for models that carry a language """

    class Meta:
        abstract = True

    # NULL if no language is set; indexed
    language = models.CharField(
        max_length=10,
        db_index=True,
        null=True,
        blank=False,
    )
class LastUpdateModel(models.Model):
    """ Abstract base storing when an object was last updated from its source """

    class Meta:
        abstract = True

    # Date and time of the most recent update from the object's source
    # (eg a podcast feed). A value of None indicates a stub object that
    # has been created without any information from the source.
    last_update = models.DateTimeField(null=True)
class LicenseModel(models.Model):
    """ Abstract base for models that carry a license """

    class Meta:
        abstract = True

    # URL to a license (usually Creative Commons); NULL if there is none
    license = models.CharField(
        max_length=100,
        db_index=True,
        null=True,
        blank=False,
    )
class FlattrModel(models.Model):
    """ Abstract base for models that carry a Flattr payment URL """

    class Meta:
        abstract = True

    # A Flattr payment URL; NULL if there is none
    flattr_url = models.URLField(
        max_length=1000,
        db_index=True,
        null=True,
        blank=False,
    )
class ContentTypesModel(models.Model):
    """ Abstract base for models that list their content types """

    class Meta:
        abstract = True

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(blank=True, null=False, max_length=20)
class MergedIdsModel(models.Model):
    """ Abstract base that currently declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    class Meta:
        abstract = True

    # indexed flag, not set by default
    outdated = models.BooleanField(db_index=True, default=False)
class AuthorModel(models.Model):
    """ Abstract base for models that carry an (optional) author """

    class Meta:
        abstract = True

    author = models.CharField(blank=True, null=True, max_length=350)
class UrlsMixin(models.Model):
    """ Methods for working with URL objects

    Classes using this mixin need to provide a ``scope`` attribute, which is
    passed on to newly created URL objects. """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it doesn't have any """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLs from new_urls

        URLs that the object already has are skipped, as are repeated
        entries within new_urls. The order of existing URLs is not changed;
        new URLs are appended after the highest existing order value. URLs
        that can not be stored because of an IntegrityError are logged and
        skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1
        known_urls = set(u.url for u in existing)

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                # The nested atomic block acts as a savepoint: when the
                # insert fails, only this insert is rolled back and an
                # enclosing transaction stays usable.
                with transaction.atomic():
                    URL.objects.create(url=url,
                                       order=next_order,
                                       scope=self.scope,
                                       content_object=self,
                                       )
            except IntegrityError as ie:
                logger.warning('Could not add URL: {err}'.format(err=ie))
                continue

            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects

    Classes using this mixin need to provide a ``scope`` attribute, which is
    passed on to newly created Slug objects. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the object, or None if it doesn't have any

        TODO: should be retrieved from a (materialized) view """

        # We could also use self.slugs.first() here, but this would result in a
        # different query and would render a .prefetch_related('slugs') useless
        # The assumption is that we will never have loads of slugs, so
        # fetching all won't hurt
        slugs = list(self.slugs.all())
        slug = slugs[0].slug if slugs else None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        The slug is cut to the maximum length of the Slug.slug field and
        appended after the existing slugs. Nothing happens if the object
        already has the (shortened) slug. Raises ValueError for an empty
        slug. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        existing_slugs = self.slugs.all()

        # cut slug to the maximum allowed length
        slug = utils.to_maxlength(Slug, 'slug', slug)

        # check if slug already exists
        if slug in [s.slug for s in existing_slugs]:
            return

        max_order = max([-1] + [s.order for s in existing_slugs])
        next_order = max_order + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The slug is moved (or added) to the front of the object's slugs;
        all other slugs are kept in their current order. """

        slugs = [s.slug for s in self.slugs.all()]
        if slug in slugs:
            slugs.remove(slug)

        slugs.insert(0, slug)
        self.set_slugs(slugs)

    def remove_slug(self, slug):
        """ Removes a slug from this object """
        Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        ).delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        existing = {s.slug: s for s in self.slugs.all()}
        logger.info('%d existing slugs', len(existing))

        logger.info('%d new slugs', len(slugs))

        # Move all existing slugs to order values above the range that is
        # about to be assigned, so that the new numbering below can not
        # collide with the (content_type, object_id, order) constraint.
        with transaction.atomic():
            max_order = max([s.order for s in existing.values()] + [len(slugs)])
            logger.info('Renumbering slugs starting from %d', max_order+1)
            for n, slug in enumerate(existing.values(), max_order+1):
                slug.order = n
                slug.save()

        logger.info('%d existing slugs', len(existing))

        slugs = [utils.to_maxlength(Slug, 'slug', slug) for slug in slugs]
        for n, slug in enumerate(slugs):
            try:
                s = existing.pop(slug)
                logger.info('Updating new slug %d: %s', n, slug)
                s.order = n
                s.save()
            except KeyError:
                logger.info('Creating new slug %d: %s', n, slug)
                try:
                    # The nested atomic block acts as a savepoint: a failed
                    # insert is rolled back on its own and does not leave an
                    # enclosing transaction in an unusable state.
                    with transaction.atomic():
                        Slug.objects.create(slug=slug,
                                            content_object=self,
                                            order=n,
                                            scope=self.scope,
                                            )
                except IntegrityError as ie:
                    logger.warning('Could not create Slug for %s: %s', self, ie)

        # whatever is left in 'existing' was not requested anymore
        with transaction.atomic():
            delete = [s.pk for s in existing.values()]
            logger.info('Deleting %d slugs', len(delete))
            Slug.objects.filter(id__in=delete).delete()
class MergedUUIDsMixin(models.Model):
    """ Gives a model a generic relation to its MergedUUID objects """

    class Meta:
        abstract = True

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs (see MergedUUID) """

    def get_by_any_id(self, id):
        """ Return the object that has the given ID or merged ID

        The object's own ID is tried first; only if no object is found, the
        lookup is repeated via the merged UUIDs. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model a generic relation to its Tag objects """

    class Meta:
        abstract = True

    tags = GenericRelation('Tag', related_query_name='tags')
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcasts. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out, to guarantee some minimum
        quality of the results """

        # Using PostgreSQL's RANDOM() is very expensive. Instead a UUID is
        # generated, and podcasts with a higher ID are queried.
        # The podcasts are returned in order of their ID, but the assumption
        # is that usually only one podcast will be required anyway
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts that have a Flattr URL """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with the given license, or with any license if omitted """
        if not license_url:
            return self.exclude(license__isnull=True)
        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Podcasts sorted by their next scheduled update """
        # last update plus the update interval (in hours), computed in SQL
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally for one language """
        podcasts = self.filter(language=language) if language else self
        return podcasts.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if necessary

        The URL is cut to the maximum length of the URL.url field before it
        is looked up. 'defaults' are the field values used when a new
        podcast has to be created; an 'id' entry is always replaced by a
        newly generated UUID. The passed dict is not modified. """
        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy so that neither the caller's dict nor a shared
        # default value is mutated
        defaults = dict(defaults or {})
        defaults.update({
            'id': uuid.uuid1().hex,
        })

        url = utils.to_maxlength(URL, 'url', url)
        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            # a new podcast does not have any URLs yet; this is its first
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                               )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
              TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)
    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Returns the stored number of subscribers """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled 'grouptitle'
        is created for both. If only one of them is in a group, the other
        one joins it. 'myname' and 'othername' are stored as the member
        names of the podcasts that are added to a group.

        Returns the group, or None if both podcasts already are in the same
        group. Raises ValueError if they are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ Returns the title part that the podcast's episodes have in common

        A stored common_episode_title takes precedence. Otherwise the
        titles of up to 'num_episodes' episodes are compared. Returns None
        if no usable common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. A list (instead of filter()) is
        # built, because its length is needed below
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ Returns the episode released most recently before 'episode'

        Returns None if 'episode' has no release date, or if there is no
        earlier episode. """
        if not episode.released:
            return None
        # explicit ordering: does not depend on the model's default ordering
        # or on a 'get_latest_by' option
        earlier = self.episode_set.filter(released__lt=episode.released)
        return earlier.order_by('-released').first()

    def get_episode_after(self, episode):
        """ Returns the first episode released after 'episode'

        Returns None if 'episode' has no release date, or if there is no
        later episode. """
        if not episode.released:
            return None
        # order ascending, so that the directly following episode is
        # picked, and not the most recent one
        later = self.episode_set.filter(released__gt=episode.released)
        return later.order_by('released').first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        # TODO
        return self.title
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally for one language """
        episodes = self.filter(language=language) if language else self
        return episodes.order_by('-listeners')

    def by_released(self):
        """ Sorts by release date, sorting missing release date last

        Episodes with the most recent release date come first; episodes
        that do not have a release date are placed at the end. """
        # 'has_released' is computed in SQL and used as primary sort key
        flagged = self.extra(select={
            'has_released': 'released IS NOT NULL',
        })
        return flagged.order_by('-has_released', '-released')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL

        The URL is looked up within the scope of the podcast. If it does
        not exist, a new episode of the podcast is created from 'defaults'
        (with a newly generated UUID), and the URL is stored as its first
        URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        try:
            url = URL.objects.get(url=url, scope=podcast.as_scope)

        except URL.DoesNotExist:
            # None instead of a shared mutable dict is used as default value
            episode = Episode.objects.create(podcast=podcast,
                                             id=uuid.uuid1().hex,
                                             **(defaults or {})
                                             )
            url = URL.objects.create(url=url,
                                     order=0,
                                     scope=episode.scope,
                                     content_object=episode,
                                     )
            return episode

        else:
            return url.content_object
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.PositiveIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        # most recently released episodes first
        ordering = ['-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),
        ]

    @property
    def scope(self):
        """ The scope of an episode is (the hex ID of) its podcast """
        return self.podcast_id.hex

    def get_short_title(self, common_title):
        """ The episode's title without the podcast's common title

        Leading non-word characters and digits are stripped from the
        result. Returns None if either title is missing. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ The number at the start of the title, after the common title

        Returns None if either title is missing, or if what remains of
        the title does not start with a number. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ A model that is part of some scope, usually for limited uniqueness

    Null values are not allowed for the scope: in SQL null does not equal
    null, so the scope could not be used in unique constraints otherwise. """

    class Meta:
        abstract = True

    # Slugs / URLs are unique within their scope: no two podcasts can have
    # the same URL (scope ''), and no two episodes of the same podcast
    # (scope = podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        db_index=True,
        null=False,
        blank=True,
    )

    def get_default_scope(self):
        """ Returns the default scope of the object; subclasses override it """
        message = '{cls} should implement get_default_scope'
        raise NotImplementedError(message.format(cls=self.__class__.__name__))
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    # GenericForeignKey is taken from contenttypes.fields (like
    # GenericRelation); the contenttypes.generic module is deprecated
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

    def get_default_scope(self):
        """ The default scope of a URL is the scope of its object """
        return self.content_object.scope
class Tag(models.Model):
    """ Tags any kind of Model

    See also :class:`TagsMixin`
    """

    # possible values of the 'source' field
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # indicates where the tag came from
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag (if it was created by a user,
    # null otherwise)
    user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True,
                             on_delete=models.CASCADE)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    # GenericForeignKey is taken from contenttypes.fields (like
    # GenericRelation); the contenttypes.generic module is deprecated
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # a tag can only be assigned once from one source to one item
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    # GenericForeignKey is taken from contenttypes.fields (like
    # GenericRelation); the contenttypes.generic module is deprecated
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        # the closing parenthesis was missing from the format string
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ If objects are merged their UUIDs are stored for later reference

    see also :class:`MergedUUIDsMixin`
    """

    # the former UUID of an object that has been merged into another one
    uuid = UUIDField(unique=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    # GenericForeignKey is taken from contenttypes.fields (like
    # GenericRelation); the contenttypes.generic module is deprecated
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name = 'Merged UUID'
        verbose_name_plural = 'Merged UUIDs'