[Podcasts] trim new slugs to maximum length
[mygpo.git] / mygpo / podcasts / models.py
blob49158b6aed78108fb241823b82d49d32a4c18540
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.contrib.contenttypes.models import ContentType
9 from django.contrib.contenttypes.fields import GenericRelation
10 from django.contrib.contenttypes import generic
12 from uuidfield import UUIDField
14 from mygpo import utils
15 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
16 UpdateInfoModel)
18 import logging
19 logger = logging.getLogger(__name__)
# All update intervals below are expressed in hours.

# interval used for podcasts that have no individual interval yet (one week)
DEFAULT_UPDATE_INTERVAL = 24 * 7

# lower bound: the minimum podcast update interval
MIN_UPDATE_INTERVAL = 5

# upper bound: every podcast should be updated at least once a month
MAX_UPDATE_INTERVAL = 30 * 24
class TitleModel(models.Model):
    """ Model that has a title and a subtitle """

    # both fields may be empty strings, but never NULL
    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        """ ASCII representation; non-ASCII characters are replaced by '?' """
        # NOTE(review): this returns an encoded byte string, which is what
        # Python 2 expects from __str__; revisit when moving to Python 3
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        """ The unmodified title """
        # was misspelled as "__unicode" and therefore never picked up as the
        # unicode() hook
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base for models carrying a free-text description """

    # may be an empty string, but never NULL
    description = models.TextField(
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class LinkModel(models.Model):
    """ Abstract base for models carrying a link """

    # NULL if no link is known
    link = models.URLField(
        null=True,
        max_length=1000,
    )

    class Meta:
        abstract = True
class LanguageModel(models.Model):
    """ Abstract base for models carrying a language """

    # short language identifier (at most 10 characters), NULL if unknown
    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class LastUpdateModel(models.Model):
    """ Abstract base storing when an object was last updated from its source
    """

    # Timestamp of the most recent update from the object's source (eg a
    # podcast feed). NULL marks an object that was created as a stub, ie
    # without any information from the source.
    last_update = models.DateTimeField(
        null=True,
    )

    class Meta:
        abstract = True
class LicenseModel(models.Model):
    """ Abstract base for models carrying license information """

    # URL of a license (usually Creative Commons), NULL if there is none
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class FlattrModel(models.Model):
    """ Abstract base for models that can carry Flattr information """

    # Flattr payment URL, NULL if there is none
    flattr_url = models.URLField(
        null=True,
        blank=False,
        max_length=1000,
        db_index=True,
    )

    class Meta:
        abstract = True
class ContentTypesModel(models.Model):
    """ Abstract base for models that record their content types """

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(
        max_length=20,
        null=False,
        blank=True,
    )

    class Meta:
        abstract = True
class MergedIdsModel(models.Model):
    """ Abstract base without fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    # objects are not outdated unless flagged explicitly
    outdated = models.BooleanField(
        default=False,
        db_index=True,
    )

    class Meta:
        abstract = True
class AuthorModel(models.Model):
    """ Abstract base for models carrying an author """

    # NULL or empty if the author is not known
    author = models.CharField(
        max_length=350,
        null=True,
        blank=True,
    )

    class Meta:
        abstract = True
class UrlsMixin(models.Model):
    """ Methods for working with URL objects

    The implementing model has to provide a ``scope`` attribute, which is
    used as the scope of newly created URL objects. """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; missing URLs are appended
        after the existing ones, in the order in which they are given. A URL
        that occurs several times in new_urls is added only once. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1

        # also collects the URLs created below, so that a duplicate within
        # new_urls is not inserted a second time (which would violate the
        # uniqueness of a URL within its scope)
        known_urls = set(u.url for u in existing)

        for url in new_urls:
            if url in known_urls:
                continue

            URL.objects.create(url=url,
                               order=next_order,
                               scope=self.scope,
                               content_object=self,
                               )
            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects

    The implementing model has to provide a ``scope`` attribute, which is
    used as the scope of newly created Slug objects. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the podcast, or None if it has no slugs

        TODO: should be retrieved from a (materialized) view """

        # We could also use self.slugs.first() here, but this would result in a
        # different query and would render a .prefetch_related('slugs') useless
        # The assumption is that we will never have loads of slugs, so
        # fetching all won't hurt
        slugs = list(self.slugs.all())
        slug = slugs[0].slug if slugs else None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        The slug is cut to the maximum allowed length and appended after the
        existing slugs. Nothing happens if the object already has the slug.
        Raises ValueError if slug is empty. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        existing_slugs = list(self.slugs.all())

        # cut slug to the maximum allowed length
        slug = utils.to_maxlength(Slug, 'slug', slug)

        # check if slug already exists
        if slug in [s.slug for s in existing_slugs]:
            return

        max_order = max([-1] + [s.order for s in existing_slugs])
        next_order = max_order + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The given slug is moved (or added) to the first position, all other
        slugs keep their relative order. """

        slugs = [s.slug for s in self.slugs.all()]
        if slug in slugs:
            slugs.remove(slug)

        slugs.insert(0, slug)
        self.set_slugs(slugs)

    def remove_slug(self, slug):
        """ Removes a slug """
        Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        ).delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. Slugs are cut to the maximum allowed length (as in
        add_slug); if a slug is given more than once, only its first
        occurrence is used. """

        # normalize the request: trim, and drop repeated entries which could
        # only result in an insert that violates the slug's uniqueness
        wanted = []
        for slug in slugs:
            slug = utils.to_maxlength(Slug, 'slug', slug)
            if slug not in wanted:
                wanted.append(slug)

        existing = {s.slug: s for s in self.slugs.all()}
        logger.info('%d existing slugs', len(existing))

        logger.info('%d new slugs', len(wanted))

        # Move all existing slugs out of the way first; the order has to be
        # unique per object, so the final positions must be free before they
        # are assigned below
        with transaction.atomic():
            max_order = max([s.order for s in existing.values()] +
                            [len(wanted)])
            logger.info('Renumbering slugs starting from %d', max_order+1)
            for n, slug_obj in enumerate(existing.values(), max_order+1):
                slug_obj.order = n
                slug_obj.save()

        logger.info('%d existing slugs', len(existing))

        for n, slug in enumerate(wanted):
            slug_obj = existing.pop(slug, None)

            if slug_obj is not None:
                logger.info('Updating new slug %d: %s', n, slug)
                slug_obj.order = n
                slug_obj.save()
                continue

            logger.info('Creating new slug %d: %s', n, slug)
            try:
                # The inner atomic block creates a savepoint if we are called
                # within a transaction; without it a failed insert would
                # leave the surrounding transaction unusable
                with transaction.atomic():
                    Slug.objects.create(slug=slug,
                                        content_object=self,
                                        order=n,
                                        scope=self.scope,
                                        )
            except IntegrityError as ie:
                # best effort: the slug is probably taken by another object
                logger.warning('Could not create Slug for %s: %s', self, ie)

        # whatever is left in 'existing' has not been requested
        with transaction.atomic():
            delete = [s.pk for s in existing.values()]
            logger.info('Deleting %d slugs', len(delete))
            Slug.objects.filter(id__in=delete).delete()
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )

    class Meta:
        abstract = True
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by one of its merged IDs

        The object's own ID is tried first. If the merged IDs don't match
        either, the exception of the second lookup is passed on. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    tags = GenericRelation(
        'Tag',
        related_query_name='tags',
    )

    class Meta:
        abstract = True
class OrderedModel(models.Model):
    """ A model that can be ordered

    Objects are sorted by their 'order' field by default. The implementing
    Model must make sure that 'order' is sufficiently unique.
    """

    order = models.PositiveSmallIntegerField()

    class Meta:
        ordering = ['order']
        abstract = True
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ The scope of a podcast group is always the global one """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group """
        # this could be done directly in the DB
        return sum(
            podcast.subscriber_count() for podcast in self.podcast_set.all()
        )
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out, to guarantee some minimum
        quality of the results """

        # Using PostgreSQL's RANDOM() is very expensive. Instead a UUID is
        # generated, and the podcasts with a higher ID are queried.
        # The podcasts are therefore returned in order of their ID, but the
        # assumption is that usually only one podcast will be required anyway
        import uuid
        lower_bound = uuid.uuid1()
        with_title = self.exclude(title='')
        return with_title.filter(id__gt=lower_bound)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            # no specific license requested; any license will do
            return self.exclude(license__isnull=True)

        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update """
        # the next update is due update_interval hours after the last one
        next_update = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': next_update})
        return annotated.order_by('next_update')

    def toplist(self, language=None):
        """ Podcasts by descending number of subscribers

        If a language is given, only its podcasts are considered """
        podcasts = self.filter(language=language) if language else self
        return podcasts.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet, so that its custom queries are available """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL; creates it if necessary

        url is cut to the maximum length of URL.url before it is looked up.
        defaults is an optional dict of field values that are used only when
        the podcast has to be created; the dict is not modified. A new
        podcast gets a freshly generated ID and the URL as its first
        (order 0) URL in the global scope. """
        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy; the previous implementation added the 'id' entry to
        # the caller's dict (or to the default dict shared between calls)
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        url = utils.to_maxlength(URL, 'url', url)
        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                               )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
              TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    objects = PodcastManager()

    def subscriber_count(self):
        """ The stored number of subscribers """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled grouptitle is
        created for both. If exactly one of them is in a group, the other one
        joins it. myname and othername are stored as the group member names
        of this and the other podcast, respectively.

        Returns the group, or None if both podcasts already are in the same
        group. Raises ValueError if they are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ The part of the title that the podcast's episodes have in common

        Returns the stored common_episode_title if there is one. Otherwise
        the common title is derived from the titles of (up to) num_episodes
        episodes. Returns None if no common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles. This has to be a list (and not the
        # result of filter(), which is an iterator on Python 3), because its
        # length is needed below
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The episode that was released directly before the given one

        Returns None if the given episode has no release date. Raises the
        episode model's DoesNotExist if there is no earlier episode. """
        if not episode.released:
            return None

        # The field has to be given explicitly; Episode's Meta does not
        # declare get_latest_by, which latest() would need otherwise
        earlier = self.episode_set.filter(released__lt=episode.released)
        return earlier.latest('released')

    def get_episode_after(self, episode):
        """ The episode that was released directly after the given one

        Returns None if the given episode has no release date, or if there
        is no later episode. """
        if not episode.released:
            return None

        # Episodes are sorted newest-first by default, so first() alone would
        # pick the most recent episode instead of the next one
        later = self.episode_set.filter(released__gt=episode.released)
        return later.order_by('released').first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ The title to display; currently the unmodified title """
        # TODO
        return self.title
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending number of listeners

        If a language is given, only its episodes are considered """
        episodes = self.filter(language=language) if language else self
        return episodes.order_by('-listeners')

    def by_released(self):
        """ Sorts by release date, sorting missing release date last

        The episodes with the most recent release date come first, the
        episodes without a release date are listed at the end. """
        # has_released tells episodes with and without release date apart, so
        # that the ones with a release date can be sorted to the front
        flagged = self.extra(select={
            'has_released': 'released IS NOT NULL',
        })
        return flagged.order_by('-has_released', '-released')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet, so that its custom queries are available """
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL

        The episode is created if the podcast has no episode with this URL.
        url is cut to the maximum length of URL.url before it is looked up
        (as it is done for podcasts). defaults is an optional dict of field
        values that are used only when the episode has to be created. A new
        episode gets a freshly generated ID and the URL as its first
        (order 0) URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        url = utils.to_maxlength(URL, 'url', url)

        try:
            url_obj = URL.objects.get(url=url, scope=podcast.as_scope)

        except URL.DoesNotExist:
            # None instead of a (shared) dict is used as default argument
            episode = Episode.objects.create(podcast=podcast,
                                             id=uuid.uuid1().hex,
                                             **(defaults or {})
                                             )
            URL.objects.create(url=url,
                               order=0,
                               scope=episode.scope,
                               content_object=episode,
                               )
            return episode

        else:
            return url_obj.content_object
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
              LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
              FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
              AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.PositiveIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        # most recently released episodes first
        ordering = ['-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),
        ]

    @property
    def scope(self):
        """ The scope of an episode is (the ID of) its podcast """
        return self.podcast_id.hex

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        This is the title without common_title and without any non-word
        characters and digits at its beginning. Returns None if the episode
        title or common_title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        The number is expected at the beginning of what is left of the title
        once common_title has been removed. Returns None if there is no such
        number, or if the episode title or common_title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        number = re.search(r'^\W*(\d+)', remainder)
        return int(number.group(1)) if number else None
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    The scope must not be NULL. NULL is not equal to NULL in SQL, so a
    nullable scope could not be used in unique constraints. """

    # A slug / URL is unique within a scope; no two podcasts can have the same
    # URL (scope ''), and no two episodes of the same podcast (scope =
    # podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )

    class Meta:
        abstract = True

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be overridden; this implementation always raises
        NotImplementedError. """
        message = '{cls} should implement get_default_scope'.format(
            cls=self.__class__.__name__)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # the object the URL belongs to; see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # a URL is unique per scope
            ('url', 'scope'),

            # URLs of an object must be ordered, so that no two URLs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ By default, a URL is in the scope of the object it belongs to """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ Tags any kind of Model

    See also :class:`TagsMixin`
    """

    # possible values of 'source'
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # indicates where the tag came from
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag (if it was created by a user,
    # null otherwise)
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # the tagged object; see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # a tag can only be assigned once from one source to one item
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # the object the slug belongs to; see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Representation for debugging, eg in log messages """
        # the closing parenthesis was missing from the format string
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ If objects are merged their UUIDs are stored for later reference

    see also :class:`MergedUUIDsMixin`
    """

    # a UUID can be stored only once
    uuid = UUIDField(unique=True)

    # the object the UUID refers to; see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'