[Feeds] update / fix feed-downloader
[mygpo.git] / mygpo / podcasts / models.py
blob2dbbbbc891e3399890f1d98dee0f7cde84abd3e8
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.conf import settings
7 from django.db import models, transaction, IntegrityError
8 from django.utils.translation import ugettext as _
9 from django.contrib.contenttypes.models import ContentType
10 from django.contrib.contenttypes.fields import GenericRelation
11 from django.contrib.contenttypes import generic
13 from uuidfield import UUIDField
15 from mygpo import utils
16 from mygpo.core.models import (TwitterModel, UUIDModel, GenericManager,
17 UpdateInfoModel, OrderedModel, OptionallyOrderedModel)
19 import logging
20 logger = logging.getLogger(__name__)
# All update intervals below are expressed in hours.

# default podcast update interval (one week)
DEFAULT_UPDATE_INTERVAL = 24 * 7

# minimum podcast update interval
MIN_UPDATE_INTERVAL = 5

# maximum podcast update interval; every podcast should be updated at least
# once a month (30 days)
MAX_UPDATE_INTERVAL = 30 * 24
class TitleModel(models.Model):
    """ Model that has a title (and an optional subtitle) """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        """ ASCII-encoded title; non-ASCII characters are replaced """
        return self.title.encode('ascii', errors='replace')

    def __unicode__(self):
        """ The title as text

        This used to be spelled ``__unicode`` (without trailing
        underscores), which is name-mangled by Python and therefore never
        picked up as the object's text representation. """
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract base for models that carry a description text """

    class Meta:
        abstract = True

    # may be left blank, but is never stored as NULL
    description = models.TextField(blank=True, null=False)
class LinkModel(models.Model):
    """ Abstract base for models that carry a link """

    class Meta:
        abstract = True

    # nullable URL of at most 1000 characters
    link = models.URLField(max_length=1000, null=True)
class LanguageModel(models.Model):
    """ Abstract base for models that carry a language """

    class Meta:
        abstract = True

    # indexed; NULL is allowed in the database, blank values are not
    # accepted by validation
    language = models.CharField(
        max_length=10,
        blank=False,
        null=True,
        db_index=True,
    )
class LastUpdateModel(models.Model):
    """ Abstract base storing when an object was last updated from its source
    """

    class Meta:
        abstract = True

    # Point in time at which the object has last been refreshed from its
    # source (eg a podcast feed). A value of None marks a stub, ie an object
    # that has been created without any information from the source.
    last_update = models.DateTimeField(null=True)
class LicenseModel(models.Model):
    """ Abstract base for models that reference a license """

    class Meta:
        abstract = True

    # URL to a license (usually Creative Commons); indexed and nullable
    license = models.CharField(
        max_length=100,
        blank=False,
        null=True,
        db_index=True,
    )
class FlattrModel(models.Model):
    """ Abstract base for models that can carry Flattr payment information
    """

    class Meta:
        abstract = True

    # A Flattr payment URL; NULL if none is known
    flattr_url = models.URLField(
        max_length=1000,
        blank=False,
        null=True,
        db_index=True,
    )
class ContentTypesModel(models.Model):
    """ Abstract base for models that record their content types """

    class Meta:
        abstract = True

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(max_length=20, blank=True, null=False)
class MergedIdsModel(models.Model):
    """ Abstract base class that declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract base for models that can be flagged as outdated """

    class Meta:
        abstract = True

    # indexed flag; objects are not outdated by default
    outdated = models.BooleanField(db_index=True, default=False)
class AuthorModel(models.Model):
    """ Abstract base for models that carry an author """

    class Meta:
        abstract = True

    # optional; may be blank or NULL
    author = models.CharField(max_length=350, blank=True, null=True)
class UrlsMixin(models.Model):
    """ Methods for working with URL objects

    Classes using this mixin are expected to provide a ``scope`` attribute,
    which is used when new URL objects are created. """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """

        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        The order of existing URLs is not changed; new URLs are appended
        after the highest existing order value. URLs that can not be stored
        because of an IntegrityError are logged and skipped. """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1

        # set for cheap membership tests; also tracks URLs added below so
        # that duplicates within new_urls do not hit the database
        known_urls = {u.url for u in existing}

        for url in new_urls:
            if url in known_urls:
                continue

            try:
                URL.objects.create(url=url,
                                   order=next_order,
                                   scope=self.scope,
                                   content_object=self,
                                  )
            except IntegrityError as ie:
                logger.warning('Could not add URL: {err}'.format(err=ie))
                continue

            known_urls.add(url)
            next_order += 1

    def set_url(self, url):
        """ Sets the canonical URL

        The given URL is moved (or added) to the front of the list of URLs;
        the relative order of the other URLs is kept. """
        urls = [u.url for u in self.urls.all()]
        if url in urls:
            urls.remove(url)

        urls.insert(0, url)
        self.set_urls(urls)

    def set_urls(self, urls):
        """ Update the object's URLS to the given list

        'urls' should be a list of strings. URLs that do not exist are
        created. Existing urls that are not in the 'urls' list are
        deleted. """
        # cut each URL to the maximum length of the URL.url field
        urls = [utils.to_maxlength(URL, 'url', url) for url in urls]
        existing = {u.url: u for u in self.urls.all()}
        utils.set_ordered_entries(self, urls, existing, URL, 'url',
                                  'content_object')
class SlugsMixin(models.Model):
    """ Helpers for objects that are addressed through Slug objects

    Classes using this mixin are expected to provide a ``scope`` attribute,
    which is used when new Slug objects are created. """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the object, or None if it has no slugs

        TODO: should be retrieved from a (materialized) view """

        # self.slugs.first() would issue a different query and thereby
        # bypass a .prefetch_related('slugs'). As objects are assumed to
        # have only few slugs, all of them are fetched instead.
        all_slugs = list(self.slugs.all())
        if all_slugs:
            main_slug = all_slugs[0].slug
        else:
            main_slug = None
        logger.debug('Found slugs %r, picking %r', all_slugs, main_slug)
        return main_slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        Raises a ValueError for empty slugs. Nothing happens if the object
        already has the (length-limited) slug. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        # the slug is shortened to what the Slug.slug field can hold
        slug = utils.to_maxlength(Slug, 'slug', slug)

        current = list(self.slugs.all())

        # nothing to do if the object already has this slug
        if any(entry.slug == slug for entry in current):
            return

        # the new slug is placed after all existing ones
        highest_order = max([-1] + [entry.order for entry in current])
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=highest_order + 1,
                           )

    def set_slug(self, slug):
        """ Makes the given slug the canonical (first) one """

        remaining = [entry.slug for entry in self.slugs.all()]
        if slug in remaining:
            remaining.remove(slug)

        self.set_slugs([slug] + remaining)

    def remove_slug(self, slug):
        """ Deletes the given slug from this object """
        own_type = ContentType.objects.get_for_model(self)
        matching = Slug.objects.filter(
            slug=slug,
            content_type=own_type,
            object_id=self.id,
        )
        matching.delete()

    def set_slugs(self, slugs):
        """ Replaces the object's slugs by the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        limited = [utils.to_maxlength(Slug, 'slug', item) for item in slugs]
        by_slug = {entry.slug: entry for entry in self.slugs.all()}
        utils.set_ordered_entries(self, limited, by_slug, Slug, 'slug',
                                  'content_object')
class MergedUUIDsMixin(models.Model):
    """ Gives a model access to its MergedUUID objects """

    class Meta:
        abstract = True

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first. The model's DoesNotExist is
        raised by the second lookup if neither matches. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            found = self.get(merged_uuids__uuid=id)
        return found
class TagsMixin(models.Model):
    """ Gives a model access to its Tag objects """

    class Meta:
        abstract = True

    tags = GenericRelation('Tag', related_query_name='tags')
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ Groups multiple podcasts together """

    @property
    def scope(self):
        """ A podcast group is always in the global scope """
        return ''

    def subscriber_count(self):
        """ Sum of the subscriber counts of all podcasts in the group """
        # this could be done directly in the DB
        return sum(p.subscriber_count() for p in self.podcast_set.all())

    @property
    def logo_url(self):
        """ Logo URL of the group's first podcast

        Returns None if the group does not contain any podcasts. """
        podcast = self.podcast_set.first()
        if podcast is None:
            return None
        return podcast.logo_url
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Custom queries for Podcasts """

    def random(self):
        """ Random podcasts

        Podcasts without a title are left out to guarantee some minimum
        quality of the results """

        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and podcasts with a higher ID are queried. The result is
        # ordered by ID, but the assumption is that usually only one podcast
        # will be required anyway
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts providing Flattr information """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with any / the given license """
        if not license_url:
            return self.exclude(license__isnull=True)

        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Sort podcasts by next scheduled update

        The next update is computed in SQL as last_update plus
        update_interval hours """
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        annotated = self.extra(select={'next_update': next_update_sql})
        return annotated.order_by('next_update')

    def next_update_between(self, start, end):
        """ Podcasts whose next scheduled update is between start and end """
        condition = ("(last_update + (update_interval || ' hours')::INTERVAL) "
                     "BETWEEN %s AND %s")
        return self.extra(where=[condition], params=[start, end])

    def toplist(self, language=None):
        """ Podcasts by descending subscribers, optionally for one language
        """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet for all queries of this manager """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if necessary

        'defaults' are the field values used when a new podcast has to be
        created. The passed dict is not modified. A newly created podcast
        gets a fresh ID and the given URL as its first URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        # work on a copy: neither a shared default dict nor the caller's
        # dict should be changed by adding the ID
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        # cut the URL to the maximum length of the URL.url field
        url = utils.to_maxlength(URL, 'url', url)
        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                              )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin,
        TwitterModel, ):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    # "order" value of the most recent episode (will be the highest of all)
    max_episode_order = models.PositiveIntegerField(null=True, default=None)

    objects = PodcastManager()

    def subscriber_count(self):
        """ Number of subscribers, as stored in the subscribers field """
        # TODO: implement
        return self.subscribers

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        Returns the group both podcasts are in afterwards, or None if they
        already were in the same group. A ValueError is raised if both
        podcasts already are in different groups. 'grouptitle' is only used
        if a new group has to be created. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def get_common_episode_title(self, num_episodes=100):
        """ The part of the title that the podcast's episodes have in common

        A stored common_episode_title takes precedence. Otherwise the title
        is derived from the titles of up to 'num_episodes' episodes. None is
        returned if no meaningful common title can be found. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles; this has to be a real list (not a
        # lazy filter object) because its length is checked below
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The podcast's episode released before the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        return self.episode_set.filter(released__lt=episode.released).latest()

    def get_episode_after(self, episode):
        """ An episode of the podcast released after the given one

        Returns None if the given episode has no release date. """
        if not episode.released:
            return None
        return self.episode_set.filter(released__gt=episode.released).first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''

    @property
    def as_scope(self):
        """ If models use this object as scope, they'll use this value """
        return self.id.hex

    @property
    def display_title(self):
        """ a title for display purposes

        Falls back to a generic title (mentioning the domain of the
        podcast's URL, if there is one) for podcasts without title. """
        if self.title:
            return self.title

        # fetch the URL only once; each access of self.url runs a query
        # unless the URLs have been prefetched
        url = self.url

        if not url:
            logger.warning('Podcast with ID {podcast_id} does not have a URL'
                .format(podcast_id=self.id.hex))
            return _('Unknown Podcast')

        # translate the template first and fill in the domain afterwards;
        # otherwise the already formatted string is looked up in the
        # translation catalog and never matches
        return _('Unknown Podcast from {domain}').format(
            domain=utils.get_domain(url))
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ QuerySet for Episodes """

    def toplist(self, language=None):
        """ Episodes by descending listeners, optionally for one language
        """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet for all queries of this manager """
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL

        The URL is looked up within the scope of the podcast. If it is not
        known, a new episode is created with a fresh ID, the field values
        from 'defaults' and the URL as its first URL. """
        # TODO: where to specify how uuid is created?
        import uuid

        try:
            url = URL.objects.get(url=url, scope=podcast.as_scope)

        except URL.DoesNotExist:
            # 'defaults' is None unless given; avoids a shared mutable
            # default argument
            episode = Episode.objects.create(podcast=podcast,
                                             id=uuid.uuid1().hex,
                                             **(defaults or {})
                                            )
            URL.objects.create(url=url,
                               order=0,
                               scope=episode.scope,
                               content_object=episode,
                              )
            return episode

        else:
            return url.content_object
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin,
        OptionallyOrderedModel):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(db_index=True, null=True)
    duration = models.BigIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(db_index=True, null=True)

    objects = EpisodeManager()

    class Meta:
        ordering = ['-order', '-released']

        index_together = [
            ('podcast', 'outdated', 'released'),
            ('podcast', 'released'),
            ('released', 'podcast'),

            # index for typical episode toplist queries
            ('language', 'listeners'),

            ('podcast', 'order', 'released'),
        ]

    @property
    def scope(self):
        """ An episode's scope is its podcast (the hex form of its ID) """
        return self.podcast_id.hex

    @property
    def display_title(self):
        """ a title for display purposes; currently just the title """
        # TODO: return basename of URL (see Podcast.display_title)
        return self.title

    def get_short_title(self, common_title):
        """ Title when used within the podcast's context

        The common title is removed from the episode's title, and so are
        leading non-word characters and digits of the remainder. None is
        returned if either title is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ Number of the episode

        This is the number the title starts with once the common title has
        been removed. None is returned if either title is empty or no such
        number is found. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ A model that belongs to some scope, usually for limited uniqueness

    Null values are not allowed for the scope: null is not equal to null in
    SQL, so a nullable scope could not be used in unique constraints. """

    class Meta:
        abstract = True

    # Slugs / URLs are unique within their scope: no two podcasts can have
    # the same URL (scope ''), and no two episodes of the same podcast
    # (scope = podcast-ID) can have the same URL
    scope = models.CharField(
        max_length=32,
        blank=True,
        null=False,
        db_index=True,
    )

    def get_default_scope(self):
        """ Returns the default scope of the object

        Has to be provided by subclasses; this implementation always raises
        NotImplementedError. """
        message = '{cls} should implement get_default_scope'.format(
            cls=self.__class__.__name__)
        raise NotImplementedError(message)
class URL(OrderedModel, ScopedModel):
    """ Podcasts and Episodes can have multiple URLs

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # generic relation to the object that owns the URL, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # within one scope each URL exists only once
            ('url', 'scope'),

            # the URLs of an object are ordered, so no two URLs of one
            # object may share the same order key
            ('content_type', 'object_id', 'order'),
        )

    def get_default_scope(self):
        """ The default scope is the scope of the owning object """
        owner = self.content_object
        return owner.scope
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # possible origins of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()

    # where the tag came from (one of SOURCE_CHOICES)
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)

    # the user that created the tag; null if the tag was not created by a
    # user
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        null=True,
        on_delete=models.CASCADE,
    )

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # one source can assign a given tag to an item only once
            ('tag', 'source', 'user', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

        index_together = [
            ('slug', 'content_type')
        ]

    def __repr__(self):
        """ Debug representation, eg ``Slug(slug=..., order=..., ...)`` """
        # the closing parenthesis was missing from the format string
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ Keeps the UUIDs of objects that have been merged into another one

    When objects are merged, their UUIDs are stored for later reference;
    see also :class:`MergedUUIDsMixin`
    """

    # each merged UUID can be stored only once
    uuid = UUIDField(unique=True)

    # generic relation to the object the UUID has been merged into, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'