[Models] add missing import
[mygpo.git] / mygpo / podcasts / models.py
bloba4daeb4524a7252484d3257fe9e77fb6a4add71d
1 from __future__ import unicode_literals
3 import re
4 from datetime import datetime
6 from django.db import models, connection, transaction, IntegrityError
7 from django.contrib.contenttypes.models import ContentType
8 from django.contrib.contenttypes.fields import GenericRelation
9 from django.contrib.contenttypes import generic
11 from uuidfield import UUIDField
13 from mygpo import utils
15 import logging
16 logger = logging.getLogger(__name__)
# default podcast update interval in hours (7 * 24 hours = one week)
DEFAULT_UPDATE_INTERVAL = 7 * 24

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# every podcast should be updated at least once a month
# (maximum update interval in hours: 24 * 30 hours = 30 days)
MAX_UPDATE_INTERVAL = 24 * 30
class UUIDModel(models.Model):
    """ Abstract base for models whose primary key is a UUID """

    id = UUIDField(primary_key=True)

    class Meta:
        abstract = True

    def get_id(self):
        """ Return the hexadecimal string form of the primary key """
        primary_key = self.id
        return primary_key.hex
class TitleModel(models.Model):
    """ Model that has a title and a subtitle """

    title = models.CharField(max_length=1000, null=False, blank=True,
                             db_index=True)
    subtitle = models.TextField(null=False, blank=True)

    def __str__(self):
        """ ASCII representation of the title

        Characters that can not be encoded as ASCII are replaced """
        return self.title.encode('ascii', errors='replace')

    # This method was previously spelled ``__unicode`` (without the trailing
    # underscores). That name is mangled to ``_TitleModel__unicode`` and is
    # never picked up by ``unicode(obj)``.
    def __unicode__(self):
        """ The unmodified title """
        return self.title

    class Meta:
        abstract = True
class DescriptionModel(models.Model):
    """ Abstract model contributing a free-text ``description`` field """

    class Meta:
        abstract = True

    # never NULL in the database; may be left blank
    description = models.TextField(blank=True, null=False)
class LinkModel(models.Model):
    """ Abstract model contributing a ``link`` URL field """

    class Meta:
        abstract = True

    # NULL is allowed for objects without a link
    link = models.URLField(max_length=1000, null=True)
class LanguageModel(models.Model):
    """ Abstract model contributing an indexed ``language`` field """

    class Meta:
        abstract = True

    # at most 10 characters; NULL is allowed, a blank value is not
    language = models.CharField(
        max_length=10,
        null=True,
        blank=False,
        db_index=True,
    )
class LastUpdateModel(models.Model):
    """ Abstract model recording when the object was last updated
    from its source """

    class Meta:
        abstract = True

    # Timestamp of the most recent update from the object's source (eg a
    # podcast feed). A value of None marks a stub that was created without
    # any information from the source.
    last_update = models.DateTimeField(null=True)
class UpdateInfoModel(models.Model):
    """ Abstract model with ``created`` and ``modified`` timestamps """

    class Meta:
        abstract = True

    # "auto_now_add=True" is deliberately not used here, so that migrated
    # data keeps its original creation timestamp; it can be switched on
    # once the migration is complete
    created = models.DateTimeField(default=datetime.utcnow)

    # set automatically whenever the object is saved
    modified = models.DateTimeField(auto_now=True)
class LicenseModel(models.Model):
    """ Abstract model contributing an indexed ``license`` field """

    class Meta:
        abstract = True

    # URL to a license (usually Creative Commons)
    license = models.CharField(
        max_length=100,
        null=True,
        blank=False,
        db_index=True,
    )
class FlattrModel(models.Model):
    """ Abstract model contributing an indexed ``flattr_url`` field """

    class Meta:
        abstract = True

    # A Flattr payment URL
    flattr_url = models.URLField(
        null=True,
        blank=False,
        max_length=1000,
        db_index=True,
    )
class ContentTypesModel(models.Model):
    """ Abstract model contributing a ``content_types`` field """

    class Meta:
        abstract = True

    # comma-separated list of content types, eg 'audio,video'
    content_types = models.CharField(blank=True, null=False, max_length=20)
class MergedIdsModel(models.Model):
    """ Abstract model that currently declares no fields of its own """

    class Meta:
        abstract = True
class OutdatedModel(models.Model):
    """ Abstract model contributing an indexed ``outdated`` flag """

    class Meta:
        abstract = True

    # objects are not outdated unless explicitly flagged
    outdated = models.BooleanField(db_index=True, default=False)
class AuthorModel(models.Model):
    """ Abstract model contributing an optional ``author`` field """

    class Meta:
        abstract = True

    # may be NULL or blank; at most 350 characters
    author = models.CharField(blank=True, null=True, max_length=350)
class GenericManager(models.Manager):
    """ Generic manager methods """

    def count_fast(self):
        """ Fast approximate count of all model instances

        PostgreSQL is slow when counting records without an index. This is a
        workaround which only gives approximate results. see:
        http://wiki.postgresql.org/wiki/Slow_Counting

        Returns the ``reltuples`` estimate of the model's table as an int. """
        cursor = connection.cursor()
        # Pass the table name as a bound parameter and let the database
        # driver do the quoting, rather than interpolating it into the SQL
        cursor.execute("select reltuples from pg_class where relname=%s;",
                       [self.model._meta.db_table])
        row = cursor.fetchone()
        return int(row[0])
class UrlsMixin(models.Model):
    """ Methods for working with URL objects """

    urls = GenericRelation('URL', related_query_name='urls')

    class Meta:
        abstract = True

    @property
    def url(self):
        """ The main URL of the model, or None if it has no URLs """
        # We could also use self.urls.first() here, but this would result in a
        # different query and would render a .prefetch_related('urls') useless
        # The assumption is that we will never have loads of URLS, so
        # fetching all won't hurt
        urls = list(self.urls.all())
        return urls[0].url if urls else None

    def add_missing_urls(self, new_urls):
        """ Adds missing URLS from new_urls

        'new_urls' is an iterable of URL strings. URLs that the object
        already has are skipped; the others are appended after the existing
        ones. The order of existing URLs is not changed """
        existing = list(self.urls.all())
        next_order = max([-1] + [u.order for u in existing]) + 1
        known_urls = {u.url for u in existing}

        for url in new_urls:
            if url in known_urls:
                continue

            # content_object must be this object; the previous code passed an
            # undefined name ('obj') here, which raised a NameError
            URL.objects.create(url=url,
                               order=next_order,
                               scope=self.scope,
                               content_object=self,
                               )

            # remember the URL so that a duplicate within new_urls is not
            # created a second time
            known_urls.add(url)
            next_order += 1
class SlugsMixin(models.Model):
    """ Methods for working with Slug objects """

    slugs = GenericRelation('Slug', related_query_name='slugs')

    class Meta:
        abstract = True

    @property
    def slug(self):
        """ The main slug of the podcast

        Returns None if the object has no slugs.

        TODO: should be retrieved from a (materialized) view """

        # We could also use self.slugs.first() here, but this would result in a
        # different query and would render a .prefetch_related('slugs') useless
        # The assumption is that we will never have loads of slugs, so
        # fetching all won't hurt
        slugs = list(self.slugs.all())
        slug = slugs[0].slug if slugs else None
        logger.debug('Found slugs %r, picking %r', slugs, slug)
        return slug

    def add_slug(self, slug):
        """ Adds a (non-canonical) slug

        Raises ValueError for an empty slug. Does nothing if the object
        already has the slug. """

        if not slug:
            raise ValueError("'%s' is not a valid slug" % slug)

        existing_slugs = self.slugs.all()

        # check if slug already exists
        if slug in [s.slug for s in existing_slugs]:
            return

        max_order = max([-1] + [s.order for s in existing_slugs])
        next_order = max_order + 1
        Slug.objects.create(scope=self.scope,
                            slug=slug,
                            content_object=self,
                            order=next_order,
                            )

    def set_slug(self, slug):
        """ Sets the canonical slug

        The slug is moved to (or inserted at) the first position; the other
        slugs follow in their previous order. """

        slugs = [s.slug for s in self.slugs.all()]
        if slug in slugs:
            slugs.remove(slug)

        slugs.insert(0, slug)
        self.set_slugs(slugs)

    def remove_slug(self, slug):
        """ Removes a slug """
        Slug.objects.filter(
            slug=slug,
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.id,
        ).delete()

    def set_slugs(self, slugs):
        """ Update the object's slugs to the given list

        'slugs' should be a list of strings. Slugs that do not exist are
        created. Existing slugs that are not in the 'slugs' list are
        deleted. """
        existing = {s.slug: s for s in self.slugs.all()}
        logger.info('%d existing slugs', len(existing))

        logger.info('%d new slugs', len(slugs))

        # First move all existing slugs to order values beyond those that
        # are about to be assigned, so that the new numbering below does not
        # collide with the old one
        with transaction.atomic():
            max_order = max([s.order for s in existing.values()] + [len(slugs)])
            logger.info('Renumbering slugs starting from %d', max_order+1)
            for n, slug in enumerate(existing.values(), max_order+1):
                slug.order = n
                slug.save()

        logger.info('%d existing slugs', len(existing))

        for n, slug in enumerate(slugs):
            # only the dictionary lookup decides between update and create
            s = existing.pop(slug, None)

            if s is not None:
                logger.info('Updating new slug %d: %s', n, slug)
                s.order = n
                s.save()
                continue

            logger.info('Creating new slug %d: %s', n, slug)
            try:
                # the atomic block creates a savepoint when running within
                # a transaction, so a failed INSERT can be rolled back
                # without breaking the surrounding transaction
                with transaction.atomic():
                    Slug.objects.create(slug=slug,
                                        content_object=self,
                                        order=n,
                                        scope=self.scope,
                                        )
            except IntegrityError as ie:
                logger.warning('Could not create Slug for %s: %s', self, ie)

        # whatever is left in 'existing' was not requested anymore
        with transaction.atomic():
            delete = [s.pk for s in existing.values()]
            logger.info('Deleting %d slugs', len(delete))
            Slug.objects.filter(id__in=delete).delete()
class MergedUUIDsMixin(models.Model):
    """ Gives a model a generic relation to its MergedUUID objects """

    class Meta:
        abstract = True

    merged_uuids = GenericRelation(
        'MergedUUID',
        related_query_name='merged_uuids',
    )
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models that have merged UUIDs """

    def get_by_any_id(self, id):
        """ Look up an object by its own ID, falling back to merged IDs

        The object's own ID is tried first; only if no such object exists
        is the lookup repeated via the 'merged_uuids' relation. """
        # TODO: should this be done in the model?
        not_found = self.model.DoesNotExist
        try:
            result = self.get(id=id)
        except not_found:
            result = self.get(merged_uuids__uuid=id)
        return result
class TagsMixin(models.Model):
    """ Gives a model a generic relation to its Tag objects """

    class Meta:
        abstract = True

    tags = GenericRelation('Tag', related_query_name='tags')
class OrderedModel(models.Model):
    """ Abstract model whose instances are sorted by an 'order' field

    Models using this base class have to ensure themselves that 'order'
    is sufficiently unique
    """

    class Meta:
        abstract = True
        ordering = ['order']

    order = models.PositiveSmallIntegerField()
class PodcastGroup(UUIDModel, TitleModel, SlugsMixin):
    """ A group that combines several podcasts """

    @property
    def scope(self):
        """ Podcast groups live in the global scope (the empty string) """
        global_scope = ''
        return global_scope
class PodcastQuerySet(MergedUUIDQuerySet):
    """ Podcast-specific queries """

    def random(self):
        """ Random podcasts

        Podcasts without a title are excluded, to guarantee some minimum
        quality of the results """

        # PostgreSQL's RANDOM() is very expensive. Instead a fresh UUID is
        # generated and podcasts with a higher ID are queried.
        # The podcasts are returned in order of their ID, but the assumption
        # is that usually only one podcast will be required anyway
        import uuid
        threshold = uuid.uuid1()
        titled = self.exclude(title='')
        return titled.filter(id__gt=threshold)

    def flattr(self):
        """ Podcasts that have a Flattr URL """
        return self.exclude(flattr_url__isnull=True)

    def license(self, license_url=None):
        """ Podcasts with the given license, or with any license if no
        license URL is given """
        if not license_url:
            return self.exclude(license__isnull=True)

        return self.filter(license=license_url)

    def order_by_next_update(self):
        """ Order podcasts by the time of their next scheduled update """
        next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
        with_next_update = self.extra(select={'next_update': next_update_sql})
        return with_next_update.order_by('next_update')

    def toplist(self, language=None):
        """ Podcasts by descending subscriber count, optionally restricted
        to a language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-subscribers')
class PodcastManager(GenericManager):
    """ Manager for the Podcast model """

    def get_queryset(self):
        """ Use PodcastQuerySet so its custom queries are available """
        return PodcastQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, url, defaults=None):
        """ Returns the podcast with the given URL, creating it if necessary

        'defaults' may contain field values that are used when the podcast
        is created. A new podcast gets a newly generated ID and a URL
        object for 'url' in the global scope. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: neither a shared default argument nor the
        # caller's dictionary is modified
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        podcast, created = self.get_or_create(urls__url=url, defaults=defaults)

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope='',
                               content_object=podcast,
                               )
        return podcast
class Podcast(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, TagsMixin, MergedUUIDsMixin):
    """ A Podcast """

    logo_url = models.URLField(null=True, max_length=1000)
    group = models.ForeignKey(PodcastGroup, null=True,
                              on_delete=models.PROTECT)
    group_member_name = models.CharField(max_length=30, null=True, blank=False)

    # if p1 is related to p2, p2 is also related to p1
    related_podcasts = models.ManyToManyField('self', symmetrical=True)

    subscribers = models.PositiveIntegerField(default=0)
    restrictions = models.CharField(max_length=20, null=False, blank=True,
                                    default='')
    common_episode_title = models.CharField(max_length=100, null=False, blank=True)
    new_location = models.URLField(max_length=1000, null=True, blank=False)
    latest_episode_timestamp = models.DateTimeField(null=True)
    episode_count = models.PositiveIntegerField(default=0)
    hub = models.URLField(null=True)
    twitter = models.CharField(max_length=15, null=True, blank=False)

    # update interval in hours
    update_interval = models.PositiveSmallIntegerField(null=False,
        default=DEFAULT_UPDATE_INTERVAL)

    objects = PodcastManager()

    def subscriber_count(self):
        # TODO: implement
        return 0

    def group_with(self, other, grouptitle, myname, othername):
        """ Group the podcast with another one

        If neither podcast is in a group, a new group titled 'grouptitle'
        is created for both. If only one of them is in a group, the other
        one is added to it. 'myname' and 'othername' are used as the group
        member names of this and the other podcast.

        Returns the group, or None if both podcasts already share a group.
        Raises ValueError if the podcasts are in different groups. """
        # TODO: move to PodcastGroup?

        if bool(self.group) and (self.group == other.group):
            # they are already grouped
            return

        group1 = self.group
        group2 = other.group

        if group1 and group2:
            raise ValueError('both podcasts already are in different groups')

        elif not (group1 or group2):
            # Form a new group
            import uuid
            group = PodcastGroup.objects.create(id=uuid.uuid1(), title=grouptitle)
            self.group_member_name = myname
            self.group = group
            self.save()

            other.group_member_name = othername
            other.group = group
            other.save()

            return group

        elif group1:
            # add other to self's group
            other.group_member_name = othername
            other.group = group1
            other.save()
            return group1

        else:
            # add self to other's group
            self.group_member_name = myname
            self.group = group2
            self.save()
            return group2

    def subscribe_targets(self, user):
        """
        returns all Devices and SyncGroups on which this podcast can be
        subscribed. This excludes all devices/syncgroups on which the
        podcast is already subscribed
        """
        targets = []

        subscriptions_by_devices = user.get_subscriptions_by_device()
        podcast_id = self.get_id()

        for group in user.get_grouped_devices():

            if group.is_synced:
                # only the first device of a synced group is checked; the
                # group's devices are added as one target
                dev = group.devices[0]

                if podcast_id not in subscriptions_by_devices[dev.id]:
                    targets.append(group.devices)

            else:
                for device in group.devices:
                    if podcast_id not in subscriptions_by_devices[device.id]:
                        targets.append(device)

        return targets

    def get_common_episode_title(self, num_episodes=100):
        """ The title part that the podcast's episodes have in common

        Returns the stored common_episode_title if there is one. Otherwise
        it is derived from the titles of up to 'num_episodes' episodes.
        Returns None if no common title can be determined. """

        if self.common_episode_title:
            return self.common_episode_title

        episodes = self.episode_set.all()[:num_episodes]

        # We take all non-empty titles; a list is built explicitly because
        # its length is needed below
        titles = [e.title for e in episodes if e.title]

        # there can not be a "common" title of a single title
        if len(titles) < 2:
            return None

        # get the longest common substring
        common_title = utils.longest_substr(titles)

        # but consider only the part up to the first number. Otherwise we risk
        # removing part of the number (eg if a feed contains episodes 100-199)
        common_title = re.search(r'^\D*', common_title).group(0)

        if len(common_title.strip()) < 2:
            return None

        return common_title

    def get_episode_before(self, episode):
        """ The episode released immediately before the given one

        Returns None if the given episode has no release date, or if there
        is no earlier episode. """
        if not episode.released:
            return None

        # latest() can not be used without a field name, because no
        # 'get_latest_by' is defined for episodes; ordering explicitly and
        # taking the first result also matches get_episode_after()
        earlier = self.episode_set.filter(released__lt=episode.released)
        return earlier.order_by('-released').first()

    def get_episode_after(self, episode):
        """ The episode released immediately after the given one

        Returns None if the given episode has no release date, or if there
        is no later episode. """
        if not episode.released:
            return None

        # Episodes are sorted newest-first by default, so the ordering has
        # to be reversed to get the next (not the most recent) episode
        later = self.episode_set.filter(released__gt=episode.released)
        return later.order_by('released').first()

    @property
    def scope(self):
        """ A podcast is always in the global scope """
        return ''
class EpisodeQuerySet(MergedUUIDQuerySet):
    """ Episode-specific queries """

    def toplist(self, language=None):
        """ Episodes by descending listener count, optionally restricted
        to a language """
        candidates = self.filter(language=language) if language else self
        return candidates.order_by('-listeners')

    def by_released(self):
        """ Sorts by release date, with missing release dates last

        Episodes with the most recent release date come first; episodes
        without a release date are sorted to the end. """
        released_flag = {'has_released': 'released IS NOT NULL'}
        flagged = self.extra(select=released_flag)
        return flagged.order_by('-has_released', '-released')
class EpisodeManager(GenericManager):
    """ Custom queries for Episodes """

    def get_queryset(self):
        """ Use EpisodeQuerySet so its custom queries are available """
        return EpisodeQuerySet(self.model, using=self._db)

    @transaction.atomic
    def get_or_create_for_url(self, podcast, url, defaults=None):
        """ Returns the podcast's episode with the given URL, creating it
        if necessary

        'defaults' may contain field values that are used when the episode
        is created. A new episode gets a newly generated ID and a URL
        object for 'url' in the scope of the podcast. """
        # TODO: where to specify how uuid is created?
        import uuid

        # Work on a copy: neither a shared default argument nor the
        # caller's dictionary is modified
        defaults = dict(defaults or {})
        defaults['id'] = uuid.uuid1().hex

        episode, created = self.get_or_create(podcast=podcast,
                                              urls__url=url,
                                              defaults=defaults,
                                              )

        if created:
            URL.objects.create(url=url,
                               order=0,
                               scope=podcast.get_id(),
                               content_object=episode,
                               )
        return episode
class Episode(UUIDModel, TitleModel, DescriptionModel, LinkModel,
        LanguageModel, LastUpdateModel, UpdateInfoModel, LicenseModel,
        FlattrModel, ContentTypesModel, MergedIdsModel, OutdatedModel,
        AuthorModel, UrlsMixin, SlugsMixin, MergedUUIDsMixin):
    """ An episode of a podcast """

    guid = models.CharField(max_length=200, null=True)
    content = models.TextField()
    released = models.DateTimeField(null=True, db_index=True)
    duration = models.PositiveIntegerField(null=True)
    filesize = models.BigIntegerField(null=True)
    mimetypes = models.CharField(max_length=200)
    podcast = models.ForeignKey(Podcast, on_delete=models.PROTECT)
    listeners = models.PositiveIntegerField(null=True, db_index=True)

    objects = EpisodeManager()

    class Meta:
        # most recently released episodes first
        ordering = ['-released']

    @property
    def scope(self):
        """ The scope of an episode is the (hex) ID of its podcast """
        podcast_uuid = self.podcast_id
        return podcast_uuid.hex

    def get_short_title(self, common_title):
        """ The episode's title for use in the context of its podcast

        'common_title' is removed from the title, as are leading digits
        and non-word characters. Returns None if either the title or
        'common_title' is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        return re.sub(r'^[\W\d]+', '', remainder)

    def get_episode_number(self, common_title):
        """ The number of the episode

        The number is read from the start of the title, after removing
        'common_title'. Returns None if there is no such number, or if
        either the title or 'common_title' is empty. """
        if not (self.title and common_title):
            return None

        remainder = self.title.replace(common_title, '').strip()
        found = re.search(r'^\W*(\d+)', remainder)
        return int(found.group(1)) if found else None
class ScopedModel(models.Model):
    """ Abstract model for objects that belong to a scope, which is
    usually used to limit uniqueness

    NULL is not allowed as a scope: in SQL null is not equal to null, so
    the field could not be used in unique constraints. """

    class Meta:
        abstract = True

    # Slugs / URLs are unique within their scope: two podcasts (scope '')
    # can not share a URL, and neither can two episodes of the same podcast
    # (scope = podcast-ID)
    scope = models.CharField(
        max_length=32,
        null=False,
        blank=True,
        db_index=True,
    )
class URL(OrderedModel, ScopedModel):
    """ A URL of a Podcast or an Episode; each can have several of them

    URLs are ordered, and the first URL is considered the canonical one """

    url = models.URLField(max_length=2048)

    # generic relation to the owning object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        verbose_name = 'URL'
        verbose_name_plural = 'URLs'

        unique_together = (
            # within a scope each URL exists only once
            ('url', 'scope'),

            # no two URLs of the same object may have the same order key
            ('content_type', 'object_id', 'order'),
        )
class Tag(models.Model):
    """ A tag that can be attached to any kind of Model

    See also :class:`TagsMixin`
    """

    # possible sources of a tag
    FEED = 1
    DELICIOUS = 2
    USER = 4

    SOURCE_CHOICES = (
        (FEED, 'Feed'),
        (DELICIOUS, 'delicious'),
        (USER, 'User'),
    )

    tag = models.SlugField()
    source = models.PositiveSmallIntegerField(choices=SOURCE_CHOICES)
    #user = models.ForeignKey(null=True)

    # generic relation to the tagged object, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (
            # each source can assign a given tag to an item only once
            # TODO: add user to tuple
            ('tag', 'source', 'content_type', 'object_id'),
        )
class Slug(OrderedModel, ScopedModel):
    """ Slug for any kind of Model

    Slugs are ordered, and the first slug is considered the canonical one.
    See also :class:`SlugsMixin`
    """

    slug = models.SlugField(max_length=150, db_index=True)

    # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta(OrderedModel.Meta):
        unique_together = (
            # a slug is unique per type; eg a podcast can have the same slug
            # as an episode, but no two podcasts can have the same slug
            ('slug', 'scope'),

            # slugs of an object must be ordered, so that no two slugs of one
            # object have the same order key
            ('content_type', 'object_id', 'order'),
        )

    def __repr__(self):
        """ Debug representation with slug, order and content object """
        # the format string previously lacked the closing parenthesis
        return '{cls}(slug={slug}, order={order}, content_object={obj})'.format(
            cls=self.__class__.__name__,
            slug=self.slug,
            order=self.order,
            obj=self.content_object
        )
class MergedUUID(models.Model):
    """ Keeps the UUIDs of objects that have been merged into another
    object, for later reference

    see also :class:`MergedUUIDsMixin`
    """

    # each merged UUID is stored at most once
    uuid = UUIDField(unique=True)

    # generic relation to the object that was merged into, see
    # https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
    content_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
    object_id = UUIDField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        verbose_name_plural = 'Merged UUIDs'
        verbose_name = 'Merged UUID'