1 from __future__
import unicode_literals
4 from datetime
import datetime
6 from django
.conf
import settings
7 from django
.db
import models
, transaction
, IntegrityError
8 from django
.contrib
.contenttypes
.models
import ContentType
9 from django
.contrib
.contenttypes
.fields
import GenericRelation
10 from django
.contrib
.contenttypes
import generic
12 from uuidfield
import UUIDField
14 from mygpo
import utils
15 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
19 logger
= logging
.getLogger(__name__
)
22 # default podcast update interval in hours
23 DEFAULT_UPDATE_INTERVAL
= 7 * 24
25 # minimum podcast update interval in hours
26 MIN_UPDATE_INTERVAL
= 5
28 # every podcast should be updated at least once a month
29 MAX_UPDATE_INTERVAL
= 24 * 30
32 class TitleModel(models
.Model
):
33 """ Model that has a title """
35 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
37 subtitle
= models
.TextField(null
=False, blank
=True)
40 return self
.title
.encode('ascii', errors
='replace')
49 class DescriptionModel(models
.Model
):
50 """ Model that has a description """
52 description
= models
.TextField(null
=False, blank
=True)
58 class LinkModel(models
.Model
):
59 """ Model that has a link """
61 link
= models
.URLField(null
=True, max_length
=1000)
67 class LanguageModel(models
.Model
):
68 """ Model that has a language """
70 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
77 class LastUpdateModel(models
.Model
):
78 """ Model with timestamp of last update from its source """
80 # date and time at which the model has last been updated from its source
81 # (eg a podcast feed). None means that the object has been created as a
82 # stub, without information from the source.
83 last_update
= models
.DateTimeField(null
=True)
89 class LicenseModel(models
.Model
):
90 # URL to a license (usually Creative Commons)
91 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
98 class FlattrModel(models
.Model
):
99 # A Flattr payment URL
100 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
107 class ContentTypesModel(models
.Model
):
108 # contains a comma-separated values of content types, eg 'audio,video'
109 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
115 class MergedIdsModel(models
.Model
):
121 class OutdatedModel(models
.Model
):
122 outdated
= models
.BooleanField(default
=False, db_index
=True)
128 class AuthorModel(models
.Model
):
129 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
135 class UrlsMixin(models
.Model
):
136 """ Methods for working with URL objects """
138 urls
= GenericRelation('URL', related_query_name
='urls')
145 """ The main URL of the model """
146 # We could also use self.urls.first() here, but this would result in a
147 # different query and would render a .prefetch_related('urls') useless
148 # The assumption is that we will never have loads of URLS, so
149 # fetching all won't hurt
150 urls
= list(self
.urls
.all())
151 return urls
[0].url
if urls
else None
153 def add_missing_urls(self
, new_urls
):
154 """ Adds missing URLS from new_urls
156 The order of existing URLs is not changed """
157 existing_urls
= self
.urls
.all()
158 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
159 existing_urls
= [u
.url
for u
in existing_urls
]
162 if url
in existing_urls
:
165 URL
.objects
.create(url
=url
,
174 class SlugsMixin(models
.Model
):
175 """ Methods for working with Slug objects """
177 slugs
= GenericRelation('Slug', related_query_name
='slugs')
184 """ The main slug of the podcast
186 TODO: should be retrieved from a (materialized) view """
188 # We could also use self.slugs.first() here, but this would result in a
189 # different query and would render a .prefetch_related('slugs') useless
190 # The assumption is that we will never have loads of slugs, so
191 # fetching all won't hurt
192 slugs
= list(self
.slugs
.all())
193 slug
= slugs
[0].slug
if slugs
else None
194 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
198 def add_slug(self
, slug
):
199 """ Adds a (non-canonical) slug """
202 raise ValueError("'%s' is not a valid slug" % slug
)
204 existing_slugs
= self
.slugs
.all()
206 # check if slug already exists
207 if slug
in [s
.slug
for s
in existing_slugs
]:
210 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
211 next_order
= max_order
+ 1
212 Slug
.objects
.create(scope
=self
.scope
,
218 def set_slug(self
, slug
):
219 """ Sets the canonical slug """
221 slugs
= [s
.slug
for s
in self
.slugs
.all()]
225 slugs
.insert(0, slug
)
226 self
.set_slugs(slugs
)
229 def remove_slug(self
, slug
):
230 """ Removes a slug """
233 content_type
=ContentType
.objects
.get_for_model(self
),
238 def set_slugs(self
, slugs
):
239 """ Update the object's slugs to the given list
241 'slugs' should be a list of strings. Slugs that do not exist are
242 created. Existing slugs that are not in the 'slugs' list are
244 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
245 logger
.info('%d existing slugs', len(existing
))
247 logger
.info('%d new slugs', len(slugs
))
249 with transaction
.atomic():
250 max_order
= max([s
.order
for s
in existing
.values()] + [len(slugs
)])
251 logger
.info('Renumbering slugs starting from %d', max_order
+1)
252 for n
, slug
in enumerate(existing
.values(), max_order
+1):
256 logger
.info('%d existing slugs', len(existing
))
258 for n
, slug
in enumerate(slugs
):
260 s
= existing
.pop(slug
)
261 logger
.info('Updating new slug %d: %s', n
, slug
)
265 logger
.info('Creating new slug %d: %s', n
, slug
)
267 Slug
.objects
.create(slug
=slug
,
272 except IntegrityError
as ie
:
273 logger
.warn('Could not create Slug for %s: %s', self
, ie
)
275 with transaction
.atomic():
276 delete
= [s
.pk
for s
in existing
.values()]
277 logger
.info('Deleting %d slugs', len(delete
))
278 Slug
.objects
.filter(id__in
=delete
).delete()
282 class MergedUUIDsMixin(models
.Model
):
283 """ Methods for working with MergedUUID objects """
285 merged_uuids
= GenericRelation('MergedUUID',
286 related_query_name
='merged_uuids')
292 class MergedUUIDQuerySet(models
.QuerySet
):
293 """ QuerySet for Models inheriting from MergedUUID """
def get_by_any_id(self, id):
    """ Find an object by its own ID or, failing that, by a merged ID

    The lookup by primary ID is tried first. Only when the model's
    DoesNotExist is raised is the lookup repeated through the
    'merged_uuids' relation; an exception raised by that second lookup
    is not handled here and propagates to the caller. """
    # TODO: should this be done in the model?
    try:
        found = self.get(id=id)
    except self.model.DoesNotExist:
        # nothing carries this ID itself, so look it up as a merged UUID
        found = self.get(merged_uuids__uuid=id)
    return found
304 class TagsMixin(models
.Model
):
305 """ Methods for working with Tag objects """
307 tags
= GenericRelation('Tag', related_query_name
='tags')
313 class OrderedModel(models
.Model
):
314 """ A model that can be ordered
316 The implementing Model must make sure that 'order' is sufficiently unique
319 order
= models
.PositiveSmallIntegerField()
326 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
327 """ Groups multiple podcasts together """
331 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Total number of subscribers of all podcasts in this group

    Adds up subscriber_count() of every podcast in self.podcast_set; a
    group without podcasts yields 0. """
    # this could be done directly in the DB
    members = self.podcast_set.all()
    return sum(member.subscriber_count() for member in members)
338 class PodcastQuerySet(MergedUUIDQuerySet
):
339 """ Custom queries for Podcasts """
344 Excludes podcasts with missing title to guarantee some
345 minimum quality of the results """
347 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
348 # random uuid and query podcasts with a higher ID
349 # This returns podcasts in order of their ID, but the assumption is
350 # that usually only one podcast will be required anyway
353 return self
.exclude(title
='').filter(id__gt
=ruuid
)
356 """ Podcasts providing Flattr information """
357 return self
.exclude(flattr_url__isnull
=True)
359 def license(self
, license_url
=None):
360 """ Podcasts with any / the given license """
362 return self
.filter(license
=license_url
)
364 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Sort podcasts by next scheduled update

    Adds a 'next_update' select column, computed in SQL from
    last_update and update_interval (interpreted as hours), and orders
    the queryset by that column. """
    next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
    return self.extra(
        select={'next_update': next_update_sql},
    ).order_by('next_update')
372 def toplist(self
, language
=None):
375 toplist
= toplist
.filter(language
=language
)
377 return toplist
.order_by('-subscribers')
380 class PodcastManager(GenericManager
):
381 """ Manager for the Podcast model """
def get_queryset(self):
    """ Return a PodcastQuerySet for this manager

    The queryset is built for self.model, and self._db is passed on as
    its 'using' argument. """
    model, database = self.model, self._db
    return PodcastQuerySet(model, using=database)
387 def get_or_create_for_url(self
, url
, defaults
={}):
388 # TODO: where to specify how uuid is created?
391 'id': uuid
.uuid1().hex,
394 url
= utils
.to_maxlength(URL
, 'url', url
)
395 podcast
, created
= self
.get_or_create(urls__url
=url
, defaults
=defaults
)
398 url
= URL
.objects
.create(url
=url
,
401 content_object
=podcast
,
406 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
407 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
408 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
409 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
413 logo_url
= models
.URLField(null
=True, max_length
=1000)
414 group
= models
.ForeignKey(PodcastGroup
, null
=True,
415 on_delete
=models
.PROTECT
)
416 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
418 # if p1 is related to p2, p2 is also related to p1
419 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
421 subscribers
= models
.PositiveIntegerField(default
=0)
422 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
424 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
425 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
426 latest_episode_timestamp
= models
.DateTimeField(null
=True)
427 episode_count
= models
.PositiveIntegerField(default
=0)
428 hub
= models
.URLField(null
=True)
429 update_interval
= models
.PositiveSmallIntegerField(null
=False,
430 default
=DEFAULT_UPDATE_INTERVAL
)
432 objects
= PodcastManager()
434 def subscriber_count(self
):
436 return self
.subscribers
438 def group_with(self
, other
, grouptitle
, myname
, othername
):
439 """ Group the podcast with another one """
440 # TODO: move to PodcastGroup?
442 if bool(self
.group
) and (self
.group
== other
.group
):
443 # they are already grouped
449 if group1
and group2
:
450 raise ValueError('both podcasts already are in different groups')
452 elif not (group1
or group2
):
455 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
456 self
.group_member_name
= myname
460 other
.group_member_name
= othername
467 # add other to self's group
468 other
.group_member_name
= othername
474 # add self to other's group
475 self
.group_member_name
= myname
480 def get_common_episode_title(self
, num_episodes
=100):
482 if self
.common_episode_title
:
483 return self
.common_episode_title
485 episodes
= self
.episode_set
.all()[:num_episodes
]
487 # We take all non-empty titles
488 titles
= filter(None, (e
.title
for e
in episodes
))
490 # there can not be a "common" title of a single title
494 # get the longest common substring
495 common_title
= utils
.longest_substr(titles
)
497 # but consider only the part up to the first number. Otherwise we risk
498 # removing part of the number (eg if a feed contains episodes 100-199)
499 common_title
= re
.search(r
'^\D*', common_title
).group(0)
501 if len(common_title
.strip()) < 2:
507 def get_episode_before(self
, episode
):
508 if not episode
.released
:
510 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
512 def get_episode_after(self
, episode
):
513 if not episode
.released
:
515 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
519 """ A podcast is always in the global scope """
523 def display_title(self
):
528 class EpisodeQuerySet(MergedUUIDQuerySet
):
529 """ QuerySet for Episodes """
531 def toplist(self
, language
=None):
534 toplist
= toplist
.filter(language
=language
)
536 return toplist
.order_by('-listeners')
538 def by_released(self
):
539 """ Sorts by release date, sorting missing release date last
541 When sorting by release date, we want to list those with the most
542 recent release date first. At the end the episodes without release date
543 should be sorted. """
544 return self
.extra(select
={
545 'has_released': 'released IS NOT NULL',
547 order_by('-has_released', '-released')
550 class EpisodeManager(GenericManager
):
551 """ Custom queries for Episodes """
def get_queryset(self):
    """ Return an EpisodeQuerySet for this manager

    The queryset is built for self.model, and self._db is passed on as
    its 'using' argument. """
    model, database = self.model, self._db
    return EpisodeQuerySet(model, using=database)
557 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
558 # TODO: where to specify how uuid is created?
561 'id': uuid
.uuid1().hex,
563 episode
, created
= self
.get_or_create(podcast
=podcast
,
569 url
= URL
.objects
.create(url
=url
,
572 content_object
=episode
,
576 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
577 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
578 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
579 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
):
582 guid
= models
.CharField(max_length
=200, null
=True)
583 content
= models
.TextField()
584 released
= models
.DateTimeField(null
=True, db_index
=True)
585 duration
= models
.PositiveIntegerField(null
=True)
586 filesize
= models
.BigIntegerField(null
=True)
587 mimetypes
= models
.CharField(max_length
=200)
588 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
589 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
591 objects
= EpisodeManager()
594 ordering
= ['-released']
598 """ An episode's scope is its podcast """
599 return self
.podcast_id
.hex
601 def get_short_title(self
, common_title
):
602 """ Title when used within the podcast's context """
603 if not self
.title
or not common_title
:
606 title
= self
.title
.replace(common_title
, '').strip()
607 title
= re
.sub(r
'^[\W\d]+', '', title
)
611 def get_episode_number(self
, common_title
):
612 """ Number of the episode """
613 if not self
.title
or not common_title
:
616 title
= self
.title
.replace(common_title
, '').strip()
617 match
= re
.search(r
'^\W*(\d+)', title
)
621 return int(match
.group(1))
624 class ScopedModel(models
.Model
):
625 """ A model that belongs to some scope, usually for limited uniqueness
627 scope does not allow null values, because null is not equal to null in SQL.
628 It could therefore not be used in unique constraints. """
630 # A slug / URL is unique within a scope; no two podcasts can have the same
631 # URL (scope ''), and no two episodes of the same podcast (scope =
632 # podcast-ID) can have the same URL
633 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Returns the default scope of the object

    This implementation always raises NotImplementedError; the message
    names the class of the object it was called on. """
    template = '{cls} should implement get_default_scope'
    class_name = self.__class__.__name__
    raise NotImplementedError(template.format(cls=class_name))
645 class URL(OrderedModel
, ScopedModel
):
646 """ Podcasts and Episodes can have multiple URLs
648 URLs are ordered, and the first URL is considered the canonical one """
650 url
= models
.URLField(max_length
=2048)
652 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
653 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
654 object_id
= UUIDField()
655 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
657 class Meta(OrderedModel
.Meta
):
659 # a URL is unique per scope
662 # URLs of an object must be ordered, so that no two URLs of one
663 # object have the same order key
664 ('content_type', 'object_id', 'order'),
668 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ The default scope is the scope of the object this URL points to """
    owner = self.content_object
    return owner.scope
674 class Tag(models
.Model
):
675 """ Tags any kind of Model
677 See also :class:`TagsMixin`
686 (DELICIOUS
, 'delicious'),
690 tag
= models
.SlugField()
692 # indicates where the tag came from
693 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
695 # the user that created the tag (if it was created by a user,
697 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
698 on_delete
=models
.CASCADE
)
700 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
701 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
702 object_id
= UUIDField()
703 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
707 # a tag can only be assigned once from one source to one item
708 ('tag', 'source', 'user', 'content_type', 'object_id'),
712 class Slug(OrderedModel
, ScopedModel
):
713 """ Slug for any kind of Model
715 Slugs are ordered, and the first slug is considered the canonical one.
716 See also :class:`SlugsMixin`
719 slug
= models
.SlugField(max_length
=150, db_index
=True)
721 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
722 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
723 object_id
= UUIDField()
724 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
726 class Meta(OrderedModel
.Meta
):
728 # a slug is unique per type; eg a podcast can have the same slug
729 # as an episode, but no two podcasts can have the same slug
732 # slugs of an object must be ordered, so that no two slugs of one
733 # object have the same order key
734 ('content_type', 'object_id', 'order'),
738 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
739 cls
=self
.__class
__.__name
__,
742 obj
=self
.content_object
746 class MergedUUID(models
.Model
):
747 """ If objects are merged their UUIDs are stored for later reference
749 see also :class:`MergedUUIDsMixin`
752 uuid
= UUIDField(unique
=True)
754 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
755 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
756 object_id
= UUIDField()
757 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
760 verbose_name
= 'Merged UUID'
761 verbose_name_plural
= 'Merged UUIDs'