1 from __future__
import unicode_literals
4 from datetime
import datetime
6 from django
.conf
import settings
7 from django
.db
import models
, transaction
, IntegrityError
8 from django
.db
.models
import F
9 from django
.utils
.translation
import ugettext
as _
10 from django
.contrib
.contenttypes
.models
import ContentType
11 from django
.contrib
.contenttypes
.fields
import GenericRelation
12 from django
.contrib
.contenttypes
import generic
14 from uuidfield
import UUIDField
16 from mygpo
import utils
17 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
18 UpdateInfoModel
, OrderedModel
, OptionallyOrderedModel
)
21 logger
= logging
.getLogger(__name__
)
24 # default podcast update interval in hours
25 DEFAULT_UPDATE_INTERVAL
= 7 * 24
# minimum podcast update interval in hours
28 MIN_UPDATE_INTERVAL
= 5
30 # every podcast should be updated at least once a month
31 MAX_UPDATE_INTERVAL
= 24 * 30
34 class TitleModel(models
.Model
):
35 """ Model that has a title """
37 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
39 subtitle
= models
.TextField(null
=False, blank
=True)
42 return self
.title
.encode('ascii', errors
='replace')
51 class DescriptionModel(models
.Model
):
52 """ Model that has a description """
54 description
= models
.TextField(null
=False, blank
=True)
60 class LinkModel(models
.Model
):
61 """ Model that has a link """
63 link
= models
.URLField(null
=True, max_length
=1000)
69 class LanguageModel(models
.Model
):
70 """ Model that has a language """
72 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
79 class LastUpdateModel(models
.Model
):
80 """ Model with timestamp of last update from its source """
82 # date and time at which the model has last been updated from its source
83 # (eg a podcast feed). None means that the object has been created as a
84 # stub, without information from the source.
85 last_update
= models
.DateTimeField(null
=True)
91 class LicenseModel(models
.Model
):
92 # URL to a license (usually Creative Commons)
93 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
100 class FlattrModel(models
.Model
):
101 # A Flattr payment URL
102 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
109 class ContentTypesModel(models
.Model
):
110 # contains a comma-separated values of content types, eg 'audio,video'
111 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
117 class MergedIdsModel(models
.Model
):
123 class OutdatedModel(models
.Model
):
124 outdated
= models
.BooleanField(default
=False, db_index
=True)
130 class AuthorModel(models
.Model
):
131 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
137 class UrlsMixin(models
.Model
):
138 """ Methods for working with URL objects """
140 urls
= GenericRelation('URL', related_query_name
='urls')
147 """ The main URL of the model """
148 # We could also use self.urls.first() here, but this would result in a
149 # different query and would render a .prefetch_related('urls') useless
150 # The assumption is that we will never have loads of URLS, so
151 # fetching all won't hurt
152 urls
= list(self
.urls
.all())
153 return urls
[0].url
if urls
else None
155 def add_missing_urls(self
, new_urls
):
156 """ Adds missing URLS from new_urls
158 The order of existing URLs is not changed """
159 existing_urls
= self
.urls
.all()
160 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
161 existing_urls
= [u
.url
for u
in existing_urls
]
164 if url
in existing_urls
:
168 URL
.objects
.create(url
=url
,
174 except IntegrityError
as ie
:
175 logger
.warn('Could not add URL: {err}'.format(err
=ie
))
178 def set_url(self
, url
):
179 """ Sets the canonical URL """
181 urls
= [u
.url
for u
in self
.urls
.all()]
188 def set_urls(self
, urls
):
189 """ Update the object's URLS to the given list
191 'urls' should be a list of strings. Slugs that do not exist are
192 created. Existing urls that are not in the 'urls' list are
194 urls
= [utils
.to_maxlength(URL
, 'url', url
) for url
in urls
]
195 existing
= {u
.url
: u
for u
in self
.urls
.all()}
196 utils
.set_ordered_entries(self
, urls
, existing
, URL
, 'url',
200 class SlugsMixin(models
.Model
):
201 """ Methods for working with Slug objects """
203 slugs
= GenericRelation('Slug', related_query_name
='slugs')
210 """ The main slug of the podcast
212 TODO: should be retrieved from a (materialized) view """
214 # We could also use self.slugs.first() here, but this would result in a
215 # different query and would render a .prefetch_related('slugs') useless
216 # The assumption is that we will never have loads of slugs, so
217 # fetching all won't hurt
218 slugs
= list(self
.slugs
.all())
219 slug
= slugs
[0].slug
if slugs
else None
220 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
224 def add_slug(self
, slug
):
225 """ Adds a (non-cannonical) slug """
228 raise ValueError("'%s' is not a valid slug" % slug
)
230 existing_slugs
= self
.slugs
.all()
232 # cut slug to the maximum allowed length
233 slug
= utils
.to_maxlength(Slug
, 'slug', slug
)
235 # check if slug already exists
236 if slug
in [s
.slug
for s
in existing_slugs
]:
239 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
240 next_order
= max_order
+ 1
241 Slug
.objects
.create(scope
=self
.scope
,
247 def set_slug(self
, slug
):
248 """ Sets the canonical slug """
250 slugs
= [s
.slug
for s
in self
.slugs
.all()]
254 slugs
.insert(0, slug
)
255 self
.set_slugs(slugs
)
258 def remove_slug(self
, slug
):
259 """ Removes a slug """
262 content_type
=ContentType
.objects
.get_for_model(self
),
267 def set_slugs(self
, slugs
):
268 """ Update the object's slugs to the given list
270 'slugs' should be a list of strings. Slugs that do not exist are
271 created. Existing slugs that are not in the 'slugs' list are
273 slugs
= [utils
.to_maxlength(Slug
, 'slug', slug
) for slug
in slugs
]
274 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
275 utils
.set_ordered_entries(self
, slugs
, existing
, Slug
, 'slug',
279 class MergedUUIDsMixin(models
.Model
):
280 """ Methods for working with MergedUUID objects """
282 merged_uuids
= GenericRelation('MergedUUID',
283 related_query_name
='merged_uuids')
289 class MergedUUIDQuerySet(models
.QuerySet
):
290 """ QuerySet for Models inheriting from MergedUUID """
292 def get_by_any_id(self
, id):
293 """ Find am Episode by its own ID or by a merged ID """
294 # TODO: should this be done in the model?
296 return self
.get(id=id)
297 except self
.model
.DoesNotExist
:
298 return self
.get(merged_uuids__uuid
=id)
301 class TagsMixin(models
.Model
):
302 """ Methods for working with Tag objects """
304 tags
= GenericRelation('Tag', related_query_name
='tags')
310 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
311 """ Groups multiple podcasts together """
315 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Sum of subscriber_count() over all podcasts of the group """
    # this could be done directly in the DB
    total = 0
    for podcast in self.podcast_set.all():
        total += podcast.subscriber_count()
    return total
324 podcast
= self
.podcast_set
.first()
328 class PodcastQuerySet(MergedUUIDQuerySet
):
329 """ Custom queries for Podcasts """
334 Excludes podcasts with missing title to guarantee some
335 minimum quality of the results """
337 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
338 # random uuid and query podcasts with a higher ID
339 # This returns podcasts in order of their ID, but the assumption is
340 # that usually only one podcast will be required anyway
343 return self
.exclude(title
='').filter(id__gt
=ruuid
)
346 """ Podcasts providing Flattr information """
347 return self
.exclude(flattr_url__isnull
=True)
349 def license(self
, license_url
=None):
350 """ Podcasts with any / the given license """
352 return self
.filter(license
=license_url
)
354 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Sort podcasts by next scheduled update """
    # the next update is selected as an extra column: the last update plus
    # the update interval, which is interpreted as a number of hours
    next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
    return (
        self.extra(select={'next_update': next_update_sql})
            .order_by('next_update')
    )
362 def next_update_between(self
, start
, end
):
363 NEXTUPDATE_BETWEEN
= ("(last_update + (update_interval || "
364 "' hours')::INTERVAL) BETWEEN %s AND %s")
366 where
=[NEXTUPDATE_BETWEEN
], params
=[start
, end
]
369 def toplist(self
, language
=None):
372 toplist
= toplist
.filter(language
=language
)
374 return toplist
.order_by('-subscribers')
377 class PodcastManager(GenericManager
):
378 """ Manager for the Podcast model """
def get_queryset(self):
    """ Returns a PodcastQuerySet for the manager's model and database """
    queryset = PodcastQuerySet(self.model, using=self._db)
    return queryset
384 def get_or_create_for_url(self
, url
, defaults
={}):
385 # TODO: where to specify how uuid is created?
388 'id': uuid
.uuid1().hex,
391 url
= utils
.to_maxlength(URL
, 'url', url
)
393 # try to fetch the podcast
394 return Podcast
.objects
.get(urls__url
=url
,
397 except Podcast
.DoesNotExist
:
# podcast did not exist, try to create it
400 with transaction
.atomic():
401 podcast
= Podcast
.objects
.create(**defaults
)
402 url
= URL
.objects
.create(url
=url
,
405 content_object
=podcast
,
409 # URL could not be created, so it was created since the first get
410 except IntegrityError
:
411 return Podcast
.objects
.get(urls__url
=url
,
416 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
417 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
418 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
419 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
423 logo_url
= models
.URLField(null
=True, max_length
=1000)
424 group
= models
.ForeignKey(PodcastGroup
, null
=True,
425 on_delete
=models
.PROTECT
)
426 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
428 # if p1 is related to p2, p2 is also related to p1
429 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
431 subscribers
= models
.PositiveIntegerField(default
=0)
432 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
434 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
435 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
436 latest_episode_timestamp
= models
.DateTimeField(null
=True)
437 episode_count
= models
.PositiveIntegerField(default
=0)
438 hub
= models
.URLField(null
=True)
439 update_interval
= models
.PositiveSmallIntegerField(null
=False,
440 default
=DEFAULT_UPDATE_INTERVAL
)
442 # "order" value of the most recent episode (will be the highest of all)
443 max_episode_order
= models
.PositiveIntegerField(null
=True, default
=None)
445 objects
= PodcastManager()
447 def subscriber_count(self
):
449 return self
.subscribers
451 def group_with(self
, other
, grouptitle
, myname
, othername
):
452 """ Group the podcast with another one """
453 # TODO: move to PodcastGroup?
455 if bool(self
.group
) and (self
.group
== other
.group
):
456 # they are already grouped
462 if group1
and group2
:
463 raise ValueError('both podcasts already are in different groups')
465 elif not (group1
or group2
):
468 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
469 self
.group_member_name
= myname
473 other
.group_member_name
= othername
480 # add other to self's group
481 other
.group_member_name
= othername
487 # add self to other's group
488 self
.group_member_name
= myname
493 def get_common_episode_title(self
, num_episodes
=100):
495 if self
.common_episode_title
:
496 return self
.common_episode_title
498 episodes
= self
.episode_set
.all()[:num_episodes
]
500 # We take all non-empty titles
501 titles
= filter(None, (e
.title
for e
in episodes
))
503 # there can not be a "common" title of a single title
507 # get the longest common substring
508 common_title
= utils
.longest_substr(titles
)
510 # but consider only the part up to the first number. Otherwise we risk
511 # removing part of the number (eg if a feed contains episodes 100-199)
512 common_title
= re
.search(r
'^\D*', common_title
).group(0)
514 if len(common_title
.strip()) < 2:
520 def get_episode_before(self
, episode
):
521 if not episode
.released
:
523 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
525 def get_episode_after(self
, episode
):
526 if not episode
.released
:
528 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
532 """ A podcast is always in the global scope """
537 """ If models use this object as scope, they'll use this value """
541 def display_title(self
):
542 """ a title for display purposes """
547 logger
.warn('Podcast with ID {podcast_id} does not have a URL'
548 .format(podcast_id
=self
.id.hex))
549 return _('Unknown Podcast')
551 return _('Unknown Podcast from {domain}'.format(
552 domain
=utils
.get_domain(self
.url
)))
555 class EpisodeQuerySet(MergedUUIDQuerySet
):
556 """ QuerySet for Episodes """
558 def toplist(self
, language
=None):
561 toplist
= toplist
.filter(language
=language
)
563 return toplist
.order_by('-listeners')
566 class EpisodeManager(GenericManager
):
567 """ Custom queries for Episodes """
def get_queryset(self):
    """ Returns an EpisodeQuerySet for the manager's model and database """
    queryset = EpisodeQuerySet(self.model, using=self._db)
    return queryset
572 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
573 """ Create an Episode for a given URL
575 This is the only place where new episodes are created """
577 # TODO: where to specify how uuid is created?
581 # try to fetch the episode
582 return Episode
.objects
.get(urls__url
=url
,
583 urls__scope
=podcast
.as_scope
,
585 except Episode
.DoesNotExist
:
586 # episode did not exist, try to create it
588 with transaction
.atomic():
589 episode
= Episode
.objects
.create(podcast
=podcast
,
593 url
= URL
.objects
.create(url
=url
,
596 content_object
=episode
,
599 # Keep episode_count up to date here; it is not
600 # recalculated when updating the podcast because counting
601 # episodes can be very slow for podcasts with many episodes
602 Podcast
.objects
.filter(pk
=podcast
.pk
)\
603 .update(episode_count
=F('episode_count')+1)
607 # URL could not be created, so it was created since the first get
608 except IntegrityError
:
609 return Episode
.objects
.get(urls__url
=url
,
610 urls__scope
=podcast
.as_scope
,
614 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
615 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
616 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
617 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
,
618 OptionallyOrderedModel
):
621 guid
= models
.CharField(max_length
=200, null
=True)
622 content
= models
.TextField()
623 released
= models
.DateTimeField(null
=True, db_index
=True)
624 duration
= models
.BigIntegerField(null
=True)
625 filesize
= models
.BigIntegerField(null
=True)
626 mimetypes
= models
.CharField(max_length
=200)
627 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
628 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
630 objects
= EpisodeManager()
633 ordering
= ['-order', '-released']
636 ('podcast', 'outdated', 'released'),
637 ('podcast', 'released'),
638 ('released', 'podcast'),
640 # index for typical episode toplist queries
641 ('language', 'listeners'),
643 ('podcast', 'order', 'released'),
648 """ An episode's scope is its podcast """
649 return self
.podcast_id
.hex
652 def display_title(self
):
653 # TODO: return basename of URL (see Podcast.display_title)
656 def get_short_title(self
, common_title
):
657 """ Title when used within the podcast's context """
658 if not self
.title
or not common_title
:
661 title
= self
.title
.replace(common_title
, '').strip()
662 title
= re
.sub(r
'^[\W\d]+', '', title
)
666 def get_episode_number(self
, common_title
):
667 """ Number of the episode """
668 if not self
.title
or not common_title
:
671 title
= self
.title
.replace(common_title
, '').strip()
672 match
= re
.search(r
'^\W*(\d+)', title
)
676 return int(match
.group(1))
679 class ScopedModel(models
.Model
):
680 """ A model that belongs to some scope, usually for limited uniqueness
682 scope does not allow null values, because null is not equal to null in SQL.
683 It could therefore not be used in unique constraints. """
# A slug / URL is unique within a scope; no two podcasts can have the same
# URL (scope ''), and no two episodes of the same podcast (scope =
# podcast-ID) can have the same URL
688 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Returns the default scope of the object """
    # Always raises here; the message names the class of the instance
    # on which the method was called.
    class_name = self.__class__.__name__
    message = '{cls} should implement get_default_scope'.format(
        cls=class_name)
    raise NotImplementedError(message)
700 class URL(OrderedModel
, ScopedModel
):
701 """ Podcasts and Episodes can have multiple URLs
703 URLs are ordered, and the first slug is considered the canonical one """
705 url
= models
.URLField(max_length
=2048)
707 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
708 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
709 object_id
= UUIDField()
710 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
712 class Meta(OrderedModel
.Meta
):
714 # a URL is unique per scope
# URLs of an object must be ordered, so that no two URLs of one
# object have the same order key
719 ('content_type', 'object_id', 'order'),
723 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ The default scope is the scope of the referenced content object """
    target = self.content_object
    return target.scope
729 class Tag(models
.Model
):
730 """ Tags any kind of Model
732 See also :class:`TagsMixin`
741 (DELICIOUS
, 'delicious'),
745 tag
= models
.SlugField()
747 # indicates where the tag came from
748 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
750 # the user that created the tag (if it was created by a user,
752 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
753 on_delete
=models
.CASCADE
)
755 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
756 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
757 object_id
= UUIDField()
758 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
762 # a tag can only be assigned once from one source to one item
763 ('tag', 'source', 'user', 'content_type', 'object_id'),
767 class Slug(OrderedModel
, ScopedModel
):
768 """ Slug for any kind of Model
770 Slugs are ordered, and the first slug is considered the canonical one.
771 See also :class:`SlugsMixin`
774 slug
= models
.SlugField(max_length
=150, db_index
=True)
776 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
777 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
778 object_id
= UUIDField()
779 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
781 class Meta(OrderedModel
.Meta
):
783 # a slug is unique per type; eg a podcast can have the same slug
784 # as an episode, but no two podcasts can have the same slug
787 # slugs of an object must be ordered, so that no two slugs of one
788 # object have the same order key
789 ('content_type', 'object_id', 'order'),
793 ('slug', 'content_type')
797 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
798 cls
=self
.__class
__.__name
__,
801 obj
=self
.content_object
805 class MergedUUID(models
.Model
):
806 """ If objects are merged their UUIDs are stored for later reference
808 see also :class:`MergedUUIDsMixin`
811 uuid
= UUIDField(unique
=True)
813 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
814 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
815 object_id
= UUIDField()
816 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
819 verbose_name
= 'Merged UUID'
820 verbose_name_plural
= 'Merged UUIDs'