1 from __future__
import unicode_literals
4 from datetime
import datetime
6 from django
.conf
import settings
7 from django
.db
import models
, transaction
, IntegrityError
8 from django
.utils
.translation
import ugettext
as _
9 from django
.contrib
.contenttypes
.models
import ContentType
10 from django
.contrib
.contenttypes
.fields
import GenericRelation
11 from django
.contrib
.contenttypes
import generic
13 from uuidfield
import UUIDField
15 from mygpo
import utils
16 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
17 UpdateInfoModel
, OrderedModel
, OptionallyOrderedModel
)
20 logger
= logging
.getLogger(__name__
)
23 # default podcast update interval in hours
24 DEFAULT_UPDATE_INTERVAL
= 7 * 24
# minimum podcast update interval in hours
27 MIN_UPDATE_INTERVAL
= 5
29 # every podcast should be updated at least once a month
30 MAX_UPDATE_INTERVAL
= 24 * 30
33 class TitleModel(models
.Model
):
34 """ Model that has a title """
36 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
38 subtitle
= models
.TextField(null
=False, blank
=True)
41 return self
.title
.encode('ascii', errors
='replace')
50 class DescriptionModel(models
.Model
):
51 """ Model that has a description """
53 description
= models
.TextField(null
=False, blank
=True)
59 class LinkModel(models
.Model
):
60 """ Model that has a link """
62 link
= models
.URLField(null
=True, max_length
=1000)
68 class LanguageModel(models
.Model
):
69 """ Model that has a language """
71 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
78 class LastUpdateModel(models
.Model
):
79 """ Model with timestamp of last update from its source """
81 # date and time at which the model has last been updated from its source
82 # (eg a podcast feed). None means that the object has been created as a
83 # stub, without information from the source.
84 last_update
= models
.DateTimeField(null
=True)
90 class LicenseModel(models
.Model
):
91 # URL to a license (usually Creative Commons)
92 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
99 class FlattrModel(models
.Model
):
100 # A Flattr payment URL
101 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
108 class ContentTypesModel(models
.Model
):
109 # contains a comma-separated values of content types, eg 'audio,video'
110 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
116 class MergedIdsModel(models
.Model
):
122 class OutdatedModel(models
.Model
):
123 outdated
= models
.BooleanField(default
=False, db_index
=True)
129 class AuthorModel(models
.Model
):
130 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
136 class UrlsMixin(models
.Model
):
137 """ Methods for working with URL objects """
139 urls
= GenericRelation('URL', related_query_name
='urls')
146 """ The main URL of the model """
147 # We could also use self.urls.first() here, but this would result in a
148 # different query and would render a .prefetch_related('urls') useless
149 # The assumption is that we will never have loads of URLS, so
150 # fetching all won't hurt
151 urls
= list(self
.urls
.all())
152 return urls
[0].url
if urls
else None
154 def add_missing_urls(self
, new_urls
):
155 """ Adds missing URLS from new_urls
157 The order of existing URLs is not changed """
158 existing_urls
= self
.urls
.all()
159 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
160 existing_urls
= [u
.url
for u
in existing_urls
]
163 if url
in existing_urls
:
167 URL
.objects
.create(url
=url
,
173 except IntegrityError
as ie
:
174 logger
.warn('Could not add URL: {err}'.format(err
=ie
))
177 def set_url(self
, url
):
178 """ Sets the canonical URL """
180 urls
= [u
.url
for u
in self
.urls
.all()]
187 def set_urls(self
, urls
):
188 """ Update the object's URLS to the given list
190 'urls' should be a list of strings. Slugs that do not exist are
191 created. Existing urls that are not in the 'urls' list are
193 urls
= [utils
.to_maxlength(URL
, 'url', url
) for url
in urls
]
194 existing
= {u
.url
: u
for u
in self
.urls
.all()}
195 utils
.set_ordered_entries(self
, urls
, existing
, URL
, 'url',
199 class SlugsMixin(models
.Model
):
200 """ Methods for working with Slug objects """
202 slugs
= GenericRelation('Slug', related_query_name
='slugs')
209 """ The main slug of the podcast
211 TODO: should be retrieved from a (materialized) view """
213 # We could also use self.slugs.first() here, but this would result in a
214 # different query and would render a .prefetch_related('slugs') useless
215 # The assumption is that we will never have loads of slugs, so
216 # fetching all won't hurt
217 slugs
= list(self
.slugs
.all())
218 slug
= slugs
[0].slug
if slugs
else None
219 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
223 def add_slug(self
, slug
):
224 """ Adds a (non-cannonical) slug """
227 raise ValueError("'%s' is not a valid slug" % slug
)
229 existing_slugs
= self
.slugs
.all()
231 # cut slug to the maximum allowed length
232 slug
= utils
.to_maxlength(Slug
, 'slug', slug
)
234 # check if slug already exists
235 if slug
in [s
.slug
for s
in existing_slugs
]:
238 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
239 next_order
= max_order
+ 1
240 Slug
.objects
.create(scope
=self
.scope
,
246 def set_slug(self
, slug
):
247 """ Sets the canonical slug """
249 slugs
= [s
.slug
for s
in self
.slugs
.all()]
253 slugs
.insert(0, slug
)
254 self
.set_slugs(slugs
)
257 def remove_slug(self
, slug
):
258 """ Removes a slug """
261 content_type
=ContentType
.objects
.get_for_model(self
),
266 def set_slugs(self
, slugs
):
267 """ Update the object's slugs to the given list
269 'slugs' should be a list of strings. Slugs that do not exist are
270 created. Existing slugs that are not in the 'slugs' list are
272 slugs
= [utils
.to_maxlength(Slug
, 'slug', slug
) for slug
in slugs
]
273 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
274 utils
.set_ordered_entries(self
, slugs
, existing
, Slug
, 'slug',
278 class MergedUUIDsMixin(models
.Model
):
279 """ Methods for working with MergedUUID objects """
281 merged_uuids
= GenericRelation('MergedUUID',
282 related_query_name
='merged_uuids')
288 class MergedUUIDQuerySet(models
.QuerySet
):
289 """ QuerySet for Models inheriting from MergedUUID """
291 def get_by_any_id(self
, id):
292 """ Find am Episode by its own ID or by a merged ID """
293 # TODO: should this be done in the model?
295 return self
.get(id=id)
296 except self
.model
.DoesNotExist
:
297 return self
.get(merged_uuids__uuid
=id)
300 class TagsMixin(models
.Model
):
301 """ Methods for working with Tag objects """
303 tags
= GenericRelation('Tag', related_query_name
='tags')
309 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
310 """ Groups multiple podcasts together """
314 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Total number of subscribers of all podcasts in this group

    Adds up the ``subscriber_count()`` of every podcast returned by
    ``self.podcast_set.all()``. A group without podcasts yields 0. """
    # this could be done directly in the DB
    total = 0
    for podcast in self.podcast_set.all():
        total += podcast.subscriber_count()
    return total
323 podcast
= self
.podcast_set
.first()
327 class PodcastQuerySet(MergedUUIDQuerySet
):
328 """ Custom queries for Podcasts """
333 Excludes podcasts with missing title to guarantee some
334 minimum quality of the results """
336 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
337 # random uuid and query podcasts with a higher ID
338 # This returns podcasts in order of their ID, but the assumption is
339 # that usually only one podcast will be required anyway
342 return self
.exclude(title
='').filter(id__gt
=ruuid
)
345 """ Podcasts providing Flattr information """
346 return self
.exclude(flattr_url__isnull
=True)
348 def license(self
, license_url
=None):
349 """ Podcasts with any / the given license """
351 return self
.filter(license
=license_url
)
353 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Sort podcasts by next scheduled update

    Annotates each row with a ``next_update`` column, computed in SQL as
    ``last_update`` plus ``update_interval`` hours, and orders the
    result by that column (ascending). """
    # raw SQL expression: update_interval is concatenated with ' hours'
    # and cast to an INTERVAL before being added to last_update
    next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
    return self.extra(
        select={'next_update': next_update_sql},
    ).order_by('next_update')
361 def next_update_between(self
, start
, end
):
362 NEXTUPDATE_BETWEEN
= ("(last_update + (update_interval || "
363 "' hours')::INTERVAL) BETWEEN %s AND %s")
365 where
=[NEXTUPDATE_BETWEEN
], params
=[start
, end
]
368 def toplist(self
, language
=None):
371 toplist
= toplist
.filter(language
=language
)
373 return toplist
.order_by('-subscribers')
376 class PodcastManager(GenericManager
):
377 """ Manager for the Podcast model """
def get_queryset(self):
    """ Returns a PodcastQuerySet for this manager

    The queryset is built for ``self.model`` and uses the manager's
    database alias (``self._db``). """
    queryset = PodcastQuerySet(self.model, using=self._db)
    return queryset
383 def get_or_create_for_url(self
, url
, defaults
={}):
384 # TODO: where to specify how uuid is created?
387 'id': uuid
.uuid1().hex,
390 url
= utils
.to_maxlength(URL
, 'url', url
)
391 podcast
, created
= self
.get_or_create(urls__url
=url
, defaults
=defaults
)
394 url
= URL
.objects
.create(url
=url
,
397 content_object
=podcast
,
402 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
403 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
404 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
405 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
409 logo_url
= models
.URLField(null
=True, max_length
=1000)
410 group
= models
.ForeignKey(PodcastGroup
, null
=True,
411 on_delete
=models
.PROTECT
)
412 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
414 # if p1 is related to p2, p2 is also related to p1
415 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
417 subscribers
= models
.PositiveIntegerField(default
=0)
418 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
420 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
421 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
422 latest_episode_timestamp
= models
.DateTimeField(null
=True)
423 episode_count
= models
.PositiveIntegerField(default
=0)
424 hub
= models
.URLField(null
=True)
425 update_interval
= models
.PositiveSmallIntegerField(null
=False,
426 default
=DEFAULT_UPDATE_INTERVAL
)
428 # "order" value of the most recent episode (will be the highest of all)
429 max_episode_order
= models
.PositiveIntegerField(null
=True, default
=None)
431 objects
= PodcastManager()
433 def subscriber_count(self
):
435 return self
.subscribers
437 def group_with(self
, other
, grouptitle
, myname
, othername
):
438 """ Group the podcast with another one """
439 # TODO: move to PodcastGroup?
441 if bool(self
.group
) and (self
.group
== other
.group
):
442 # they are already grouped
448 if group1
and group2
:
449 raise ValueError('both podcasts already are in different groups')
451 elif not (group1
or group2
):
454 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
455 self
.group_member_name
= myname
459 other
.group_member_name
= othername
466 # add other to self's group
467 other
.group_member_name
= othername
473 # add self to other's group
474 self
.group_member_name
= myname
479 def get_common_episode_title(self
, num_episodes
=100):
481 if self
.common_episode_title
:
482 return self
.common_episode_title
484 episodes
= self
.episode_set
.all()[:num_episodes
]
486 # We take all non-empty titles
487 titles
= filter(None, (e
.title
for e
in episodes
))
489 # there can not be a "common" title of a single title
493 # get the longest common substring
494 common_title
= utils
.longest_substr(titles
)
496 # but consider only the part up to the first number. Otherwise we risk
497 # removing part of the number (eg if a feed contains episodes 100-199)
498 common_title
= re
.search(r
'^\D*', common_title
).group(0)
500 if len(common_title
.strip()) < 2:
506 def get_episode_before(self
, episode
):
507 if not episode
.released
:
509 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
511 def get_episode_after(self
, episode
):
512 if not episode
.released
:
514 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
518 """ A podcast is always in the global scope """
523 """ If models use this object as scope, they'll use this value """
527 def display_title(self
):
528 """ a title for display purposes """
533 logger
.warn('Podcast with ID {podcast_id} does not have a URL'
534 .format(podcast_id
=self
.id.hex))
535 return _('Unknown Podcast')
537 return _('Unknown Podcast from {domain}'.format(
538 domain
=utils
.get_domain(self
.url
)))
541 class EpisodeQuerySet(MergedUUIDQuerySet
):
542 """ QuerySet for Episodes """
544 def toplist(self
, language
=None):
547 toplist
= toplist
.filter(language
=language
)
549 return toplist
.order_by('-listeners')
552 class EpisodeManager(GenericManager
):
553 """ Custom queries for Episodes """
def get_queryset(self):
    """ Returns an EpisodeQuerySet for this manager

    The queryset is built for ``self.model`` and uses the manager's
    database alias (``self._db``). """
    queryset = EpisodeQuerySet(self.model, using=self._db)
    return queryset
559 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
560 # TODO: where to specify how uuid is created?
564 url
= URL
.objects
.get(url
=url
, scope
=podcast
.as_scope
)
566 except URL
.DoesNotExist
:
567 episode
= Episode
.objects
.create(podcast
=podcast
,
571 url
= URL
.objects
.create(url
=url
,
574 content_object
=episode
,
579 return url
.content_object
582 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
583 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
584 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
585 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
,
586 OptionallyOrderedModel
):
589 guid
= models
.CharField(max_length
=200, null
=True)
590 content
= models
.TextField()
591 released
= models
.DateTimeField(null
=True, db_index
=True)
592 duration
= models
.BigIntegerField(null
=True)
593 filesize
= models
.BigIntegerField(null
=True)
594 mimetypes
= models
.CharField(max_length
=200)
595 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
596 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
598 objects
= EpisodeManager()
601 ordering
= ['-order', '-released']
604 ('podcast', 'outdated', 'released'),
605 ('podcast', 'released'),
606 ('released', 'podcast'),
608 # index for typical episode toplist queries
609 ('language', 'listeners'),
611 ('podcast', 'order', 'released'),
616 """ An episode's scope is its podcast """
617 return self
.podcast_id
.hex
620 def display_title(self
):
621 # TODO: return basename of URL (see Podcast.display_title)
624 def get_short_title(self
, common_title
):
625 """ Title when used within the podcast's context """
626 if not self
.title
or not common_title
:
629 title
= self
.title
.replace(common_title
, '').strip()
630 title
= re
.sub(r
'^[\W\d]+', '', title
)
634 def get_episode_number(self
, common_title
):
635 """ Number of the episode """
636 if not self
.title
or not common_title
:
639 title
= self
.title
.replace(common_title
, '').strip()
640 match
= re
.search(r
'^\W*(\d+)', title
)
644 return int(match
.group(1))
647 class ScopedModel(models
.Model
):
648 """ A model that belongs to some scope, usually for limited uniqueness
650 scope does not allow null values, because null is not equal to null in SQL.
651 It could therefore not be used in unique constraints. """
653 # A slug / URL is unique within a scope; no two podcasts can have the same
# URL (scope ''), and no two episodes of the same podcast (scope =
655 # podcast-ID) can have the same URL
656 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Returns the default scope of the object

    This base implementation is a placeholder only: it always raises
    NotImplementedError, naming the class of the instance on which it
    was called. """
    cls_name = self.__class__.__name__
    message = '{cls} should implement get_default_scope'.format(cls=cls_name)
    raise NotImplementedError(message)
668 class URL(OrderedModel
, ScopedModel
):
669 """ Podcasts and Episodes can have multiple URLs
671 URLs are ordered, and the first slug is considered the canonical one """
673 url
= models
.URLField(max_length
=2048)
675 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
676 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
677 object_id
= UUIDField()
678 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
680 class Meta(OrderedModel
.Meta
):
682 # a URL is unique per scope
685 # URLs of an object must be ordered, so that no two slugs of one
686 # object have the same order key
687 ('content_type', 'object_id', 'order'),
691 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ A URL defaults to the scope of the object it is attached to """
    owner = self.content_object
    return owner.scope
697 class Tag(models
.Model
):
698 """ Tags any kind of Model
700 See also :class:`TagsMixin`
709 (DELICIOUS
, 'delicious'),
713 tag
= models
.SlugField()
715 # indicates where the tag came from
716 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
718 # the user that created the tag (if it was created by a user,
720 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
721 on_delete
=models
.CASCADE
)
723 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
724 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
725 object_id
= UUIDField()
726 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
730 # a tag can only be assigned once from one source to one item
731 ('tag', 'source', 'user', 'content_type', 'object_id'),
735 class Slug(OrderedModel
, ScopedModel
):
736 """ Slug for any kind of Model
738 Slugs are ordered, and the first slug is considered the canonical one.
739 See also :class:`SlugsMixin`
742 slug
= models
.SlugField(max_length
=150, db_index
=True)
744 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
745 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
746 object_id
= UUIDField()
747 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
749 class Meta(OrderedModel
.Meta
):
751 # a slug is unique per type; eg a podcast can have the same slug
752 # as an episode, but no two podcasts can have the same slug
755 # slugs of an object must be ordered, so that no two slugs of one
756 # object have the same order key
757 ('content_type', 'object_id', 'order'),
761 ('slug', 'content_type')
765 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
766 cls
=self
.__class
__.__name
__,
769 obj
=self
.content_object
773 class MergedUUID(models
.Model
):
774 """ If objects are merged their UUIDs are stored for later reference
776 see also :class:`MergedUUIDsMixin`
779 uuid
= UUIDField(unique
=True)
781 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
782 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
783 object_id
= UUIDField()
784 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
787 verbose_name
= 'Merged UUID'
788 verbose_name_plural
= 'Merged UUIDs'