4 from datetime
import datetime
6 from django
.conf
import settings
7 from django
.db
import models
, transaction
, IntegrityError
8 from django
.db
.models
import F
9 from django
.utils
.translation
import ugettext
as _
10 from django
.contrib
.contenttypes
.models
import ContentType
11 from django
.contrib
.contenttypes
.fields
import GenericRelation
12 from django
.contrib
.contenttypes
import generic
14 from mygpo
import utils
15 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
16 UpdateInfoModel
, OrderedModel
, OptionallyOrderedModel
)
# module-level logger, named after this module
logger = logging.getLogger(__name__)


# default podcast update interval in hours (7 days * 24 hours)
DEFAULT_UPDATE_INTERVAL = 7 * 24

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# maximum podcast update interval in hours (24 hours * 30 days): every
# podcast should be updated at least once a month
MAX_UPDATE_INTERVAL = 24 * 30
32 class TitleModel(models
.Model
):
33 """ Model that has a title """
35 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
37 subtitle
= models
.TextField(null
=False, blank
=True)
46 class DescriptionModel(models
.Model
):
47 """ Model that has a description """
49 description
= models
.TextField(null
=False, blank
=True)
55 class LinkModel(models
.Model
):
56 """ Model that has a link """
58 link
= models
.URLField(null
=True, max_length
=1000)
64 class LanguageModel(models
.Model
):
65 """ Model that has a language """
67 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
74 class LastUpdateModel(models
.Model
):
75 """ Model with timestamp of last update from its source """
77 # date and time at which the model has last been updated from its source
78 # (eg a podcast feed). None means that the object has been created as a
79 # stub, without information from the source.
80 last_update
= models
.DateTimeField(null
=True)
86 class LicenseModel(models
.Model
):
87 # URL to a license (usually Creative Commons)
88 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
95 class FlattrModel(models
.Model
):
96 # A Flattr payment URL
97 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
104 class ContentTypesModel(models
.Model
):
105 # contains a comma-separated values of content types, eg 'audio,video'
106 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
112 class MergedIdsModel(models
.Model
):
118 class OutdatedModel(models
.Model
):
119 outdated
= models
.BooleanField(default
=False, db_index
=True)
125 class AuthorModel(models
.Model
):
126 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
132 class UrlsMixin(models
.Model
):
133 """ Methods for working with URL objects """
135 urls
= GenericRelation('URL', related_query_name
='urls')
142 """ The main URL of the model """
143 # We could also use self.urls.first() here, but this would result in a
144 # different query and would render a .prefetch_related('urls') useless
145 # The assumption is that we will never have loads of URLS, so
146 # fetching all won't hurt
147 urls
= list(self
.urls
.all())
148 return urls
[0].url
if urls
else None
150 def add_missing_urls(self
, new_urls
):
151 """ Adds missing URLS from new_urls
153 The order of existing URLs is not changed """
154 existing_urls
= self
.urls
.all()
155 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
156 existing_urls
= [u
.url
for u
in existing_urls
]
159 if url
in existing_urls
:
163 URL
.objects
.create(url
=url
,
169 except IntegrityError
as ie
:
170 err
= str(ie
).decode('utf-8')
171 logger
.warn(u
'Could not add URL: {0}'.format(err
))
174 def set_url(self
, url
):
175 """ Sets the canonical URL """
177 urls
= [u
.url
for u
in self
.urls
.all()]
184 def set_urls(self
, urls
):
185 """ Update the object's URLS to the given list
187 'urls' should be a list of strings. URLs that do not exist are
188 created. Existing urls that are not in the 'urls' list are
190 urls
= [utils
.to_maxlength(URL
, 'url', url
) for url
in urls
]
191 existing
= {u
.url
: u
for u
in self
.urls
.all()}
192 utils
.set_ordered_entries(self
, urls
, existing
, URL
, 'url',
196 class SlugsMixin(models
.Model
):
197 """ Methods for working with Slug objects """
199 slugs
= GenericRelation('Slug', related_query_name
='slugs')
206 """ The main slug of the podcast
208 TODO: should be retrieved from a (materialized) view """
210 # We could also use self.slugs.first() here, but this would result in a
211 # different query and would render a .prefetch_related('slugs') useless
212 # The assumption is that we will never have loads of slugs, so
213 # fetching all won't hurt
214 slugs
= list(self
.slugs
.all())
215 slug
= slugs
[0].slug
if slugs
else None
216 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
220 def add_slug(self
, slug
):
221 """ Adds a (non-canonical) slug """
224 raise ValueError("'%s' is not a valid slug" % slug
)
226 existing_slugs
= self
.slugs
.all()
228 # cut slug to the maximum allowed length
229 slug
= utils
.to_maxlength(Slug
, 'slug', slug
)
231 # check if slug already exists
232 if slug
in [s
.slug
for s
in existing_slugs
]:
235 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
236 next_order
= max_order
+ 1
237 Slug
.objects
.create(scope
=self
.scope
,
243 def set_slug(self
, slug
):
244 """ Sets the canonical slug """
246 slugs
= [s
.slug
for s
in self
.slugs
.all()]
250 slugs
.insert(0, slug
)
251 self
.set_slugs(slugs
)
254 def remove_slug(self
, slug
):
255 """ Removes a slug """
258 content_type
=ContentType
.objects
.get_for_model(self
),
263 def set_slugs(self
, slugs
):
264 """ Update the object's slugs to the given list
266 'slugs' should be a list of strings. Slugs that do not exist are
267 created. Existing slugs that are not in the 'slugs' list are
269 slugs
= [utils
.to_maxlength(Slug
, 'slug', slug
) for slug
in slugs
]
270 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
271 utils
.set_ordered_entries(self
, slugs
, existing
, Slug
, 'slug',
275 class MergedUUIDsMixin(models
.Model
):
276 """ Methods for working with MergedUUID objects """
278 merged_uuids
= GenericRelation('MergedUUID',
279 related_query_name
='merged_uuids')
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for Models inheriting from MergedUUID """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object is looked up by its own ID first. Only if the model's
        ``DoesNotExist`` is raised for that lookup, a second lookup is
        made through the ``merged_uuids`` relation. Any exception raised
        by the second lookup propagates to the caller. """
        # TODO: should this be done in the model?
        try:
            return self.get(id=id)
        except self.model.DoesNotExist:
            # no object carries this ID itself; it may be the former ID
            # of an object that has been merged into another one
            return self.get(merged_uuids__uuid=id)
297 class TagsMixin(models
.Model
):
298 """ Methods for working with Tag objects """
300 tags
= GenericRelation('Tag', related_query_name
='tags')
306 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
307 """ Groups multiple podcasts together """
311 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Sum of the subscriber counts of all podcasts in this group

    Returns 0 if the group does not contain any podcasts. """
    # this could be done directly in the DB
    total = 0
    for podcast in self.podcast_set.all():
        total += podcast.subscriber_count()
    return total
320 podcast
= self
.podcast_set
.first()
324 class PodcastQuerySet(MergedUUIDQuerySet
):
325 """ Custom queries for Podcasts """
330 Excludes podcasts with missing title to guarantee some
331 minimum quality of the results """
333 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
334 # random uuid and query podcasts with a higher ID
335 # This returns podcasts in order of their ID, but the assumption is
336 # that usually only one podcast will be required anyway
339 return self
.exclude(title
='').filter(id__gt
=ruuid
)
342 """ Podcasts providing Flattr information """
343 return self
.exclude(flattr_url__isnull
=True)
345 def license(self
, license_url
=None):
346 """ Podcasts with any / the given license """
348 return self
.filter(license
=license_url
)
350 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Order podcasts by the time of their next scheduled update

    The next update time is computed in SQL as ``last_update`` plus
    ``update_interval`` hours; it is selected as the extra column
    ``next_update``, which the result is then ordered by. """
    next_update_sql = (
        "last_update + (update_interval || ' hours')::INTERVAL"
    )
    return self.extra(
        select={'next_update': next_update_sql},
    ).order_by('next_update')
358 def next_update_between(self
, start
, end
):
359 NEXTUPDATE_BETWEEN
= ("(last_update + (update_interval || "
360 "' hours')::INTERVAL) BETWEEN %s AND %s")
362 where
=[NEXTUPDATE_BETWEEN
], params
=[start
, end
]
365 def toplist(self
, language
=None):
368 toplist
= toplist
.filter(language
=language
)
370 return toplist
.order_by('-subscribers')
373 class PodcastManager(GenericManager
):
374 """ Manager for the Podcast model """
def get_queryset(self):
    """ Return a PodcastQuerySet for this manager's model and database """
    model, database = self.model, self._db
    return PodcastQuerySet(model, using=database)
380 def get_or_create_for_url(self
, url
, defaults
={}):
381 # TODO: where to specify how uuid is created?
387 url
= utils
.to_maxlength(URL
, 'url', url
)
389 # try to fetch the podcast
390 return Podcast
.objects
.get(urls__url
=url
,
393 except Podcast
.DoesNotExist
:
394 # podcast did not exist, try to create it
396 with transaction
.atomic():
397 podcast
= Podcast
.objects
.create(**defaults
)
398 url
= URL
.objects
.create(url
=url
,
401 content_object
=podcast
,
405 # URL could not be created, so it was created since the first get
406 except IntegrityError
:
407 return Podcast
.objects
.get(urls__url
=url
,
412 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
413 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
414 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
415 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
419 logo_url
= models
.URLField(null
=True, max_length
=1000)
420 group
= models
.ForeignKey(PodcastGroup
, null
=True,
421 on_delete
=models
.PROTECT
)
422 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
424 # if p1 is related to p2, p2 is also related to p1
425 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
427 subscribers
= models
.PositiveIntegerField(default
=0)
428 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
430 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
431 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
432 latest_episode_timestamp
= models
.DateTimeField(null
=True)
433 episode_count
= models
.PositiveIntegerField(default
=0)
434 hub
= models
.URLField(null
=True)
435 update_interval
= models
.PositiveSmallIntegerField(null
=False,
436 default
=DEFAULT_UPDATE_INTERVAL
)
438 # "order" value of the most recent episode (will be the highest of all)
439 max_episode_order
= models
.PositiveIntegerField(null
=True, default
=None)
441 objects
= PodcastManager()
443 def subscriber_count(self
):
445 return self
.subscribers
447 def group_with(self
, other
, grouptitle
, myname
, othername
):
448 """ Group the podcast with another one """
449 # TODO: move to PodcastGroup?
451 if bool(self
.group
) and (self
.group
== other
.group
):
452 # they are already grouped
458 if group1
and group2
:
459 raise ValueError('both podcasts already are in different groups')
461 elif not (group1
or group2
):
464 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
465 self
.group_member_name
= myname
469 other
.group_member_name
= othername
476 # add other to self's group
477 other
.group_member_name
= othername
483 # add self to other's group
484 self
.group_member_name
= myname
489 def get_common_episode_title(self
, num_episodes
=100):
491 if self
.common_episode_title
:
492 return self
.common_episode_title
494 episodes
= self
.episode_set
.all()[:num_episodes
]
496 # We take all non-empty titles
497 titles
= [_f
for _f
in (e
.title
for e
in episodes
) if _f
]
499 # there can not be a "common" title of a single title
503 # get the longest common substring
504 common_title
= utils
.longest_substr(titles
)
506 # but consider only the part up to the first number. Otherwise we risk
507 # removing part of the number (eg if a feed contains episodes 100-199)
508 common_title
= re
.search(r
'^\D*', common_title
).group(0)
510 if len(common_title
.strip()) < 2:
516 def get_episode_before(self
, episode
):
517 if not episode
.released
:
519 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
521 def get_episode_after(self
, episode
):
522 if not episode
.released
:
524 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
528 """ A podcast is always in the global scope """
533 """ If models use this object as scope, they'll use this value """
537 def display_title(self
):
538 """ a title for display purposes """
543 logger
.warn('Podcast with ID {podcast_id} does not have a URL'
544 .format(podcast_id
=self
.id))
545 return _('Unknown Podcast')
547 return _('Unknown Podcast from {domain}'.format(
548 domain
=utils
.get_domain(self
.url
)))
551 class EpisodeQuerySet(MergedUUIDQuerySet
):
552 """ QuerySet for Episodes """
554 def toplist(self
, language
=None):
557 toplist
= toplist
.filter(language
=language
)
559 return toplist
.order_by('-listeners')
562 class EpisodeManager(GenericManager
):
563 """ Custom queries for Episodes """
def get_queryset(self):
    """ Return an EpisodeQuerySet for this manager's model and database """
    queryset_kwargs = {'using': self._db}
    return EpisodeQuerySet(self.model, **queryset_kwargs)
568 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
569 """ Create an Episode for a given URL
571 This is the only place where new episodes are created """
573 # TODO: where to specify how uuid is created?
576 url
= utils
.to_maxlength(URL
, 'url', url
)
579 # try to fetch the episode
580 return Episode
.objects
.get(urls__url
=url
,
581 urls__scope
=podcast
.as_scope
,
583 except Episode
.DoesNotExist
:
584 # episode did not exist, try to create it
586 with transaction
.atomic():
587 episode
= Episode
.objects
.create(podcast
=podcast
,
591 url
= URL
.objects
.create(url
=url
,
594 content_object
=episode
,
597 # Keep episode_count up to date here; it is not
598 # recalculated when updating the podcast because counting
599 # episodes can be very slow for podcasts with many episodes
600 Podcast
.objects
.filter(pk
=podcast
.pk
)\
601 .update(episode_count
=F('episode_count')+1)
605 # URL could not be created, so it was created since the first get
606 except IntegrityError
:
607 return Episode
.objects
.get(urls__url
=url
,
608 urls__scope
=podcast
.as_scope
,
612 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
613 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
614 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
615 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
,
616 OptionallyOrderedModel
):
619 guid
= models
.CharField(max_length
=200, null
=True)
620 content
= models
.TextField()
621 released
= models
.DateTimeField(null
=True, db_index
=True)
622 duration
= models
.BigIntegerField(null
=True)
623 filesize
= models
.BigIntegerField(null
=True)
624 mimetypes
= models
.CharField(max_length
=200)
625 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
626 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
628 objects
= EpisodeManager()
631 ordering
= ['-order', '-released']
634 ('podcast', 'outdated', 'released'),
635 ('podcast', 'released'),
636 ('released', 'podcast'),
638 # index for typical episode toplist queries
639 ('language', 'listeners'),
641 ('podcast', 'order', 'released'),
646 """ An episode's scope is its podcast """
647 return self
.podcast
.id.hex
650 def display_title(self
):
651 # TODO: return basename of URL (see Podcast.display_title)
654 def get_short_title(self
, common_title
):
655 """ Title when used within the podcast's context """
656 if not self
.title
or not common_title
:
659 title
= self
.title
.replace(common_title
, '').strip()
660 title
= re
.sub(r
'^[\W\d]+', '', title
)
664 def get_episode_number(self
, common_title
):
665 """ Number of the episode """
666 if not self
.title
or not common_title
:
669 title
= self
.title
.replace(common_title
, '').strip()
670 match
= re
.search(r
'^\W*(\d+)', title
)
674 return int(match
.group(1))
677 class ScopedModel(models
.Model
):
678 """ A model that belongs to some scope, usually for limited uniqueness
680 scope does not allow null values, because null is not equal to null in SQL.
681 It could therefore not be used in unique constraints. """
683 # A slug / URL is unique within a scope; no two podcasts can have the same
684 # URL (scope ''), and no two episodes of the same podcast (scope =
685 # podcast-ID) can have the same URL
686 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Returns the default scope of the object

    This base implementation always raises NotImplementedError; the
    message names the class of the object it was called on. """
    class_name = self.__class__.__name__
    message = '{cls} should implement get_default_scope'.format(
        cls=class_name)
    raise NotImplementedError(message)
698 class URL(OrderedModel
, ScopedModel
):
699 """ Podcasts and Episodes can have multiple URLs
701 URLs are ordered, and the first URL is considered the canonical one """
703 url
= models
.URLField(max_length
=2048)
705 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
706 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
707 object_id
= models
.UUIDField()
708 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
710 class Meta(OrderedModel
.Meta
):
712 # a URL is unique per scope
715 # URLs of an object must be ordered, so that no two URLs of one
716 # object have the same order key
717 ('content_type', 'object_id', 'order'),
721 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ The default scope is the ``scope`` of the related content object """
    owner = self.content_object
    return owner.scope
727 class Tag(models
.Model
):
728 """ Tags any kind of Model
730 See also :class:`TagsMixin`
739 (DELICIOUS
, 'delicious'),
743 tag
= models
.SlugField()
745 # indicates where the tag came from
746 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
748 # the user that created the tag (if it was created by a user,
750 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
751 on_delete
=models
.CASCADE
)
753 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
754 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
755 object_id
= models
.UUIDField()
756 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
760 # a tag can only be assigned once from one source to one item
761 ('tag', 'source', 'user', 'content_type', 'object_id'),
765 class Slug(OrderedModel
, ScopedModel
):
766 """ Slug for any kind of Model
768 Slugs are ordered, and the first slug is considered the canonical one.
769 See also :class:`SlugsMixin`
772 slug
= models
.SlugField(max_length
=150, db_index
=True)
774 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
775 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
776 object_id
= models
.UUIDField()
777 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
779 class Meta(OrderedModel
.Meta
):
781 # a slug is unique per type; eg a podcast can have the same slug
782 # as an episode, but no two podcasts can have the same slug
785 # slugs of an object must be ordered, so that no two slugs of one
786 # object have the same order key
787 ('content_type', 'object_id', 'order'),
791 ('slug', 'content_type')
795 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
796 cls
=self
.__class
__.__name
__,
799 obj
=self
.content_object
803 class MergedUUID(models
.Model
):
804 """ If objects are merged their UUIDs are stored for later reference
806 see also :class:`MergedUUIDsMixin`
809 uuid
= models
.UUIDField(unique
=True)
811 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
812 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
813 object_id
= models
.UUIDField()
814 content_object
= generic
.GenericForeignKey('content_type', 'object_id')
817 verbose_name
= 'Merged UUID'
818 verbose_name_plural
= 'Merged UUIDs'