5 from datetime
import timedelta
7 from django
.core
.cache
import cache
8 from django
.conf
import settings
9 from django
.db
import models
, transaction
, IntegrityError
10 from django
.db
.models
import F
11 from django
.utils
.translation
import ugettext
as _
12 from django
.contrib
.contenttypes
.models
import ContentType
13 from django
.contrib
.contenttypes
.fields
import (GenericRelation
,
15 from django
.contrib
.postgres
.search
import SearchVectorField
17 from mygpo
import utils
18 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
19 UpdateInfoModel
, OrderedModel
, OptionallyOrderedModel
)
# module-level logger, named after this module
logger = logging.getLogger(__name__)

# result of a get-or-create call: the object and a flag telling whether it
# has just been created
GetCreateResult = collections.namedtuple(
    'GetCreateResult',
    ['object', 'created'],
)

# default podcast update interval in hours
DEFAULT_UPDATE_INTERVAL = 24 * 7

# minimum podcast update interval in hours
MIN_UPDATE_INTERVAL = 5

# every podcast should be updated at least once a month
MAX_UPDATE_INTERVAL = 30 * 24
38 class TitleModel(models
.Model
):
39 """ Model that has a title """
41 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
43 subtitle
= models
.TextField(null
=False, blank
=True)
52 class DescriptionModel(models
.Model
):
53 """ Model that has a description """
55 description
= models
.TextField(null
=False, blank
=True)
61 class LinkModel(models
.Model
):
62 """ Model that has a link """
64 link
= models
.URLField(null
=True, max_length
=1000)
70 class LanguageModel(models
.Model
):
71 """ Model that has a language """
73 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
80 class LastUpdateModel(models
.Model
):
81 """ Model with timestamp of last update from its source """
83 # date and time at which the model has last been updated from its source
84 # (eg a podcast feed). None means that the object has been created as a
85 # stub, without information from the source.
86 last_update
= models
.DateTimeField(null
=True)
92 class LicenseModel(models
.Model
):
93 # URL to a license (usually Creative Commons)
94 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
101 class FlattrModel(models
.Model
):
102 # A Flattr payment URL
103 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
110 class ContentTypesModel(models
.Model
):
111 # contains a comma-separated values of content types, eg 'audio,video'
112 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
118 class MergedIdsModel(models
.Model
):
124 class OutdatedModel(models
.Model
):
125 outdated
= models
.BooleanField(default
=False, db_index
=True)
131 class AuthorModel(models
.Model
):
132 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for Models inheriting from MergedUUID """

    def get_by_any_id(self, id):
        """ Find an object by its own ID or by a merged ID

        The object's own ID is tried first. Only if no object with that ID
        exists, the lookup is repeated via ``merged_uuids__uuid``. An
        exception raised by that second lookup is not handled here. """
        # TODO: should this be done in the model?
        try:
            found = self.get(id=id)
        except self.model.DoesNotExist:
            # no object has this ID itself; look for it among merged UUIDs
            found = self.get(merged_uuids__uuid=id)
        return found
150 class TagsMixin(models
.Model
):
151 """ Methods for working with Tag objects """
153 tags
= GenericRelation('Tag', related_query_name
='tags')
159 class ScopedModel(models
.Model
):
160 """ A model that belongs to some scope, usually for limited uniqueness
162 scope does not allow null values, because null is not equal to null in SQL.
163 It could therefore not be used in unique constraints. """
165 # A slug / URL is unique within a scope; no two podcasts can have the same
166 # URL (scope ''), and no two episodes of the same podcast (scope =
167 # podcast-ID) can have the same URL
168 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Returns the default scope of the object

    This implementation only raises NotImplementedError, naming the
    class of the instance it was called on. """
    class_name = self.__class__.__name__
    message = '{cls} should implement get_default_scope'.format(
        cls=class_name)
    raise NotImplementedError(message)
181 class Slug(OrderedModel
, ScopedModel
):
182 """ Slug for any kind of Model
184 Slugs are ordered, and the first slug is considered the canonical one.
185 See also :class:`SlugsMixin`
188 slug
= models
.SlugField(max_length
=150, db_index
=True)
190 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
191 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
192 object_id
= models
.UUIDField()
193 content_object
= GenericForeignKey('content_type', 'object_id')
195 class Meta(OrderedModel
.Meta
):
197 # a slug is unique per type; eg a podcast can have the same slug
198 # as an episode, but no two podcasts can have the same slug
201 # slugs of an object must be ordered, so that no two slugs of one
202 # object have the same order key
203 ('content_type', 'object_id', 'order'),
207 ('slug', 'content_type')
211 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
212 cls
=self
.__class
__.__name
__,
215 obj
=self
.content_object
220 class SlugsMixin(models
.Model
):
221 """ Methods for working with Slug objects """
223 slugs
= GenericRelation(Slug
, related_query_name
='slugs')
230 """ The main slug of the podcast
232 TODO: should be retrieved from a (materialized) view """
234 # We could also use self.slugs.first() here, but this would result in a
235 # different query and would render a .prefetch_related('slugs') useless
236 # The assumption is that we will never have loads of slugs, so
237 # fetching all won't hurt
238 slugs
= list(self
.slugs
.all())
239 slug
= slugs
[0].slug
if slugs
else None
240 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
244 def add_slug(self
, slug
):
245 """ Adds a (non-canonical) slug """
248 raise ValueError("'%s' is not a valid slug" % slug
)
250 existing_slugs
= self
.slugs
.all()
252 # cut slug to the maximum allowed length
253 slug
= utils
.to_maxlength(Slug
, 'slug', slug
)
255 # check if slug already exists
256 if slug
in [s
.slug
for s
in existing_slugs
]:
259 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
260 next_order
= max_order
+ 1
261 Slug
.objects
.create(scope
=self
.scope
,
267 def set_slug(self
, slug
):
268 """ Sets the canonical slug """
270 slugs
= [s
.slug
for s
in self
.slugs
.all()]
274 slugs
.insert(0, slug
)
275 self
.set_slugs(slugs
)
278 def remove_slug(self
, slug
):
279 """ Removes a slug """
282 content_type
=ContentType
.objects
.get_for_model(self
),
287 def set_slugs(self
, slugs
):
288 """ Update the object's slugs to the given list
290 'slugs' should be a list of strings. Slugs that do not exist are
291 created. Existing slugs that are not in the 'slugs' list are
293 slugs
= [utils
.to_maxlength(Slug
, 'slug', slug
) for slug
in slugs
]
294 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
295 utils
.set_ordered_entries(self
, slugs
, existing
, Slug
, 'slug',
300 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
301 """ Groups multiple podcasts together """
305 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Sum of the subscriber counts of all podcasts in this group

    Returns 0 if the group does not contain any podcasts. """
    # this could be done directly in the DB
    total = 0
    for podcast in self.podcast_set.all():
        total += podcast.subscriber_count()
    return total
314 podcast
= self
.podcast_set
.first()
318 class PodcastQuerySet(MergedUUIDQuerySet
):
319 """ Custom queries for Podcasts """
324 Excludes podcasts with missing title to guarantee some
325 minimum quality of the results """
327 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
328 # random uuid and query podcasts with a higher ID
329 # This returns podcasts in order of their ID, but the assumption is
330 # that usually only one podcast will be required anyway
333 return self
.exclude(title
='').filter(id__gt
=ruuid
)
335 def license(self
, license_url
=None):
336 """ Podcasts with any / the given license """
338 return self
.filter(license
=license_url
)
340 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Sort podcasts by next scheduled update

    The next update is computed in SQL as ``last_update`` plus
    ``update_interval * update_interval_factor`` hours. It is selected
    as the extra column ``_next_update``, which the result is ordered
    by. """
    next_update_sql = (
        "last_update + (update_interval * "
        "update_interval_factor || ' hours')::INTERVAL"
    )
    with_next_update = self.extra(select={'_next_update': next_update_sql})
    return with_next_update.order_by('_next_update')
def next_update(self):
    """ Time of the next scheduled update

    Returns ``last_update`` plus ``update_interval`` hours, scaled by
    ``update_interval_factor``. All three values are read from ``self``. """
    # NOTE(review): these attributes are not defined on the enclosing
    # QuerySet in the visible code -- confirm where they come from
    base_interval = timedelta(hours=self.update_interval)
    scaled_interval = base_interval * self.update_interval_factor
    return self.last_update + scaled_interval
355 def next_update_between(self
, start
, end
):
356 NEXTUPDATE_BETWEEN
= ("(last_update + (update_interval * "
357 " update_interval_factor || "
358 "' hours')::INTERVAL) BETWEEN %s AND %s")
360 where
=[NEXTUPDATE_BETWEEN
], params
=[start
, end
]
363 def toplist(self
, language
=None):
366 toplist
= toplist
.filter(language
=language
)
368 return toplist
.order_by('-subscribers')
371 class PodcastManager(GenericManager
):
372 """ Manager for the Podcast model """
def get_queryset(self):
    """ Returns a PodcastQuerySet for the manager's model and database """
    queryset_options = {'using': self._db}
    return PodcastQuerySet(self.model, **queryset_options)
def get_advertised_podcast(self):
    """ Returns the currently advertised podcast

    The podcast is identified by ``settings.PODCAST_AD_ID`` and is cached
    under the key ``'podcast_ad'``. Returns None if no advertised podcast
    is configured. """
    if not settings.PODCAST_AD_ID:
        return None

    cache_key = 'podcast_ad'

    podcast = cache.get(cache_key)
    if podcast:
        return podcast

    pk = uuid.UUID(settings.PODCAST_AD_ID)
    podcast = self.get_queryset().get(pk=pk)

    # write to the same key that is read above; a differing (misspelled)
    # key would mean the cached entry is never found again
    cache.set(cache_key, podcast)
    return podcast
391 def get_or_create_for_url(self
, url
, defaults
={}):
394 raise ValueError('The URL must not be empty')
396 # TODO: where to specify how uuid is created?
402 url
= utils
.to_maxlength(URL
, 'url', url
)
404 # try to fetch the podcast
405 podcast
= Podcast
.objects
.get(urls__url
=url
,
408 return GetCreateResult(podcast
, False)
410 except Podcast
.DoesNotExist
:
411 # podcast did not exist, try to create it
413 with transaction
.atomic():
414 podcast
= Podcast
.objects
.create(**defaults
)
415 url
= URL
.objects
.create(url
=url
,
418 content_object
=podcast
,
420 return GetCreateResult(podcast
, True)
422 # URL could not be created, so it was created since the first get
423 except IntegrityError
:
424 podcast
= Podcast
.objects
.get(urls__url
=url
,
427 return GetCreateResult(podcast
, False)
430 class URL(OrderedModel
, ScopedModel
):
431 """ Podcasts and Episodes can have multiple URLs
433 URLs are ordered, and the first URL is considered the canonical one """
435 url
= models
.URLField(max_length
=2048)
437 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
438 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
439 object_id
= models
.UUIDField()
440 content_object
= GenericForeignKey('content_type', 'object_id')
442 class Meta(OrderedModel
.Meta
):
444 # a URL is unique per scope
447 # URLs of an object must be ordered, so that no two URLs of one
448 # object have the same order key
449 ('content_type', 'object_id', 'order'),
453 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ Returns the scope of the object this URL is attached to """
    owner = self.content_object
    return owner.scope
460 class UrlsMixin(models
.Model
):
461 """ Methods for working with URL objects """
463 urls
= GenericRelation(URL
, related_query_name
='urls')
470 """ The main URL of the model """
471 # We could also use self.urls.first() here, but this would result in a
472 # different query and would render a .prefetch_related('urls') useless
473 # The assumption is that we will never have loads of URLS, so
474 # fetching all won't hurt
475 urls
= list(self
.urls
.all())
476 return urls
[0].url
if urls
else None
478 def add_missing_urls(self
, new_urls
):
479 """ Adds missing URLS from new_urls
481 The order of existing URLs is not changed """
482 existing_urls
= self
.urls
.all()
483 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
484 existing_urls
= [u
.url
for u
in existing_urls
]
487 if url
in existing_urls
:
491 URL
.objects
.create(url
=url
,
497 except IntegrityError
as ie
:
499 logger
.warn(u
'Could not add URL: {0}'.format(err
))
502 def set_url(self
, url
):
503 """ Sets the canonical URL """
505 urls
= [u
.url
for u
in self
.urls
.all()]
512 def set_urls(self
, urls
):
513 """ Update the object's URLS to the given list
515 'urls' should be a list of strings. URLs that do not exist are
516 created. Existing urls that are not in the 'urls' list are
518 urls
= [utils
.to_maxlength(URL
, 'url', url
) for url
in urls
]
519 existing
= {u
.url
: u
for u
in self
.urls
.all()}
520 utils
.set_ordered_entries(self
, urls
, existing
, URL
, 'url',
524 class MergedUUID(models
.Model
):
525 """ If objects are merged their UUIDs are stored for later reference
527 see also :class:`MergedUUIDsMixin`
530 uuid
= models
.UUIDField(unique
=True)
532 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
533 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
534 object_id
= models
.UUIDField()
535 content_object
= GenericForeignKey('content_type', 'object_id')
538 verbose_name
= 'Merged UUID'
539 verbose_name_plural
= 'Merged UUIDs'
542 class MergedUUIDsMixin(models
.Model
):
543 """ Methods for working with MergedUUID objects """
545 merged_uuids
= GenericRelation(MergedUUID
,
546 related_query_name
='merged_uuids')
554 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
555 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
556 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
557 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
561 logo_url
= models
.URLField(null
=True, max_length
=1000)
562 group
= models
.ForeignKey(PodcastGroup
, null
=True,
563 on_delete
=models
.PROTECT
)
564 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
566 # if p1 is related to p2, p2 is also related to p1
567 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
569 subscribers
= models
.PositiveIntegerField(default
=0)
570 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
572 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
573 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
574 latest_episode_timestamp
= models
.DateTimeField(null
=True)
575 episode_count
= models
.PositiveIntegerField(default
=0)
576 hub
= models
.URLField(null
=True)
578 # Interval between episodes, within a specified range
579 update_interval
= models
.PositiveSmallIntegerField(null
=False,
580 default
=DEFAULT_UPDATE_INTERVAL
)
582 # factor to increase update_interval if an update does not find any
584 update_interval_factor
= models
.FloatField(default
=1)
586 # "order" value of the most recent episode (will be the highest of all)
587 max_episode_order
= models
.PositiveIntegerField(null
=True, default
=None)
589 # indicates whether the search index is up-to-date (or needs updating)
590 search_index_uptodate
= models
.BooleanField(default
=False, db_index
=True)
592 # search vector for full-text search
593 search_vector
= SearchVectorField(null
=True)
595 objects
= PodcastManager()
602 def subscriber_count(self
):
604 return self
.subscribers
606 def group_with(self
, other
, grouptitle
, myname
, othername
):
607 """ Group the podcast with another one """
608 # TODO: move to PodcastGroup?
610 if bool(self
.group
) and (self
.group
== other
.group
):
611 # they are already grouped
617 if group1
and group2
:
618 raise ValueError('both podcasts already are in different groups')
620 elif not (group1
or group2
):
623 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
624 self
.group_member_name
= myname
628 other
.group_member_name
= othername
635 # add other to self's group
636 other
.group_member_name
= othername
642 # add self to other's group
643 self
.group_member_name
= myname
648 def get_common_episode_title(self
, num_episodes
=100):
650 if self
.common_episode_title
:
651 return self
.common_episode_title
653 episodes
= self
.episode_set
.all()[:num_episodes
]
655 # We take all non-empty titles
656 titles
= [_f
for _f
in (e
.title
for e
in episodes
) if _f
]
658 # there can not be a "common" title of a single title
662 # get the longest common substring
663 common_title
= utils
.longest_substr(titles
)
665 # but consider only the part up to the first number. Otherwise we risk
666 # removing part of the number (eg if a feed contains episodes 100-199)
667 common_title
= re
.search(r
'^\D*', common_title
).group(0)
669 if len(common_title
.strip()) < 2:
675 def get_episode_before(self
, episode
):
676 if not episode
.released
:
678 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
680 def get_episode_after(self
, episode
):
681 if not episode
.released
:
683 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
687 """ A podcast is always in the global scope """
692 """ If models use this object as scope, they'll use this value """
696 def display_title(self
):
697 """ a title for display purposes """
702 logger
.warn('Podcast with ID {podcast_id} does not have a URL'
703 .format(podcast_id
=self
.id))
704 return _('Unknown Podcast')
706 return _('Unknown Podcast from {domain}'.format(
707 domain
=utils
.get_domain(self
.url
)))
def next_update(self):
    """ Time at which the podcast is next scheduled to be updated

    Computed as ``last_update`` plus ``update_interval`` hours multiplied
    by ``update_interval_factor``. """
    hours_until_update = timedelta(hours=self.update_interval)
    delay = hours_until_update * self.update_interval_factor
    return self.last_update + delay
716 class EpisodeQuerySet(MergedUUIDQuerySet
):
717 """ QuerySet for Episodes """
719 def toplist(self
, language
=None):
722 toplist
= toplist
.filter(language
=language
)
724 return toplist
.order_by('-listeners')
727 class EpisodeManager(GenericManager
):
728 """ Custom queries for Episodes """
def get_queryset(self):
    """ Returns an EpisodeQuerySet for the manager's model and database """
    queryset_options = {'using': self._db}
    return EpisodeQuerySet(self.model, **queryset_options)
733 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
734 """ Create an Episode for a given URL
736 This is the only place where new episodes are created """
739 raise ValueError('The URL must not be empty')
741 # TODO: where to specify how uuid is created?
744 url
= utils
.to_maxlength(URL
, 'url', url
)
747 url
= URL
.objects
.get(url
=url
, scope
=podcast
.as_scope
)
749 episode
= url
.content_object
753 with transaction
.atomic():
754 episode
= Episode
.objects
.create(podcast
=podcast
,
758 url
.content_object
= episode
762 return GetCreateResult(episode
, created
)
765 except URL
.DoesNotExist
:
766 # episode did not exist, try to create it
768 with transaction
.atomic():
769 episode
= Episode
.objects
.create(podcast
=podcast
,
773 url
= URL
.objects
.create(url
=url
,
776 content_object
=episode
,
779 # Keep episode_count up to date here; it is not
780 # recalculated when updating the podcast because counting
781 # episodes can be very slow for podcasts with many episodes
782 Podcast
.objects
.filter(pk
=podcast
.pk
)\
783 .update(episode_count
=F('episode_count')+1)
785 return GetCreateResult(episode
, True)
787 # URL could not be created, so it was created since the first get
788 except IntegrityError
:
789 episode
= Episode
.objects
.get(urls__url
=url
,
790 urls__scope
=podcast
.as_scope
,
792 return GetCreateResult(episode
, False)
795 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
796 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
797 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
798 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
,
799 OptionallyOrderedModel
):
802 guid
= models
.CharField(max_length
=200, null
=True)
803 content
= models
.TextField()
804 released
= models
.DateTimeField(null
=True, db_index
=True)
805 duration
= models
.BigIntegerField(null
=True)
806 filesize
= models
.BigIntegerField(null
=True)
807 mimetypes
= models
.CharField(max_length
=200)
808 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
809 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
811 objects
= EpisodeManager()
814 ordering
= ['-order', '-released']
817 ('podcast', 'outdated', 'released'),
818 ('podcast', 'released'),
819 ('released', 'podcast'),
821 # index for typical episode toplist queries
822 ('language', 'listeners'),
824 ('podcast', 'order', 'released'),
829 """ An episode's scope is its podcast """
830 return self
.podcast
.id.hex
833 def display_title(self
):
834 # TODO: return basename of URL (see Podcast.display_title)
837 def get_short_title(self
, common_title
):
838 """ Title when used within the podcast's context """
839 if not self
.title
or not common_title
:
842 title
= self
.title
.replace(common_title
, '').strip()
843 title
= re
.sub(r
'^[\W\d]+', '', title
)
847 def get_episode_number(self
, common_title
):
848 """ Number of the episode """
849 if not self
.title
or not common_title
:
852 title
= self
.title
.replace(common_title
, '').strip()
853 match
= re
.search(r
'^\W*(\d+)', title
)
857 return int(match
.group(1))
860 class Tag(models
.Model
):
861 """ Tags any kind of Model
863 See also :class:`TagsMixin`
872 (DELICIOUS
, 'delicious'),
876 tag
= models
.SlugField()
878 # indicates where the tag came from
879 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
881 # the user that created the tag (if it was created by a user,
883 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
884 on_delete
=models
.CASCADE
)
886 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
887 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
888 object_id
= models
.UUIDField()
889 content_object
= GenericForeignKey('content_type', 'object_id')
893 # a tag can only be assigned once from one source to one item
894 ('tag', 'source', 'user', 'content_type', 'object_id'),