4 from datetime
import datetime
6 from django
.conf
import settings
7 from django
.db
import models
, transaction
, IntegrityError
8 from django
.db
.models
import F
9 from django
.utils
.translation
import ugettext
as _
10 from django
.contrib
.contenttypes
.models
import ContentType
11 from django
.contrib
.contenttypes
.fields
import (GenericRelation
,
14 from mygpo
import utils
15 from mygpo
.core
.models
import (TwitterModel
, UUIDModel
, GenericManager
,
16 UpdateInfoModel
, OrderedModel
, OptionallyOrderedModel
)
19 logger
= logging
.getLogger(__name__
)
22 # default podcast update interval in hours
23 DEFAULT_UPDATE_INTERVAL
= 7 * 24
25 # minimum podcast update interval in hours
26 MIN_UPDATE_INTERVAL
= 5
28 # every podcast should be updated at least once a month
29 MAX_UPDATE_INTERVAL
= 24 * 30
32 class TitleModel(models
.Model
):
33 """ Model that has a title """
35 title
= models
.CharField(max_length
=1000, null
=False, blank
=True,
37 subtitle
= models
.TextField(null
=False, blank
=True)
46 class DescriptionModel(models
.Model
):
47 """ Model that has a description """
49 description
= models
.TextField(null
=False, blank
=True)
55 class LinkModel(models
.Model
):
56 """ Model that has a link """
58 link
= models
.URLField(null
=True, max_length
=1000)
64 class LanguageModel(models
.Model
):
65 """ Model that has a language """
67 language
= models
.CharField(max_length
=10, null
=True, blank
=False,
74 class LastUpdateModel(models
.Model
):
75 """ Model with timestamp of last update from its source """
77 # date and time at which the model has last been updated from its source
78 # (eg a podcast feed). None means that the object has been created as a
79 # stub, without information from the source.
80 last_update
= models
.DateTimeField(null
=True)
86 class LicenseModel(models
.Model
):
87 # URL to a license (usually Creative Commons)
88 license
= models
.CharField(max_length
=100, null
=True, blank
=False,
95 class FlattrModel(models
.Model
):
96 # A Flattr payment URL
97 flattr_url
= models
.URLField(null
=True, blank
=False, max_length
=1000,
104 class ContentTypesModel(models
.Model
):
105 # contains comma-separated values of content types, eg 'audio,video'
106 content_types
= models
.CharField(max_length
=20, null
=False, blank
=True)
112 class MergedIdsModel(models
.Model
):
118 class OutdatedModel(models
.Model
):
119 outdated
= models
.BooleanField(default
=False, db_index
=True)
125 class AuthorModel(models
.Model
):
126 author
= models
.CharField(max_length
=350, null
=True, blank
=True)
class MergedUUIDQuerySet(models.QuerySet):
    """ QuerySet for models whose objects can be found via merged UUIDs """

    def get_by_any_id(self, id):
        """ Fetch an object by its own ID, falling back to a merged ID

        The lookup by the object's own ID is attempted first. Only if the
        model's DoesNotExist is raised is the object searched for via
        ``merged_uuids__uuid``; an exception raised by that second lookup
        is passed on to the caller. """
        # TODO: should this be done in the model?
        try:
            match = self.get(id=id)
        except self.model.DoesNotExist:
            match = self.get(merged_uuids__uuid=id)
        return match
144 class TagsMixin(models
.Model
):
145 """ Methods for working with Tag objects """
147 tags
= GenericRelation('Tag', related_query_name
='tags')
153 class ScopedModel(models
.Model
):
154 """ A model that belongs to some scope, usually for limited uniqueness
156 scope does not allow null values, because null is not equal to null in SQL.
157 It could therefore not be used in unique constraints. """
159 # A slug / URL is unique within a scope; no two podcasts can have the same
160 # URL (scope ''), and no two episodes of the same podcast (scope =
161 # podcast-ID) can have the same URL
162 scope
= models
.CharField(max_length
=32, null
=False, blank
=True,
def get_default_scope(self):
    """ Default scope of the object; has to be provided by subclasses

    Always raises NotImplementedError, naming the class that is missing
    an implementation. """
    cls_name = self.__class__.__name__
    message = '{cls} should implement get_default_scope'.format(cls=cls_name)
    raise NotImplementedError(message)
175 class Slug(OrderedModel
, ScopedModel
):
176 """ Slug for any kind of Model
178 Slugs are ordered, and the first slug is considered the canonical one.
179 See also :class:`SlugsMixin`
182 slug
= models
.SlugField(max_length
=150, db_index
=True)
184 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
185 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
186 object_id
= models
.UUIDField()
187 content_object
= GenericForeignKey('content_type', 'object_id')
189 class Meta(OrderedModel
.Meta
):
191 # a slug is unique per type; eg a podcast can have the same slug
192 # as an episode, but no two podcasts can have the same slug
195 # slugs of an object must be ordered, so that no two slugs of one
196 # object have the same order key
197 ('content_type', 'object_id', 'order'),
201 ('slug', 'content_type')
205 return '{cls}(slug={slug}, order={order}, content_object={obj}'.format(
206 cls
=self
.__class
__.__name
__,
209 obj
=self
.content_object
214 class SlugsMixin(models
.Model
):
215 """ Methods for working with Slug objects """
217 slugs
= GenericRelation(Slug
, related_query_name
='slugs')
224 """ The main slug of the podcast
226 TODO: should be retrieved from a (materialized) view """
228 # We could also use self.slugs.first() here, but this would result in a
229 # different query and would render a .prefetch_related('slugs') useless
230 # The assumption is that we will never have loads of slugs, so
231 # fetching all won't hurt
232 slugs
= list(self
.slugs
.all())
233 slug
= slugs
[0].slug
if slugs
else None
234 logger
.debug('Found slugs %r, picking %r', slugs
, slug
)
238 def add_slug(self
, slug
):
239 """ Adds a (non-canonical) slug """
242 raise ValueError("'%s' is not a valid slug" % slug
)
244 existing_slugs
= self
.slugs
.all()
246 # cut slug to the maximum allowed length
247 slug
= utils
.to_maxlength(Slug
, 'slug', slug
)
249 # check if slug already exists
250 if slug
in [s
.slug
for s
in existing_slugs
]:
253 max_order
= max([-1] + [s
.order
for s
in existing_slugs
])
254 next_order
= max_order
+ 1
255 Slug
.objects
.create(scope
=self
.scope
,
261 def set_slug(self
, slug
):
262 """ Sets the canonical slug """
264 slugs
= [s
.slug
for s
in self
.slugs
.all()]
268 slugs
.insert(0, slug
)
269 self
.set_slugs(slugs
)
272 def remove_slug(self
, slug
):
273 """ Removes a slug """
276 content_type
=ContentType
.objects
.get_for_model(self
),
281 def set_slugs(self
, slugs
):
282 """ Update the object's slugs to the given list
284 'slugs' should be a list of strings. Slugs that do not exist are
285 created. Existing slugs that are not in the 'slugs' list are
287 slugs
= [utils
.to_maxlength(Slug
, 'slug', slug
) for slug
in slugs
]
288 existing
= {s
.slug
: s
for s
in self
.slugs
.all()}
289 utils
.set_ordered_entries(self
, slugs
, existing
, Slug
, 'slug',
294 class PodcastGroup(UUIDModel
, TitleModel
, SlugsMixin
):
295 """ Groups multiple podcasts together """
299 """ A podcast group is always in the global scope """
def subscriber_count(self):
    """ Total number of subscribers over all podcasts of the group """
    # this could be done directly in the DB
    total = 0
    for podcast in self.podcast_set.all():
        total += podcast.subscriber_count()
    return total
308 podcast
= self
.podcast_set
.first()
312 class PodcastQuerySet(MergedUUIDQuerySet
):
313 """ Custom queries for Podcasts """
318 Excludes podcasts with missing title to guarantee some
319 minimum quality of the results """
321 # Using PostgreSQL's RANDOM() is very expensive, so we're generating a
322 # random uuid and query podcasts with a higher ID
323 # This returns podcasts in order of their ID, but the assumption is
324 # that usually only one podcast will be required anyway
327 return self
.exclude(title
='').filter(id__gt
=ruuid
)
330 """ Podcasts providing Flattr information """
331 return self
.exclude(flattr_url__isnull
=True)
333 def license(self
, license_url
=None):
334 """ Podcasts with any / the given license """
336 return self
.filter(license
=license_url
)
338 return self
.exclude(license__isnull
=True)
def order_by_next_update(self):
    """ Sort podcasts by next scheduled update

    The next update is computed in SQL as ``last_update`` plus
    ``update_interval`` hours and exposed as the extra column
    ``next_update``, by which the result is ordered ascending. """
    next_update_sql = "last_update + (update_interval || ' hours')::INTERVAL"
    extra_columns = {'next_update': next_update_sql}
    return self.extra(select=extra_columns).order_by('next_update')
346 def next_update_between(self
, start
, end
):
347 NEXTUPDATE_BETWEEN
= ("(last_update + (update_interval || "
348 "' hours')::INTERVAL) BETWEEN %s AND %s")
350 where
=[NEXTUPDATE_BETWEEN
], params
=[start
, end
]
353 def toplist(self
, language
=None):
356 toplist
= toplist
.filter(language
=language
)
358 return toplist
.order_by('-subscribers')
361 class PodcastManager(GenericManager
):
362 """ Manager for the Podcast model """
def get_queryset(self):
    """ Build the PodcastQuerySet for the manager's model and database """
    model, database = self.model, self._db
    return PodcastQuerySet(model, using=database)
368 def get_or_create_for_url(self
, url
, defaults
={}):
371 raise ValueError('The URL must not be empty')
373 # TODO: where to specify how uuid is created?
379 url
= utils
.to_maxlength(URL
, 'url', url
)
381 # try to fetch the podcast
382 return Podcast
.objects
.get(urls__url
=url
,
385 except Podcast
.DoesNotExist
:
386 # podcast did not exist, try to create it
388 with transaction
.atomic():
389 podcast
= Podcast
.objects
.create(**defaults
)
390 url
= URL
.objects
.create(url
=url
,
393 content_object
=podcast
,
397 # URL could not be created, so it was created since the first get
398 except IntegrityError
:
399 return Podcast
.objects
.get(urls__url
=url
,
404 class URL(OrderedModel
, ScopedModel
):
405 """ Podcasts and Episodes can have multiple URLs
407 URLs are ordered, and the first URL is considered the canonical one """
409 url
= models
.URLField(max_length
=2048)
411 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
412 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
413 object_id
= models
.UUIDField()
414 content_object
= GenericForeignKey('content_type', 'object_id')
416 class Meta(OrderedModel
.Meta
):
418 # a URL is unique per scope
421 # URLs of an object must be ordered, so that no two URLs of one
422 # object have the same order key
423 ('content_type', 'object_id', 'order'),
427 verbose_name_plural
= 'URLs'
def get_default_scope(self):
    """ A URL defaults to the scope of the object it is attached to """
    owner = self.content_object
    return owner.scope
434 class UrlsMixin(models
.Model
):
435 """ Methods for working with URL objects """
437 urls
= GenericRelation(URL
, related_query_name
='urls')
444 """ The main URL of the model """
445 # We could also use self.urls.first() here, but this would result in a
446 # different query and would render a .prefetch_related('urls') useless
447 # The assumption is that we will never have loads of URLS, so
448 # fetching all won't hurt
449 urls
= list(self
.urls
.all())
450 return urls
[0].url
if urls
else None
452 def add_missing_urls(self
, new_urls
):
453 """ Adds missing URLS from new_urls
455 The order of existing URLs is not changed """
456 existing_urls
= self
.urls
.all()
457 next_order
= max([-1] + [u
.order
for u
in existing_urls
]) + 1
458 existing_urls
= [u
.url
for u
in existing_urls
]
461 if url
in existing_urls
:
465 URL
.objects
.create(url
=url
,
471 except IntegrityError
as ie
:
473 logger
.warn(u
'Could not add URL: {0}'.format(err
))
476 def set_url(self
, url
):
477 """ Sets the canonical URL """
479 urls
= [u
.url
for u
in self
.urls
.all()]
486 def set_urls(self
, urls
):
487 """ Update the object's URLS to the given list
489 'urls' should be a list of strings. URLs that do not exist are
490 created. Existing urls that are not in the 'urls' list are
492 urls
= [utils
.to_maxlength(URL
, 'url', url
) for url
in urls
]
493 existing
= {u
.url
: u
for u
in self
.urls
.all()}
494 utils
.set_ordered_entries(self
, urls
, existing
, URL
, 'url',
498 class MergedUUID(models
.Model
):
499 """ If objects are merged their UUIDs are stored for later reference
501 see also :class:`MergedUUIDsMixin`
504 uuid
= models
.UUIDField(unique
=True)
506 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
507 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
508 object_id
= models
.UUIDField()
509 content_object
= GenericForeignKey('content_type', 'object_id')
512 verbose_name
= 'Merged UUID'
513 verbose_name_plural
= 'Merged UUIDs'
516 class MergedUUIDsMixin(models
.Model
):
517 """ Methods for working with MergedUUID objects """
519 merged_uuids
= GenericRelation(MergedUUID
,
520 related_query_name
='merged_uuids')
528 class Podcast(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
529 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
530 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
531 AuthorModel
, UrlsMixin
, SlugsMixin
, TagsMixin
, MergedUUIDsMixin
,
535 logo_url
= models
.URLField(null
=True, max_length
=1000)
536 group
= models
.ForeignKey(PodcastGroup
, null
=True,
537 on_delete
=models
.PROTECT
)
538 group_member_name
= models
.CharField(max_length
=30, null
=True, blank
=False)
540 # if p1 is related to p2, p2 is also related to p1
541 related_podcasts
= models
.ManyToManyField('self', symmetrical
=True)
543 subscribers
= models
.PositiveIntegerField(default
=0)
544 restrictions
= models
.CharField(max_length
=20, null
=False, blank
=True,
546 common_episode_title
= models
.CharField(max_length
=100, null
=False, blank
=True)
547 new_location
= models
.URLField(max_length
=1000, null
=True, blank
=False)
548 latest_episode_timestamp
= models
.DateTimeField(null
=True)
549 episode_count
= models
.PositiveIntegerField(default
=0)
550 hub
= models
.URLField(null
=True)
551 update_interval
= models
.PositiveSmallIntegerField(null
=False,
552 default
=DEFAULT_UPDATE_INTERVAL
)
554 # "order" value of the most recent episode (will be the highest of all)
555 max_episode_order
= models
.PositiveIntegerField(null
=True, default
=None)
557 objects
= PodcastManager()
559 def subscriber_count(self
):
561 return self
.subscribers
563 def group_with(self
, other
, grouptitle
, myname
, othername
):
564 """ Group the podcast with another one """
565 # TODO: move to PodcastGroup?
567 if bool(self
.group
) and (self
.group
== other
.group
):
568 # they are already grouped
574 if group1
and group2
:
575 raise ValueError('both podcasts already are in different groups')
577 elif not (group1
or group2
):
580 group
= PodcastGroup
.objects
.create(id=uuid
.uuid1(), title
=grouptitle
)
581 self
.group_member_name
= myname
585 other
.group_member_name
= othername
592 # add other to self's group
593 other
.group_member_name
= othername
599 # add self to other's group
600 self
.group_member_name
= myname
605 def get_common_episode_title(self
, num_episodes
=100):
607 if self
.common_episode_title
:
608 return self
.common_episode_title
610 episodes
= self
.episode_set
.all()[:num_episodes
]
612 # We take all non-empty titles
613 titles
= [_f
for _f
in (e
.title
for e
in episodes
) if _f
]
615 # there can not be a "common" title of a single title
619 # get the longest common substring
620 common_title
= utils
.longest_substr(titles
)
622 # but consider only the part up to the first number. Otherwise we risk
623 # removing part of the number (eg if a feed contains episodes 100-199)
624 common_title
= re
.search(r
'^\D*', common_title
).group(0)
626 if len(common_title
.strip()) < 2:
632 def get_episode_before(self
, episode
):
633 if not episode
.released
:
635 return self
.episode_set
.filter(released__lt
=episode
.released
).latest()
637 def get_episode_after(self
, episode
):
638 if not episode
.released
:
640 return self
.episode_set
.filter(released__gt
=episode
.released
).first()
644 """ A podcast is always in the global scope """
649 """ If models use this object as scope, they'll use this value """
653 def display_title(self
):
654 """ a title for display purposes """
659 logger
.warn('Podcast with ID {podcast_id} does not have a URL'
660 .format(podcast_id
=self
.id))
661 return _('Unknown Podcast')
663 return _('Unknown Podcast from {domain}'.format(
664 domain
=utils
.get_domain(self
.url
)))
667 class EpisodeQuerySet(MergedUUIDQuerySet
):
668 """ QuerySet for Episodes """
670 def toplist(self
, language
=None):
673 toplist
= toplist
.filter(language
=language
)
675 return toplist
.order_by('-listeners')
678 class EpisodeManager(GenericManager
):
679 """ Custom queries for Episodes """
def get_queryset(self):
    """ Build the EpisodeQuerySet for the manager's model and database """
    model, database = self.model, self._db
    return EpisodeQuerySet(model, using=database)
684 def get_or_create_for_url(self
, podcast
, url
, defaults
={}):
685 """ Create an Episode for a given URL
687 This is the only place where new episodes are created """
690 raise ValueError('The URL must not be empty')
692 # TODO: where to specify how uuid is created?
695 url
= utils
.to_maxlength(URL
, 'url', url
)
698 # try to fetch the episode
699 return Episode
.objects
.get(urls__url
=url
,
700 urls__scope
=podcast
.as_scope
,
702 except Episode
.DoesNotExist
:
703 # episode did not exist, try to create it
705 with transaction
.atomic():
706 episode
= Episode
.objects
.create(podcast
=podcast
,
710 url
= URL
.objects
.create(url
=url
,
713 content_object
=episode
,
716 # Keep episode_count up to date here; it is not
717 # recalculated when updating the podcast because counting
718 # episodes can be very slow for podcasts with many episodes
719 Podcast
.objects
.filter(pk
=podcast
.pk
)\
720 .update(episode_count
=F('episode_count')+1)
724 # URL could not be created, so it was created since the first get
725 except IntegrityError
:
726 return Episode
.objects
.get(urls__url
=url
,
727 urls__scope
=podcast
.as_scope
,
731 class Episode(UUIDModel
, TitleModel
, DescriptionModel
, LinkModel
,
732 LanguageModel
, LastUpdateModel
, UpdateInfoModel
, LicenseModel
,
733 FlattrModel
, ContentTypesModel
, MergedIdsModel
, OutdatedModel
,
734 AuthorModel
, UrlsMixin
, SlugsMixin
, MergedUUIDsMixin
,
735 OptionallyOrderedModel
):
738 guid
= models
.CharField(max_length
=200, null
=True)
739 content
= models
.TextField()
740 released
= models
.DateTimeField(null
=True, db_index
=True)
741 duration
= models
.BigIntegerField(null
=True)
742 filesize
= models
.BigIntegerField(null
=True)
743 mimetypes
= models
.CharField(max_length
=200)
744 podcast
= models
.ForeignKey(Podcast
, on_delete
=models
.PROTECT
)
745 listeners
= models
.PositiveIntegerField(null
=True, db_index
=True)
747 objects
= EpisodeManager()
750 ordering
= ['-order', '-released']
753 ('podcast', 'outdated', 'released'),
754 ('podcast', 'released'),
755 ('released', 'podcast'),
757 # index for typical episode toplist queries
758 ('language', 'listeners'),
760 ('podcast', 'order', 'released'),
765 """ An episode's scope is its podcast """
766 return self
.podcast
.id.hex
769 def display_title(self
):
770 # TODO: return basename of URL (see Podcast.display_title)
773 def get_short_title(self
, common_title
):
774 """ Title when used within the podcast's context """
775 if not self
.title
or not common_title
:
778 title
= self
.title
.replace(common_title
, '').strip()
779 title
= re
.sub(r
'^[\W\d]+', '', title
)
783 def get_episode_number(self
, common_title
):
784 """ Number of the episode """
785 if not self
.title
or not common_title
:
788 title
= self
.title
.replace(common_title
, '').strip()
789 match
= re
.search(r
'^\W*(\d+)', title
)
793 return int(match
.group(1))
796 class Tag(models
.Model
):
797 """ Tags any kind of Model
799 See also :class:`TagsMixin`
808 (DELICIOUS
, 'delicious'),
812 tag
= models
.SlugField()
814 # indicates where the tag came from
815 source
= models
.PositiveSmallIntegerField(choices
=SOURCE_CHOICES
)
817 # the user that created the tag (if it was created by a user,
819 user
= models
.ForeignKey(settings
.AUTH_USER_MODEL
, null
=True,
820 on_delete
=models
.CASCADE
)
822 # see https://docs.djangoproject.com/en/1.6/ref/contrib/contenttypes/#generic-relations
823 content_type
= models
.ForeignKey(ContentType
, on_delete
=models
.PROTECT
)
824 object_id
= models
.UUIDField()
825 content_object
= GenericForeignKey('content_type', 'object_id')
829 # a tag can only be assigned once from one source to one item
830 ('tag', 'source', 'user', 'content_type', 'object_id'),