From 11b7ccbf18815b8577be89ecb5004d01184f68b6 Mon Sep 17 00:00:00 2001 From: Stefan Koegl Date: Mon, 21 Jun 2010 21:23:39 +0200 Subject: [PATCH] store mimetype for each episode --- install/update-35.sql | 3 +++ mygpo/api/models/__init__.py | 1 + mygpo/data/feeddownloader.py | 30 +++++++++++++++++++----------- 3 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 install/update-35.sql diff --git a/install/update-35.sql b/install/update-35.sql new file mode 100644 index 00000000..62102147 --- /dev/null +++ b/install/update-35.sql @@ -0,0 +1,3 @@ +alter table episode add column mimetype varchar(30); +create index mimetype on episode(mimetype); + diff --git a/mygpo/api/models/__init__.py b/mygpo/api/models/__init__.py index beaf4622..2815952f 100644 --- a/mygpo/api/models/__init__.py +++ b/mygpo/api/models/__init__.py @@ -277,6 +277,7 @@ class Episode(models.Model): language = models.CharField(max_length=10, null=True, blank=True) last_update = models.DateTimeField(auto_now=True) outdated = models.BooleanField(default=False) #set to true after episode hasn't been found in feed + mimetype = models.CharField(max_length=30, blank=True, null=True) def number(self): m = re.search('\D*(\d+)\D+', self.title) diff --git a/mygpo/data/feeddownloader.py b/mygpo/data/feeddownloader.py index e4d4f175..cef91bbe 100755 --- a/mygpo/data/feeddownloader.py +++ b/mygpo/data/feeddownloader.py @@ -46,15 +46,16 @@ def mark_outdated(podcast): e.save() -def check_mime(mimetype, url): - """Check if a mimetype is a "wanted" media type""" +def get_mimetype(mimetype, url): + """Returns the mimetype if its a "wanted" media type, otherwise None""" if not mimetype: mimetype, _encoding = mimetypes.guess_type(url) - if not mimetype: - return False + return mimetype + +def check_mimetype(mimetype): if '/' in mimetype: category, type = mimetype.split('/', 1) if category in ('audio', 'video', 'image'): @@ -64,21 +65,27 @@ def check_mime(mimetype, url): # but we do not want to accept all files with application category if type in ('ogg', ): return True + return False else: return False + def get_episode_url(entry): """Get the download / episode URL of a feedparser entry""" enclosures = getattr(entry, 'enclosures', []) for enclosure in enclosures: - if 'href' in enclosure and check_mime(enclosure.get('type', ''), enclosure['href']): - return enclosure['href'] + if 'href' in enclosure: + mimetype = get_mimetype(enclosure.get('type', ''), enclosure['href']) + if check_mimetype(mimetype): + return enclosure['href'], mimetype media_content = getattr(entry, 'media_content', []) for media in media_content: - if 'url' in media and check_mime(media.get('type', ''), media['url']): - return media['url'] + if 'url' in media: + mimetype = get_mimetype(media.get('type', ''), media['url']) + if check_mimetype(mimetype): + return media['url'], mimetype links = getattr(entry, 'links', []) for link in links: @@ -147,7 +154,7 @@ def update_feed_tags(podcast, tags): PodcastTag.objects.create(podcast=podcast, source=src, tag=tag) -def get_episode_metadata(entry, url): +def get_episode_metadata(entry, url, mimetype): d = { 'url': url, 'title': entry.get('title', entry.get('link', '')), @@ -159,6 +166,7 @@ def get_episode_metadata(entry, url): 'filesize': get_filesize(entry, url), 'language': entry.get('language', ''), 'outdated': False, + 'mimetype': mimetype, } try: d['timestamp'] = datetime.datetime(*(entry.updated_parsed)[:6]) @@ -232,13 +240,13 @@ def update_podcasts(fetch_queue): for entry in feed.entries: try: - url = get_episode_url(entry) + url, mimetype = get_episode_url(entry) if url is None: print 'Ignoring entry' continue url = sanitize_url(url, podcast=False, episode=True) - md = get_episode_metadata(entry, url) + md = get_episode_metadata(entry, url, mimetype) e, created = models.Episode.objects.get_or_create( podcast=podcast, url=url, -- 2.11.4.GIT