From 1f5e8a20c2e2c674ad017714d6ba8a025d11f7bf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Stefan=20K=C3=B6gl?= Date: Sun, 31 May 2015 14:00:07 +0200 Subject: [PATCH] Update podcasts from feedservice --- doc/dev/python3.rst | 1 - mygpo/data/feeddownloader.py | 912 +++++++++++----------- mygpo/data/management/commands/feed-downloader.py | 5 +- mygpo/data/tasks.py | 5 +- mygpo/directory/search.py | 7 +- mygpo/directory/views.py | 16 +- mygpo/settings.py | 2 + mygpo/share/views.py | 5 +- requirements.txt | 2 +- 9 files changed, 479 insertions(+), 476 deletions(-) rewrite mygpo/data/feeddownloader.py (76%) diff --git a/doc/dev/python3.rst b/doc/dev/python3.rst index 17d8f903..0bf8eb27 100644 --- a/doc/dev/python3.rst +++ b/doc/dev/python3.rst @@ -19,5 +19,4 @@ Not OK Unknown ------- -* mygpo-feedservice * celery-redis diff --git a/mygpo/data/feeddownloader.py b/mygpo/data/feeddownloader.py dissimilarity index 76% index eec0de11..cb95b401 100755 --- a/mygpo/data/feeddownloader.py +++ b/mygpo/data/feeddownloader.py @@ -1,449 +1,463 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# -# This file is part of my.gpodder.org. -# -# my.gpodder.org is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# my.gpodder.org is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public -# License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with my.gpodder.org. If not, see . -# - -import os.path -import urllib2 -import httplib -import hashlib -from datetime import datetime -from itertools import chain, islice -import socket - -from django.db import transaction -from django.conf import settings - -from mygpo.podcasts.models import Podcast, URL, Slug, Episode -from mygpo.core.slugs import assign_missing_episode_slugs, PodcastSlug -from mygpo.podcasts.models import DEFAULT_UPDATE_INTERVAL, \ - MIN_UPDATE_INTERVAL, MAX_UPDATE_INTERVAL -from feedservice.parse import parse_feed, FetchFeedException -from feedservice.parse.text import ConvertMarkdown -from feedservice.parse.models import ParserException -from feedservice.parse.vimeo import VimeoError -from mygpo.utils import file_hash, to_maxlength -from mygpo.web.logo import CoverArt -from mygpo.data.podcast import subscribe_at_hub -from mygpo.data.tasks import update_related_podcasts -from mygpo.pubsub.models import SubscriptionError -from mygpo.directory.tags import update_category - -import logging -logger = logging.getLogger(__name__) - -MAX_EPISODES_UPDATE=200 - -class NoPodcastCreated(Exception): - """ raised when no podcast obj was created for a new URL """ - - -class NoEpisodesException(Exception): - """ raised when parsing something that doesn't contain any episodes """ - - -class PodcastUpdater(object): - """ Updates a number of podcasts with data from their feeds """ - - def update_queue(self, queue): - """ Fetch data for the URLs supplied as the queue iterable """ - - for n, podcast_url in enumerate(queue, 1): - logger.info('Update %d - %s', n, podcast_url) - try: - yield self.update(podcast_url) - - except NoPodcastCreated as npc: - logger.info('No podcast created: %s', npc) - - except: - logger.exception('Error while updating podcast "%s"', - podcast_url) - raise - - 
- def update(self, podcast_url): - """ Update the podcast for the supplied URL """ - - try: - parsed = self._fetch_feed(podcast_url) - self._validate_parsed(parsed) - - except (ParserException, FetchFeedException, NoEpisodesException, - VimeoError, ValueError, socket.error, urllib2.HTTPError) as ex: - #TODO: catch valueError (for invalid Ipv6 in feedservice) - - if isinstance(ex, VimeoError): - logger.exception('Problem when updating Vimeo feed %s', - podcast_url) - - # if we fail to parse the URL, we don't even create the - # podcast object - try: - p = Podcast.objects.get(urls__url=podcast_url) - # if it exists already, we mark it as outdated - self._mark_outdated(p, 'error while fetching feed: %s' % - str(ex)) - return p - - except Podcast.DoesNotExist: - raise NoPodcastCreated(ex) - - assert parsed, 'fetch_feed must return something' - p = Podcast.objects.get_or_create_for_url(podcast_url) - episodes = self._update_episodes(p, parsed.episodes) - max_episode_order = self._order_episodes(p) - self._update_podcast(p, parsed, episodes, max_episode_order) - return p - - - def verify_podcast_url(self, podcast_url): - parsed = self._fetch_feed(podcast_url) - self._validate_parsed(parsed) - return True - - - def _fetch_feed(self, podcast_url): - import socket - t = socket.getdefaulttimeout() - socket.setdefaulttimeout(10) - return parse_feed(podcast_url, text_processor=ConvertMarkdown()) - socket.setdefaulttimeout(t) - - - - def _validate_parsed(self, parsed): - """ validates the parsed results and raises an exception if invalid - - feedparser parses pretty much everything. We reject anything that - doesn't look like a feed""" - - if not parsed or not parsed.episodes: - raise NoEpisodesException('no episodes found') - - - def _update_podcast(self, podcast, parsed, episodes, max_episode_order): - """ updates a podcast according to new parser results """ - - # we need that later to decide if we can "bump" a category - prev_latest_episode_timestamp = podcast.latest_episode_timestamp - - podcast.title = parsed.title or podcast.title - podcast.description = parsed.description or podcast.description - podcast.subtitle = parsed.subtitle or podcast.subtitle - podcast.link = parsed.link or podcast.link - podcast.logo_url = parsed.logo or podcast.logo_url - podcast.author = to_maxlength(Podcast, 'author', parsed.author or podcast.author) - podcast.language = to_maxlength(Podcast, 'language', parsed.language or podcast.language) - podcast.content_types = ','.join(parsed.content_types) or podcast.content_types -#podcast.tags['feed'] = parsed.tags or podcast.tags.get('feed', []) - podcast.common_episode_title = to_maxlength(Podcast, - 'common_episode_title', - parsed.common_title or podcast.common_episode_title) - podcast.new_location = parsed.new_location or podcast.new_location - podcast.flattr_url = to_maxlength(Podcast, 'flattr_url', - parsed.flattr or podcast.flattr_url) - podcast.hub = parsed.hub or podcast.hub - podcast.license = parsed.license or podcast.license - podcast.max_episode_order = max_episode_order - - podcast.add_missing_urls(parsed.urls) - - if podcast.new_location: - try: - new_podcast = Podcast.objects.get(urls__url=podcast.new_location) - if new_podcast != podcast: - self._mark_outdated(podcast, 'redirected to different podcast') - return - except Podcast.DoesNotExist: - podcast.set_url(podcast.new_location) - - - # latest episode timestamp - episodes = Episode.objects.filter(podcast=podcast, released__isnull=False).order_by('released') - - podcast.update_interval = 
get_update_interval(episodes) - - latest_episode = episodes.last() - if latest_episode: - podcast.latest_episode_timestamp = latest_episode.released - - # podcast.episode_count is not update here on purpose. It is, instead, - # continuously updated when creating new episodes in - # EpisodeManager.get_or_create_for_url - - self._update_categories(podcast, prev_latest_episode_timestamp) - - # try to download the logo and reset logo_url to None on http errors - found = self._save_podcast_logo(podcast.logo_url) - if not found: - podcast.logo_url = None - - # The podcast is always saved (not just when there are changes) because - # we need to record the last update - logger.info('Saving podcast.') - podcast.last_update = datetime.utcnow() - podcast.save() - - - try: - subscribe_at_hub(podcast) - except SubscriptionError as se: - logger.warn('subscribing to hub failed: %s', str(se)) - - - if not podcast.slug: - slug = PodcastSlug(podcast).get_slug() - if slug: - podcast.add_slug(slug) - - assign_missing_episode_slugs(podcast) - update_related_podcasts.delay(podcast) - - - def _update_categories(self, podcast, prev_timestamp): - """ checks some practical requirements and updates a category """ - - from datetime import timedelta - - max_timestamp = datetime.utcnow() + timedelta(days=1) - - # no episodes at all - if not podcast.latest_episode_timestamp: - return - - # no new episode - if prev_timestamp and podcast.latest_episode_timestamp <= prev_timestamp: - return - - # too far in the future - if podcast.latest_episode_timestamp > max_timestamp: - return - - # not enough subscribers - if podcast.subscriber_count() < settings.MIN_SUBSCRIBERS_CATEGORY: - return - - update_category(podcast) - - - def _update_episodes(self, podcast, parsed_episodes): - - pid = podcast.get_id() - - # list of (obj, fun) where fun is the function to update obj - updated_episodes = [] - episodes_to_update = list(islice(parsed_episodes, 0, MAX_EPISODES_UPDATE)) - logger.info('Parsed %d (%d) episodes', len(parsed_episodes), - len(episodes_to_update)) - - logger.info('Updating %d episodes', len(episodes_to_update)) - for n, parsed in enumerate(episodes_to_update, 1): - - url = get_episode_url(parsed) - if not url: - logger.info('Skipping episode %d for missing URL', n) - continue - - logger.info('Updating episode %d / %d', n, len(parsed_episodes)) - - episode = Episode.objects.get_or_create_for_url(podcast, url) - - update_episode(parsed, episode, podcast) - updated_episodes.append(episode) - - # and mark the remaining ones outdated - current_episodes = Episode.objects.filter(podcast=podcast, - outdated=False)[:500] - outdated_episodes = set(current_episodes) - set(updated_episodes) - - logger.info('Marking %d episodes as outdated', len(outdated_episodes)) - for episode in outdated_episodes: - mark_outdated(episode) - - @transaction.atomic - def _order_episodes(self, podcast): - """ Reorder the podcast's episode according to release timestamp - - Returns the highest order value (corresponding to the most recent - episode) """ - - num_episodes = podcast.episode_count - if not num_episodes: - return 0 - - episodes = podcast.episode_set.all().extra(select={ - 'has_released': 'released IS NOT NULL', - })\ - .order_by('-has_released', '-released', 'pk')\ - .only('pk') - - for n, episode in enumerate(episodes.iterator(), 1): - # assign ``order`` from higher (most recent) to 0 (oldest) - # None means "unknown" - new_order = num_episodes - n - - # optimize for new episodes that are newer than all existing - if episode.order == 
new_order: - continue - - logger.info('Updating order from {} to {}'.format(episode.order, - new_order)) - episode.order = new_order - episode.save() - - return num_episodes -1 - - def _save_podcast_logo(self, cover_art): - if not cover_art: - return - - try: - image_sha1 = hashlib.sha1(cover_art).hexdigest() - prefix = CoverArt.get_prefix(image_sha1) - - filename = CoverArt.get_original(prefix, image_sha1) - dirname = CoverArt.get_dir(filename) - - # get hash of existing file - if os.path.exists(filename): - with open(filename) as f: - old_hash = file_hash(f).digest() - else: - old_hash = '' - - logger.info('Logo %s', cover_art) - - # save new cover art - with open(filename, 'w') as fp: - fp.write(urllib2.urlopen(cover_art).read()) - - # get hash of new file - with open(filename) as f: - new_hash = file_hash(f).digest() - - # remove thumbnails if cover changed - if old_hash != new_hash: - thumbnails = CoverArt.get_existing_thumbnails(prefix, filename) - logger.info('Removing %d thumbnails', len(thumbnails)) - for f in thumbnails: - os.unlink(f) - - return cover_art - - except (urllib2.HTTPError, urllib2.URLError, ValueError, - httplib.BadStatusLine, socket.error, IOError) as e: - logger.warn('Exception while updating podcast logo: %s', str(e)) - - - def _mark_outdated(self, podcast, msg=''): - logger.info('marking podcast outdated: %s', msg) - podcast.outdated = True - podcast.last_update = datetime.utcnow() - podcast.save() - self._update_episodes(podcast, []) - - -def get_episode_url(parsed_episode): - """ returns the URL of a parsed episode """ - for f in parsed_episode.files: - if f.urls: - return f.urls[0] - return None - - -def update_episode(parsed_episode, episode, podcast): - """ updates "episode" with the data from "parsed_episode" """ - - # TODO: check if there have been any changes, to avoid unnecessary updates - episode.guid = to_maxlength(Episode, 'guid', parsed_episode.guid or episode.guid) - episode.description = parsed_episode.description or episode.description - episode.subtitle = parsed_episode.subtitle or episode.subtitle - episode.content = parsed_episode.content or parsed_episode.description or episode.content - episode.link = to_maxlength(Episode, 'link', - parsed_episode.link or episode.link) - episode.released = datetime.utcfromtimestamp(parsed_episode.released) if parsed_episode.released else episode.released - episode.author = to_maxlength(Episode, 'author', parsed_episode.author or episode.author) - episode.duration = parsed_episode.duration or episode.duration - episode.filesize = parsed_episode.files[0].filesize - episode.language = parsed_episode.language or episode.language or \ - podcast.language - episode.mimetypes = ','.join(list(set(filter(None, [f.mimetype for f in parsed_episode.files])))) - episode.flattr_url = to_maxlength(Episode, 'flattr_url', - parsed_episode.flattr or - episode.flattr_url) - episode.license = parsed_episode.license or episode.license - - episode.title = to_maxlength(Episode, 'title', - parsed_episode.title or episode.title or - file_basename_no_extension(episode.url)) - - episode.last_update = datetime.utcnow() - episode.save() - - parsed_urls = list(chain.from_iterable(f.urls for f in parsed_episode.files)) - episode.add_missing_urls(parsed_urls) - - -def mark_outdated(obj): - """ marks obj outdated if its not already """ - if obj.outdated: - return None - - obj.outdated = True - obj.last_update = datetime.utcnow() - obj.save() - - -def get_update_interval(episodes): - """ calculates the avg interval between new episodes """ - 
- count = len(episodes) - if not count: - logger.info('no episodes, using default interval of %dh', - DEFAULT_UPDATE_INTERVAL) - return DEFAULT_UPDATE_INTERVAL - - earliest = episodes[0] - now = datetime.utcnow() - - timespan_s = (now - earliest.released).total_seconds() - timespan_h = timespan_s / 60 / 60 - - interval = int(timespan_h / count) - logger.info('%d episodes in %d days => %dh interval', count, - timespan_h / 24, interval) - - # place interval between {MIN,MAX}_UPDATE_INTERVAL - interval = max(interval, MIN_UPDATE_INTERVAL) - interval = min(interval, MAX_UPDATE_INTERVAL) - - return interval - - -def file_basename_no_extension(filename): - """ Returns filename without extension - - >>> file_basename_no_extension('/home/me/file.txt') - 'file' - - >>> file_basename_no_extension('file') - 'file' - """ - base = os.path.basename(filename) - name, extension = os.path.splitext(base) - return name +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# This file is part of my.gpodder.org. +# +# my.gpodder.org is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# my.gpodder.org is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public +# License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with my.gpodder.org. If not, see . +# + +import os.path +import urllib2 +from urlparse import urljoin +import httplib +import hashlib +from datetime import datetime, timedelta +from itertools import chain, islice +import socket +import requests + +from django.db import transaction +from django.conf import settings + +from mygpo.podcasts.models import Podcast, URL, Slug, Episode +from mygpo.core.slugs import assign_missing_episode_slugs, PodcastSlug +from mygpo.podcasts.models import DEFAULT_UPDATE_INTERVAL, \ + MIN_UPDATE_INTERVAL, MAX_UPDATE_INTERVAL +from mygpo.utils import file_hash, to_maxlength +from mygpo.web.logo import CoverArt +from mygpo.data.podcast import subscribe_at_hub +from mygpo.data.tasks import update_related_podcasts +from mygpo.pubsub.models import SubscriptionError +from mygpo.directory.tags import update_category + +import logging +logger = logging.getLogger(__name__) + +MAX_EPISODES_UPDATE = 200 + + +class UpdatePodcastException(Exception): + pass + + +class NoPodcastCreated(Exception): + """ raised when no podcast obj was created for a new URL """ + + +class NoEpisodesException(Exception): + """ raised when parsing something that doesn't contain any episodes """ + + +def update_podcasts(queue): + """ Fetch data for the URLs supplied as the queue iterable """ + + for n, podcast_url in enumerate(queue, 1): + logger.info('Update %d - %s', n, podcast_url) + try: + yield update_podcast(podcast_url) + + except NoPodcastCreated as npc: + logger.info('No podcast created: %s', npc) + + except: + logger.exception('Error while updating podcast "%s"', + podcast_url) + raise + + +def update_podcast(podcast_url): + """ Update the podcast for the supplied URL """ + + try: + parsed = _fetch_feed(podcast_url) + _validate_parsed(parsed) + + except requests.exceptions.RequestException as re: + logging.exception('Error while fetching response from feedservice') + + except NoEpisodesException as nee: + 
logger.warn('No episode found while parsing podcast')
+
+        # if we fail to parse the URL, we don't even create the
+        # podcast object
+        try:
+            p = Podcast.objects.get(urls__url=podcast_url)
+            # if it exists already, we mark it as outdated
+            _mark_outdated(p, 'error while fetching feed: %s' % str(nee))
+            return p
+
+        except Podcast.DoesNotExist:
+            raise NoPodcastCreated(nee)
+
+    assert parsed, 'fetch_feed must return something'
+    p = Podcast.objects.get_or_create_for_url(podcast_url)
+    episodes = _update_episodes(p, parsed.get('episodes', []))
+    max_episode_order = _order_episodes(p)
+    _update_podcast(p, parsed, episodes, max_episode_order)
+    return p
+
+
+def verify_podcast_url(podcast_url):
+    parsed = _fetch_feed(podcast_url)
+    _validate_parsed(parsed)
+    return True
+
+
+def _fetch_feed(podcast_url):
+    params = {'url': podcast_url}
+    headers = {
+        'Accept': 'application/json',
+    }
+    # markdown and other parameters?
+    url = urljoin(settings.FEEDSERVICE_URL, 'parse')
+    r = requests.get(url, params=params, headers=headers, timeout=10)
+    return r.json()[0]
+
+
+def _validate_parsed(parsed):
+    """ validates the parsed results and raises an exception if invalid
+
+    feedparser parses pretty much everything. We reject anything that
+    doesn't look like a feed"""
+
+    if not parsed or not parsed.get('episodes', []):
+        raise NoEpisodesException('no episodes found')
+
+
+def _update_podcast(podcast, parsed, episodes, max_episode_order):
+    """ updates a podcast according to new parser results """
+
+    # we need that later to decide if we can "bump" a category
+    prev_latest_episode_timestamp = podcast.latest_episode_timestamp
+
+    podcast.title = parsed.get('title') or podcast.title
+    podcast.description = parsed.get('description') or podcast.description
+    podcast.subtitle = parsed.get('subtitle') or podcast.subtitle
+    podcast.link = parsed.get('link') or podcast.link
+    podcast.logo_url = parsed.get('logo') or podcast.logo_url
+    podcast.author = to_maxlength(Podcast, 'author', parsed.get('author') or
+                                  podcast.author)
+    podcast.language = to_maxlength(Podcast, 'language',
+                                    parsed.get('language') or podcast.language)
+    podcast.content_types = ','.join(parsed.get('content_types', [])) or \
+        podcast.content_types
+    #podcast.tags['feed'] = parsed.tags or podcast.tags.get('feed', [])
+    podcast.common_episode_title = to_maxlength(
+        Podcast,
+        'common_episode_title',
+        parsed.get('common_title') or podcast.common_episode_title)
+    podcast.new_location = parsed.get('new_location') or podcast.new_location
+    podcast.flattr_url = to_maxlength(Podcast, 'flattr_url',
+                                      parsed.get('flattr') or
+                                      podcast.flattr_url)
+    podcast.hub = parsed.get('hub') or podcast.hub
+    podcast.license = parsed.get('license') or podcast.license
+    podcast.max_episode_order = max_episode_order
+
+    podcast.add_missing_urls(parsed.get('urls', []))
+
+    if podcast.new_location:
+        try:
+            new_podcast = Podcast.objects.get(urls__url=podcast.new_location)
+            if new_podcast != podcast:
+                _mark_outdated(podcast, 'redirected to different podcast')
+                return
+        except Podcast.DoesNotExist:
+            podcast.set_url(podcast.new_location)
+
+    # latest episode timestamp
+    episodes = Episode.objects.filter(podcast=podcast,
+                                      released__isnull=False)\
+                              .order_by('released')
+
+    podcast.update_interval = get_update_interval(episodes)
+
+    latest_episode = episodes.last()
+    if latest_episode:
+        podcast.latest_episode_timestamp = latest_episode.released
+
+    # podcast.episode_count is not updated here on purpose.
It is, instead, + # continuously updated when creating new episodes in + # EpisodeManager.get_or_create_for_url + + _update_categories(podcast, prev_latest_episode_timestamp) + + # try to download the logo and reset logo_url to None on http errors + found = _save_podcast_logo(podcast.logo_url) + if not found: + podcast.logo_url = None + + # The podcast is always saved (not just when there are changes) because + # we need to record the last update + logger.info('Saving podcast.') + podcast.last_update = datetime.utcnow() + podcast.save() + + try: + subscribe_at_hub(podcast) + except SubscriptionError as se: + logger.warn('subscribing to hub failed: %s', str(se)) + + if not podcast.slug: + slug = PodcastSlug(podcast).get_slug() + if slug: + podcast.add_slug(slug) + + assign_missing_episode_slugs(podcast) + update_related_podcasts.delay(podcast) + + +def _update_categories(podcast, prev_timestamp): + """ checks some practical requirements and updates a category """ + + max_timestamp = datetime.utcnow() + timedelta(days=1) + + # no episodes at all + if not podcast.latest_episode_timestamp: + return + + # no new episode + if prev_timestamp and podcast.latest_episode_timestamp <= prev_timestamp: + return + + # too far in the future + if podcast.latest_episode_timestamp > max_timestamp: + return + + # not enough subscribers + if podcast.subscriber_count() < settings.MIN_SUBSCRIBERS_CATEGORY: + return + + update_category(podcast) + + +def _update_episodes(podcast, parsed_episodes): + + pid = podcast.get_id() + + # list of (obj, fun) where fun is the function to update obj + updated_episodes = [] + episodes_to_update = list(islice(parsed_episodes, 0, MAX_EPISODES_UPDATE)) + logger.info('Parsed %d (%d) episodes', len(parsed_episodes), + len(episodes_to_update)) + + logger.info('Updating %d episodes', len(episodes_to_update)) + for n, parsed in enumerate(episodes_to_update, 1): + + url = get_episode_url(parsed) + if not url: + logger.info('Skipping episode %d for missing URL', n) + continue + + logger.info('Updating episode %d / %d', n, len(parsed_episodes)) + + episode = Episode.objects.get_or_create_for_url(podcast, url) + + update_episode(parsed, episode, podcast) + updated_episodes.append(episode) + + # and mark the remaining ones outdated + current_episodes = Episode.objects.filter(podcast=podcast, + outdated=False)[:500] + outdated_episodes = set(current_episodes) - set(updated_episodes) + + logger.info('Marking %d episodes as outdated', len(outdated_episodes)) + for episode in outdated_episodes: + mark_outdated(episode) + + +@transaction.atomic +def _order_episodes(podcast): + """ Reorder the podcast's episode according to release timestamp + + Returns the highest order value (corresponding to the most recent + episode) """ + + num_episodes = podcast.episode_count + if not num_episodes: + return 0 + + episodes = podcast.episode_set.all().extra(select={ + 'has_released': 'released IS NOT NULL', + })\ + .order_by('-has_released', '-released', 'pk')\ + .only('pk') + + for n, episode in enumerate(episodes.iterator(), 1): + # assign ``order`` from higher (most recent) to 0 (oldest) + # None means "unknown" + new_order = num_episodes - n + + # optimize for new episodes that are newer than all existing + if episode.order == new_order: + continue + + logger.info('Updating order from {} to {}'.format(episode.order, + new_order)) + episode.order = new_order + episode.save() + + return num_episodes - 1 + + +def _save_podcast_logo(cover_art): + if not cover_art: + return + + try: + image_sha1 = 
hashlib.sha1(cover_art).hexdigest() + prefix = CoverArt.get_prefix(image_sha1) + + filename = CoverArt.get_original(prefix, image_sha1) + dirname = CoverArt.get_dir(filename) + + # get hash of existing file + if os.path.exists(filename): + with open(filename) as f: + old_hash = file_hash(f).digest() + else: + old_hash = '' + + logger.info('Logo %s', cover_art) + + # save new cover art + with open(filename, 'w') as fp: + fp.write(urllib2.urlopen(cover_art).read()) + + # get hash of new file + with open(filename) as f: + new_hash = file_hash(f).digest() + + # remove thumbnails if cover changed + if old_hash != new_hash: + thumbnails = CoverArt.get_existing_thumbnails(prefix, filename) + logger.info('Removing %d thumbnails', len(thumbnails)) + for f in thumbnails: + os.unlink(f) + + return cover_art + + except (urllib2.HTTPError, urllib2.URLError, ValueError, + httplib.BadStatusLine, socket.error, IOError) as e: + logger.warn('Exception while updating podcast logo: %s', str(e)) + + +def _mark_outdated(podcast, msg=''): + logger.info('marking podcast outdated: %s', msg) + podcast.outdated = True + podcast.last_update = datetime.utcnow() + podcast.save() + _update_episodes(podcast, []) + + +def get_episode_url(parsed_episode): + """ returns the URL of a parsed episode """ + for f in parsed_episode.get('files', []): + if f.get('urls', []): + return f['urls'][0] + return None + + +def update_episode(parsed_episode, episode, podcast): + """ updates "episode" with the data from "parsed_episode" """ + + # TODO: check if there have been any changes, to avoid unnecessary updates + episode.guid = to_maxlength(Episode, 'guid', parsed_episode.get('guid') or + episode.guid) + episode.description = parsed_episode.get('description') or \ + episode.description + episode.subtitle = parsed_episode.get('subtitle') or episode.subtitle + episode.content = parsed_episode.get('content') or \ + parsed_episode.get('description') or episode.content + episode.link = to_maxlength(Episode, 'link', + parsed_episode.get('link') or episode.link) + episode.released = datetime.utcfromtimestamp( + parsed_episode.get('released')) if parsed_episode.get('released') \ + else episode.released + episode.author = to_maxlength(Episode, 'author', + parsed_episode.get('author') or + episode.author) + episode.duration = parsed_episode.get('duration') or episode.duration + episode.filesize = parsed_episode['files'][0]['filesize'] + episode.language = parsed_episode.get('language') or \ + episode.language or podcast.language + episode.mimetypes = ','.join(list(set( + filter(None, [f['mimetype'] for f in parsed_episode.get('files', [])]) + ))) + episode.flattr_url = to_maxlength(Episode, 'flattr_url', + parsed_episode.get('flattr') or + episode.flattr_url) + episode.license = parsed_episode.get('license') or episode.license + + episode.title = to_maxlength(Episode, 'title', + parsed_episode.get('title') or + episode.title or + file_basename_no_extension(episode.url)) + + episode.last_update = datetime.utcnow() + episode.save() + + parsed_urls = list(chain.from_iterable( + f.get('urls', []) for f in parsed_episode.get('files', []))) + episode.add_missing_urls(parsed_urls) + + +def mark_outdated(obj): + """ marks obj outdated if its not already """ + if obj.outdated: + return None + + obj.outdated = True + obj.last_update = datetime.utcnow() + obj.save() + + +def get_update_interval(episodes): + """ calculates the avg interval between new episodes """ + + count = len(episodes) + if not count: + logger.info('no episodes, using default interval 
of %dh', + DEFAULT_UPDATE_INTERVAL) + return DEFAULT_UPDATE_INTERVAL + + earliest = episodes[0] + now = datetime.utcnow() + + timespan_s = (now - earliest.released).total_seconds() + timespan_h = timespan_s / 60 / 60 + + interval = int(timespan_h / count) + logger.info('%d episodes in %d days => %dh interval', count, + timespan_h / 24, interval) + + # place interval between {MIN,MAX}_UPDATE_INTERVAL + interval = max(interval, MIN_UPDATE_INTERVAL) + interval = min(interval, MAX_UPDATE_INTERVAL) + + return interval + + +def file_basename_no_extension(filename): + """ Returns filename without extension + + >>> file_basename_no_extension('/home/me/file.txt') + 'file' + + >>> file_basename_no_extension('file') + 'file' + """ + base = os.path.basename(filename) + name, extension = os.path.splitext(base) + return name diff --git a/mygpo/data/management/commands/feed-downloader.py b/mygpo/data/management/commands/feed-downloader.py index 49d6581f..3523b17b 100644 --- a/mygpo/data/management/commands/feed-downloader.py +++ b/mygpo/data/management/commands/feed-downloader.py @@ -3,7 +3,7 @@ import traceback from optparse import make_option from mygpo.maintenance.management.podcastcmd import PodcastCommand -from mygpo.data.feeddownloader import PodcastUpdater +from mygpo.data.feeddownloader import update_podcasts import socket socket.setdefaulttimeout(300) @@ -35,6 +35,5 @@ class Command(PodcastCommand): else: logger.info('Updating podcasts...') - updater = PodcastUpdater() - for podcast in updater.update_queue(queue): + for podcast in update_podcasts(queue): logger.info('Updated podcast %s', podcast) diff --git a/mygpo/data/tasks.py b/mygpo/data/tasks.py index fc1b0f17..ff1143c1 100644 --- a/mygpo/data/tasks.py +++ b/mygpo/data/tasks.py @@ -14,9 +14,8 @@ logger = get_task_logger(__name__) @celery.task def update_podcasts(podcast_urls): """ Task to update a podcast """ - from mygpo.data.feeddownloader import PodcastUpdater - updater = PodcastUpdater() - podcasts = updater.update_queue(podcast_urls) + from mygpo.data.feeddownloader import update_podcasts as update + podcasts = update(podcast_urls) return list(podcasts) diff --git a/mygpo/directory/search.py b/mygpo/directory/search.py index f8dc3793..483619a3 100644 --- a/mygpo/directory/search.py +++ b/mygpo/directory/search.py @@ -1,6 +1,6 @@ from mygpo.podcasts.models import Podcast from mygpo.utils import is_url, normalize_feed_url -from mygpo.data.feeddownloader import PodcastUpdater, NoPodcastCreated +from mygpo.data.feeddownloader import update_podcast, NoPodcastCreated from mygpo.search.index import search_podcasts as search @@ -15,11 +15,8 @@ def search_podcasts(q): podcast = None if not podcast or not podcast.title: - - updater = PodcastUpdater() - try: - updater.update(url) + update_podcast(url) except NoPodcastCreated as npc: return [] diff --git a/mygpo/directory/views.py b/mygpo/directory/views.py index 03f90e40..10053773 100644 --- a/mygpo/directory/views.py +++ b/mygpo/directory/views.py @@ -20,9 +20,6 @@ from django.contrib import messages from django.utils.translation import ugettext as _ from django.contrib.auth import get_user_model -from feedservice.parse.models import ParserException -from feedservice.parse import FetchFeedException - from mygpo.podcasts.models import Podcast, Episode from mygpo.directory.search import search_podcasts from mygpo.web.utils import process_lang_params, get_language_names, \ @@ -31,7 +28,8 @@ from mygpo.directory.tags import Topics from mygpo.users.settings import FLATTR_TOKEN from 
mygpo.categories.models import Category from mygpo.podcastlists.models import PodcastList -from mygpo.data.feeddownloader import PodcastUpdater, NoEpisodesException +from mygpo.data.feeddownloader import (verify_podcast_url, NoEpisodesException, + UpdatePodcastException) from mygpo.data.tasks import update_podcasts @@ -267,13 +265,10 @@ class MissingPodcast(View): except Podcast.DoesNotExist: # check if we could add a podcast for the given URL podcast = False - updater = PodcastUpdater() - try: - can_add = updater.verify_podcast_url(url) + can_add = verify_podcast_url(url) - except (ParserException, FetchFeedException, - NoEpisodesException) as ex: + except (UpdatePodcastException, NoEpisodesException) as ex: can_add = False messages.error(request, unicode(ex)) @@ -321,8 +316,7 @@ class AddPodcastStatus(TemplateView): podcasts = result.get() messages.success(request, _('%d podcasts added' % len(podcasts))) - except (ParserException, FetchFeedException, - NoEpisodesException) as ex: + except (UpdatePodcastException, NoEpisodesException) as ex: messages.error(request, str(ex)) podcast = None diff --git a/mygpo/settings.py b/mygpo/settings.py index d9564fb7..28f30d2c 100644 --- a/mygpo/settings.py +++ b/mygpo/settings.py @@ -271,6 +271,8 @@ GOOGLE_CLIENT_SECRET='' SUPPORT_URL='' +FEEDSERVICE_URL = os.getenv('FEEDSERVICE_URL', 'http://feeds.gpodder.net/') + # Elasticsearch settings ELASTICSEARCH_SERVER = os.getenv('ELASTICSEARCH_SERVER', '127.0.0.1:9200') diff --git a/mygpo/share/views.py b/mygpo/share/views.py index 5724079b..0013cd21 100644 --- a/mygpo/share/views.py +++ b/mygpo/share/views.py @@ -11,7 +11,7 @@ from django.utils.decorators import method_decorator from mygpo.podcasts.models import Podcast from mygpo.publisher.models import PublishedPodcast from mygpo.userfeeds.feeds import FavoriteFeed -from mygpo.data.feeddownloader import PodcastUpdater +from mygpo.data.feeddownloader import update_podcast import logging logger = logging.getLogger(__name__) @@ -100,8 +100,7 @@ class FavoritesFeedCreateEntry(View): publisher=user, ) - updater = PodcastUpdater() - updater.update(feed_url) + update_podcast(feed_url) return HttpResponseRedirect(reverse('share-favorites')) diff --git a/requirements.txt b/requirements.txt index fc71e885..af2f2e40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ dj-database-url==0.3.0 django-redis-sessions==0.4.0 django-uuidfield==0.5.0 feedparser==5.1.3 --e git+https://github.com/gpodder/mygpo-feedservice.git@b6d2641ad395455569435d22a224094b7dcef5b9#egg=feedservice-dev gunicorn==19.1.1 html2text==2014.7.3 markdown2==2.2.2 @@ -19,3 +18,4 @@ python-memcached==1.53 redis==2.10.3 ujson==1.33 django-celery==3.1.10 +requests==2.7.0 -- 2.11.4.GIT