From: Stefan Kögl
Date: Tue, 11 Jun 2013 16:35:50 +0000 (+0200)
Subject: normalize_feed_url() encodes characters
X-Git-Url: https://repo.or.cz/w/mygpo.git/commitdiff_plain/596dfe9802d93658edcd2e0cc5d5d437f7144195

normalize_feed_url() encodes characters
---

diff --git a/mygpo/utils.py b/mygpo/utils.py
index c4824049..d0605d65 100644
--- a/mygpo/utils.py
+++ b/mygpo/utils.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 # This file is part of my.gpodder.org.
 #
@@ -933,12 +934,19 @@ def normalize_feed_url(url):
     >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
     'http://example.org/'
     >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
-    'http://example.com/x@y:z@test.com/'
+    'http://example.com/x%40y%3Az%40test.com/'
+    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
+    'http://en.wikipedia.org/wiki/%C3%84'
+    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
+    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
     """
     url = url.strip()
     if not url or len(url) < 8:
         return None
 
+    if isinstance(url, unicode):
+        url = url.encode('utf-8', 'ignore')
+
     # This is a list of prefixes that you can use to minimize the amount of
     # keystrokes that you have to use.
     # Feel free to suggest other useful prefixes, and I'll add them here.
@@ -966,6 +974,10 @@ def normalize_feed_url(url):
     # Schemes and domain names are case insensitive
     scheme, netloc = scheme.lower(), netloc.lower()
 
+    # encode non-encoded characters
+    path = urllib.quote(path, '/%')
+    query = urllib.quote_plus(query, ':&=')
+
     # Remove authentication to protect users' privacy
     netloc = netloc.rsplit('@', 1)[-1]
 