1 # -*- coding: utf-8 -*-
3 # This file is part of my.gpodder.org.
5 # my.gpodder.org is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
10 # my.gpodder.org is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
13 # License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
28 from datetime
import datetime
, timedelta
, date
32 import urllib
.request
, urllib
.parse
, urllib
.error
33 import urllib
.request
, urllib
.error
, urllib
.parse
37 from django
.db
import transaction
, IntegrityError
38 from django
.conf
import settings
39 from django
.core
.urlresolvers
import reverse
41 from mygpo
.core
.json
import json
44 logger
= logging
.getLogger(__name__
)
47 def daterange(from_date
, to_date
=None, leap
=timedelta(days
=1)):
49 >>> from_d = datetime(2010, 01, 01)
50 >>> to_d = datetime(2010, 01, 05)
51 >>> list(daterange(from_d, to_d))
52 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
56 if isinstance(from_date
, datetime
):
57 to_date
= datetime
.utcnow()
59 to_date
= date
.today()
61 while from_date
<= to_date
:
63 from_date
= from_date
+ leap
66 def format_time(value
):
67 """Format an offset (in seconds) to a string
69 The offset should be an integer or float value.
77 >>> format_time(10921)
81 dt
= datetime
.utcfromtimestamp(value
)
82 except (ValueError, TypeError):
86 return dt
.strftime('%M:%S')
88 return dt
.strftime('%H:%M:%S')
90 def parse_time(value
):
95 >>> parse_time('05:10') #5*60+10
98 >>> parse_time('1:05:10') #60*60+5*60+10
102 raise ValueError('None value in parse_time')
104 if isinstance(value
, int):
105 # Don't need to parse already-converted time value
109 raise ValueError('Empty valueing in parse_time')
111 for format
in ('%H:%M:%S', '%M:%S'):
113 t
= time
.strptime(value
, format
)
114 return t
.tm_hour
* 60*60 + t
.tm_min
* 60 + t
.tm_sec
115 except ValueError as e
:
123 >>> parse_bool('True')
126 >>> parse_bool('true')
132 if isinstance(val
, bool):
134 if val
.lower() == 'true':
139 def iterate_together(lists
, key
=lambda x
: x
, reverse
=False):
141 takes ordered, possibly sparse, lists with similar items
142 (some items have a corresponding item in the other lists, some don't).
144 It then yield tuples of corresponding items, where one element is None is
145 there is no corresponding entry in one of the lists.
147 Tuples where both elements are None are skipped.
149 The results of the key method are used for the comparisons.
151 If reverse is True, the lists are expected to be sorted in reverse order
152 and the results will also be sorted reverse
154 >>> list(iterate_together([range(1, 3), range(1, 4, 2)]))
155 [(1, 1), (2, None), (None, 3)]
157 >>> list(iterate_together([[], []]))
160 >>> list(iterate_together([range(1, 3), range(3, 5)]))
161 [(1, None), (2, None), (None, 3), (None, 4)]
163 >>> list(iterate_together([range(1, 3), []]))
164 [(1, None), (2, None)]
166 >>> list(iterate_together([[1, None, 3], [None, None, 3]]))
170 Next
= collections
.namedtuple('Next', 'item more')
171 min_
= min if not reverse
else max
172 lt_
= operator
.lt
if not reverse
else operator
.gt
174 lists
= [iter(l
) for l
in lists
]
182 except StopIteration:
183 return Next(None, False)
186 return [None]*len(lists
)
188 # take first bunch of items
189 items
= [_take(l
) for l
in lists
]
191 while any(i
.item
is not None or i
.more
for i
in items
):
195 for n
, item
in enumerate(items
):
197 if item
.item
is None:
200 if all(x
is None for x
in res
):
204 min_v
= min_([x
for x
in res
if x
is not None], key
=key
)
206 if key(item
.item
) == key(min_v
):
209 elif lt_(key(item
.item
), key(min_v
)):
213 for n
, x
in enumerate(res
):
215 items
[n
] = _take(lists
[n
])
220 def progress(val
, max_val
, status_str
='', max_width
=50, stream
=sys
.stdout
):
222 factor
= float(val
)/max_val
if max_val
> 0 else 0
224 # progress as percentage
225 percentage_str
= '{val:.2%}'.format(val
=factor
)
227 # progress bar filled with #s
228 factor
= min(int(factor
*max_width
), max_width
)
229 progress_str
= '#' * factor
+ ' ' * (max_width
-factor
)
231 #insert percentage into bar
232 percentage_start
= int((max_width
-len(percentage_str
))/2)
233 progress_str
= progress_str
[:percentage_start
] + \
235 progress_str
[percentage_start
+len(percentage_str
):]
237 print('\r', end
=' ', file=stream
)
238 print('[ %s ] %s / %s | %s' % (
242 status_str
), end
=' ', file=stream
)
def set_cmp(list, simplify):
    """Deduplicate items, using simplify() to decide which items are equal.

    Builds a set out of a list but uses the results of simplify to determine
    equality between items.

    Items whose ``simplify(item)`` results are equal are considered
    duplicates; of each group of duplicates the last item is kept. The
    result is a list ordered by the first occurrence of each simplified
    value.

    The results of ``simplify`` must be hashable.

    >>> set_cmp(['a', 'A', 'b'], str.lower)
    ['A', 'b']
    """
    # NOTE: the first parameter is called ``list`` (kept for backward
    # compatibility) and therefore shadows the builtin inside this function.
    # The builtin must not be called here -- doing so used to raise
    # "TypeError: 'list' object is not callable".
    unique = {simplify(item): item for item in list}
    return [item for item in unique.values()]
257 returns the first not-None object or None if the iterator is exhausted
266 return list(set(a
) & set(b
))
# Matches the C0 control characters (U+0000-U+001F), DEL (U+007F) and the
# C1 control characters (U+0080-U+009F). Compiled once at import time
# instead of on every call.
_CONTROL_CHAR_RE = re.compile('[\x00-\x1f\x7f-\x9f]')


def remove_control_chars(s):
    """Return ``s`` with all control characters removed.

    The characters U+0000-U+001F and U+007F-U+009F are dropped; this
    includes tab, newline and carriage return. All other characters are
    left untouched.

    >>> remove_control_chars('a\\x00b\\tc')
    'abc'
    """
    return _CONTROL_CHAR_RE.sub('', s)
279 return tuple(map(list,list(zip(*a
))))
282 def parse_range(s
, min, max, default
=None):
284 Parses the string and returns its value. If the value is outside the given
285 range, its closest number within the range is returned
287 >>> parse_range('5', 0, 10)
290 >>> parse_range('0', 5, 10)
293 >>> parse_range('15',0, 10)
296 >>> parse_range('x', 0, 20)
299 >>> parse_range('x', 0, 20, 20)
310 except (ValueError, TypeError):
311 return default
if default
is not None else (max-min)/2
316 return [item
for sublist
in l
for item
in sublist
]
319 def linearize(key
, iterators
, reverse
=False):
321 Linearizes a number of iterators, sorted by some comparison function
324 iters
= [iter(i
) for i
in iterators
]
329 vals
. append( (v
, i
) )
330 except StopIteration:
334 vals
= sorted(vals
, key
=lambda x
: key(x
[0]), reverse
=reverse
)
335 val
, it
= vals
.pop(0)
339 vals
.append( (next_val
, it
) )
340 except StopIteration:
344 def skip_pairs(iterator
, cmp=cmp):
345 """ Skips pairs of equal items
347 >>> list(skip_pairs([]))
350 >>> list(skip_pairs([1]))
353 >>> list(skip_pairs([1, 2, 3]))
356 >>> list(skip_pairs([1, 1]))
359 >>> list(skip_pairs([1, 2, 2]))
362 >>> list(skip_pairs([1, 2, 2, 3]))
365 >>> list(skip_pairs([1, 2, 2, 2]))
368 >>> list(skip_pairs([1, 2, 2, 2, 2, 3]))
372 iterator
= iter(iterator
)
373 next
= next(iterator
)
378 next
= next(iterator
)
379 except StopIteration as e
:
383 if cmp(item
, next
) == 0:
384 next
= next(iterator
)
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Naive datetime objects are interpreted as UTC, independent of the
    timezone configured on the machine. (time.mktime(), which was used
    previously, interprets its argument as local time and therefore returned
    shifted values on hosts not running in UTC.) Timezone-aware objects are
    converted to UTC first.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    # imported locally so that the function does not depend on the
    # module-level imports
    import calendar

    # timegm() is the UTC counterpart of time.mktime()
    return int(calendar.timegm(datetime_obj.utctimetuple()))
402 re_url
= re
.compile('^https?://')
405 """ Returns true if a string looks like an URL
407 >>> is_url('http://example.com/some-path/file.xml')
410 >>> is_url('something else')
414 return bool(re_url
.match(string
))
418 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
419 # this does not increase asymptotical complexity
420 # but can still waste more time than it saves.
def shortest_of(strings):
    """Return the element of ``strings`` with the smallest length.

    If several elements share the smallest length, the first of them is
    returned. An empty ``strings`` raises ValueError (from ``min``).
    """
    measure = len
    return min(strings, key=measure)
424 def longest_substr(strings
):
426 Returns the longest common substring of the given strings
432 reference
= shortest_of(strings
)
433 length
= len(reference
)
434 #find a suitable slice i:j
435 for i
in range(length
):
436 #only consider strings long at least len(substr) + 1
437 for j
in range(i
+ len(substr
) + 1, length
):
438 candidate
= reference
[i
:j
]
439 if all(candidate
in text
for text
in strings
):
445 def additional_value(it
, gen_val
, val_changed
=lambda _
: True):
446 """ Provides an additional value to the elements, calculated when needed
448 For the elements from the iterator, some additional value can be computed
449 by gen_val (which might be an expensive computation).
451 If the elements in the iterator are ordered so that some subsequent
452 elements would generate the same additional value, val_changed can be
453 provided, which receives the next element from the iterator and the
454 previous additional value. If the element would generate the same
455 additional value (val_changed returns False), its computation is skipped.
457 >>> # get the next full hundred higher than x
458 >>> # this will probably be an expensive calculation
459 >>> next_hundred = lambda x: x + 100-(x % 100)
461 >>> # returns True if h is not the value that next_hundred(x) would provide
462 >>> # this should be a relatively cheap calculation, compared to the above
463 >>> diff_hundred = lambda x, h: (h-x) < 0 or (h - x) > 100
465 >>> xs = [0, 50, 100, 101, 199, 200, 201]
466 >>> list(additional_value(xs, next_hundred, diff_hundred))
467 [(0, 100), (50, 100), (100, 100), (101, 200), (199, 200), (200, 200), (201, 300)]
474 if current
is _none
or val_changed(x
, current
):
480 def file_hash(f
, h
=hashlib
.md5
, block_size
=2**20):
481 """ returns the hash of the contents of a file """
483 for chunk
in iter(lambda: f
.read(block_size
), ''):
def split_list(l, prop):
    """ split elements that satisfy a property, and those that don't

    Returns a tuple ``(match, nomatch)`` of two lists. ``match`` contains the
    elements of ``l`` for which ``prop(element)`` is true, ``nomatch`` all
    other elements. The relative order of the elements is preserved.

    ``l`` is traversed exactly once and ``prop`` is called once per element,
    so one-shot iterators are handled correctly and elements are classified
    by ``prop`` alone (not by comparing them to already matched elements).

    >>> split_list([1, 2, 3, 4], lambda x: x % 2 == 0)
    ([2, 4], [1, 3])
    """
    match, nomatch = [], []
    for element in l:
        target = match if prop(element) else nomatch
        target.append(element)
    return match, nomatch
496 def sorted_chain(links
, key
, reverse
=False):
497 """ Takes a list of iters can iterates over sorted elements
499 Each elment of links should be a tuple of (sort_key, iterator). The
500 elements of each iterator should be sorted already. sort_key should
501 indicate the key of the first element and needs to be comparable to the
504 The function returns an iterator over the globally sorted element that
505 ensures that as little iterators as possible are evaluated. When
508 # mixed_list initially contains all placeholders; later evaluated
509 # elements (from the iterators) are mixed in
510 mixed_list
= [(k
, link
, True) for k
, link
in links
]
513 _
, item
, expand
= mixed_list
.pop(0)
515 # found an element (from an earlier expansion), yield it
520 # found an iter that needs to be expanded.
521 # The iterator is fully consumed
522 new_items
= [(key(i
), i
, False) for i
in item
]
524 # sort links (placeholders) and elements together
525 mixed_list
= sorted(mixed_list
+ new_items
, key
=lambda k__v__e
: k__v__e
[0],
529 def url_add_authentication(url
, username
, password
):
531 Adds authentication data (username, password) to a given
532 URL in order to construct an authenticated URL.
534 >>> url_add_authentication('https://host.com/', '', None)
536 >>> url_add_authentication('http://example.org/', None, None)
537 'http://example.org/'
538 >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
539 'telnet://foo:bar@host.com/'
540 >>> url_add_authentication('ftp://example.org', 'billy', None)
541 'ftp://billy@example.org'
542 >>> url_add_authentication('ftp://example.org', 'billy', '')
543 'ftp://billy:@example.org'
544 >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
545 'http://aa:bc@localhost/x'
546 >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
547 'http://i%2Fo:P@ss:@blubb.lan/u.html'
548 >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
550 >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
551 'http://P@x:i%2F@cx.lan'
552 >>> url_add_authentication('http://x.org/', 'a b', 'c d')
553 'http://a%20b:c%20d@x.org/'
555 if username
is None or username
== '':
558 # Relaxations of the strict quoting rules (bug 1521):
559 # 1. Accept '@' in username and password
560 # 2. Accept ':' in password only
561 username
= urllib
.parse
.quote(username
, safe
='@')
563 if password
is not None:
564 password
= urllib
.parse
.quote(password
, safe
='@:')
565 auth_string
= ':'.join((username
, password
))
567 auth_string
= username
569 url
= url_strip_authentication(url
)
571 url_parts
= list(urllib
.parse
.urlsplit(url
))
572 # url_parts[1] is the HOST part of the URL
573 url_parts
[1] = '@'.join((auth_string
, url_parts
[1]))
575 return urllib
.parse
.urlunsplit(url_parts
)
578 def urlopen(url
, headers
=None, data
=None):
580 An URL opener with the User-agent set to gPodder (with version)
582 username
, password
= username_password_from_url(url
)
583 if username
is not None or password
is not None:
584 url
= url_strip_authentication(url
)
585 password_mgr
= urllib
.request
.HTTPPasswordMgrWithDefaultRealm()
586 password_mgr
.add_password(None, url
, username
, password
)
587 handler
= urllib
.request
.HTTPBasicAuthHandler(password_mgr
)
588 opener
= urllib
.request
.build_opener(handler
)
590 opener
= urllib
.request
.build_opener()
595 headers
= dict(headers
)
597 headers
.update({'User-agent': settings
.USER_AGENT
})
598 request
= urllib
.request
.Request(url
, data
=data
, headers
=headers
)
599 return opener
.open(request
)
603 def username_password_from_url(url
):
605 Returns a tuple (username,password) containing authentication
606 data from the specified URL or (None,None) if no authentication
607 data can be found in the URL.
609 See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
611 >>> username_password_from_url('https://@host.com/')
613 >>> username_password_from_url('telnet://host.com/')
615 >>> username_password_from_url('ftp://foo:@host.com/')
617 >>> username_password_from_url('http://a:b@host.com/')
619 >>> username_password_from_url(1)
620 Traceback (most recent call last):
622 ValueError: URL has to be a string or unicode object.
623 >>> username_password_from_url(None)
624 Traceback (most recent call last):
626 ValueError: URL has to be a string or unicode object.
627 >>> username_password_from_url('http://a@b:c@host.com/')
629 >>> username_password_from_url('ftp://a:b:c@host.com/')
631 >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
633 >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
634 ('\xc3\xb6sterreich', None)
635 >>> username_password_from_url('http://w%20x:y%20z@example.org/')
637 >>> username_password_from_url('http://example.com/x@y:z@test.com/')
640 if type(url
) not in (str, str):
641 raise ValueError('URL has to be a string or unicode object.')
643 (username
, password
) = (None, None)
645 (scheme
, netloc
, path
, params
, query
, fragment
) = urllib
.parse
.urlparse(url
)
648 (authentication
, netloc
) = netloc
.rsplit('@', 1)
649 if ':' in authentication
:
650 (username
, password
) = authentication
.split(':', 1)
652 # RFC1738 dictates that we should not allow ['/', '@', ':']
653 # characters in the username and password field (Section 3.1):
655 # 1. The "/" can't be in there at this point because of the way
656 # urlparse (which we use above) works.
657 # 2. Due to gPodder bug 1521, we allow "@" in the username and
658 # password field. We use netloc.rsplit('@', 1), which will
659 # make sure that we split it at the last '@' in netloc.
660 # 3. The colon must be excluded (RFC2617, Section 2) in the
661 # username, but is apparently allowed in the password. This
662 # is handled by the authentication.split(':', 1) above, and
663 # will cause any extraneous ':'s to be part of the password.
665 username
= urllib
.parse
.unquote(username
)
666 password
= urllib
.parse
.unquote(password
)
668 username
= urllib
.parse
.unquote(authentication
)
670 return (username
, password
)
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    Everything up to (and including) the last '@' of the network location
    is treated as authentication data.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # rpartition() yields the part after the last '@'; if there is no '@'
    # at all, that part is the unchanged network location.
    host = netloc.rpartition('@')[2]

    return urllib.parse.urlunsplit((scheme, host, path, query, fragment))
707 # Native filesystem encoding detection
708 encoding
= sys
.getfilesystemencoding()
710 def sanitize_encoding(filename
):
712 Generate a sanitized version of a string (i.e.
713 remove invalid characters and encode in the
714 detected native language encoding).
716 >>> sanitize_encoding('\x80')
718 >>> sanitize_encoding(u'unicode')
721 # The encoding problem goes away in Python 3.. hopefully!
722 if sys
.version_info
>= (3, 0):
726 if not isinstance(filename
, str):
727 filename
= filename
.decode(encoding
, 'ignore')
728 return filename
.encode(encoding
, 'ignore')
732 """ returns the commit and message of the current git HEAD """
735 pr
= subprocess
.Popen('/usr/bin/git log -n 1 --oneline'.split(),
736 cwd
= settings
.BASE_DIR
,
737 stdout
= subprocess
.PIPE
,
738 stderr
= subprocess
.PIPE
,
744 (out
, err
) = pr
.communicate()
750 msg
= ' ' .join(outs
[1:])
755 # https://gist.github.com/samuraisam/901117
757 default_fudge
= timedelta(seconds
=0, microseconds
=0, days
=0)
759 def deep_eq(_v1
, _v2
, datetime_fudge
=default_fudge
, _assert
=False):
761 Tests for deep equality between two python data structures recursing
762 into sub-structures if necessary. Works with all python types including
763 iterators and generators. This function was dreampt up to test API responses
764 but could be used for anything. Be careful. With deeply nested structures
765 you may blow the stack.
768 datetime_fudge => this is a datetime.timedelta object which, when
769 comparing dates, will accept values that differ
770 by the number of seconds specified
771 _assert => passing yes for this will raise an assertion error
772 when values do not match, instead of returning
773 false (very useful in combination with pdb)
777 >>> x1, y1 = ({'a': 'b'}, {'a': 'b'})
780 >>> x2, y2 = ({'a': 'b'}, {'b': 'a'})
783 >>> x3, y3 = ({'a': {'b': 'c'}}, {'a': {'b': 'c'}})
786 >>> x4, y4 = ({'c': 't', 'a': {'b': 'c'}}, {'a': {'b': 'n'}, 'c': 't'})
789 >>> x5, y5 = ({'a': [1,2,3]}, {'a': [1,2,3]})
792 >>> x6, y6 = ({'a': [1,'b',8]}, {'a': [2,'b',8]})
795 >>> x7, y7 = ('a', 'a')
798 >>> x8, y8 = (['p','n',['asdf']], ['p','n',['asdf']])
801 >>> x9, y9 = (['p','n',['asdf',['omg']]], ['p', 'n', ['asdf',['nowai']]])
804 >>> x10, y10 = (1, 2)
805 >>> deep_eq(x10, y10)
807 >>> deep_eq((str(p) for p in xrange(10)), (str(p) for p in xrange(10)))
809 >>> str(deep_eq(range(4), range(4)))
811 >>> deep_eq(xrange(100), xrange(100))
813 >>> deep_eq(xrange(2), xrange(5))
815 >>> from datetime import datetime, timedelta
816 >>> d1, d2 = (datetime.utcnow(), datetime.utcnow() + timedelta(seconds=4))
819 >>> deep_eq(d1, d2, datetime_fudge=timedelta(seconds=5))
822 _deep_eq
= functools
.partial(deep_eq
, datetime_fudge
=datetime_fudge
,
825 def _check_assert(R
, a
, b
, reason
=''):
826 if _assert
and not R
:
827 assert 0, "an assertion has failed in deep_eq (%s) %s != %s" % (
828 reason
, str(a
), str(b
))
831 def _deep_dict_eq(d1
, d2
):
832 k1
, k2
= (sorted(d1
.keys()), sorted(d2
.keys()))
833 if k1
!= k2
: # keys should be exactly equal
834 return _check_assert(False, k1
, k2
, "keys")
836 return _check_assert(operator
.eq(sum(_deep_eq(d1
[k
], d2
[k
])
838 len(k1
)), d1
, d2
, "dictionaries")
840 def _deep_iter_eq(l1
, l2
):
841 if len(l1
) != len(l2
):
842 return _check_assert(False, l1
, l2
, "lengths")
843 return _check_assert(operator
.eq(sum(_deep_eq(v1
, v2
)
844 for v1
, v2
in zip(l1
, l2
)),
845 len(l1
)), l1
, l2
, "iterables")
849 if type(a
) == datetime
and type(b
) == datetime
:
850 s
= datetime_fudge
.seconds
851 t1
, t2
= (time
.mktime(a
.timetuple()), time
.mktime(b
.timetuple()))
853 l
= -l
if l
> 0 else l
854 return _check_assert((-s
if s
> 0 else s
) <= l
, a
, b
, "dates")
855 return _check_assert(_op(a
, b
), a
, b
, "values")
859 # guard against strings because they are iterable and their
860 # elements yield iterables infinitely.
863 if isinstance(_v1
, t
):
866 if isinstance(_v1
, dict):
870 c1
, c2
= (list(iter(_v1
)), list(iter(_v2
)))
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    The body is read from ``request.body``. If the Content-Encoding header
    (``request.META['HTTP_CONTENT_ENCODING']``) is 'gzip', the body is
    decompressed before it is parsed as JSON. Both genuine gzip streams and
    bare zlib streams are accepted for that encoding.

    Errors raised by the decompression (zlib.error) or by the JSON parser
    are propagated to the caller.
    """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # Adding 32 to the window size enables automatic detection of the
        # zlib and gzip headers. With the default window size only zlib
        # streams are accepted, and real gzip data fails with
        # "incorrect header check".
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body)
891 def normalize_feed_url(url
):
893 Converts any URL to http:// or ftp:// so that it can be
894 used with "wget". If the URL cannot be converted (invalid
895 or unknown scheme), "None" is returned.
897 This will also normalize feed:// and itpc:// to http://.
899 >>> normalize_feed_url('itpc://example.org/podcast.rss')
900 'http://example.org/podcast.rss'
902 If no URL scheme is defined (e.g. "curry.com"), we will
903 simply assume the user intends to add a http:// feed.
905 >>> normalize_feed_url('curry.com')
908 There are even some more shortcuts for advanced users
909 and lazy typists (see the source for details).
911 >>> normalize_feed_url('fb:43FPodcast')
912 'http://feeds.feedburner.com/43FPodcast'
914 It will also take care of converting the domain name to
915 all-lowercase (because domains are not case sensitive):
917 >>> normalize_feed_url('http://Example.COM/')
918 'http://example.com/'
920 Some other minimalistic changes are also taken care of,
921 e.g. a ? with an empty query is removed:
923 >>> normalize_feed_url('http://example.org/test?')
924 'http://example.org/test'
926 Leading and trailing whitespace is removed
928 >>> normalize_feed_url(' http://example.com/podcast.rss ')
929 'http://example.com/podcast.rss'
931 HTTP Authentication is removed to protect users' privacy
933 >>> normalize_feed_url('http://a@b:c@host.com/')
935 >>> normalize_feed_url('ftp://a:b:c@host.com/')
937 >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
939 >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
941 >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
942 'http://example.org/'
943 >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
944 'http://example.com/x%40y%3Az%40test.com/'
945 >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
946 'http://en.wikipedia.org/wiki/%C3%84'
947 >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
948 'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
951 if not url
or len(url
) < 8:
954 if isinstance(url
, str):
955 url
= url
.encode('utf-8', 'ignore')
957 # This is a list of prefixes that you can use to minimize the amount of
958 # keystrokes that you have to use.
959 # Feel free to suggest other useful prefixes, and I'll add them here.
961 'fb:': 'http://feeds.feedburner.com/%s',
962 'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
963 'sc:': 'http://soundcloud.com/%s',
964 'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
965 # YouTube playlists. To get a list of playlists per-user, use:
966 # https://gdata.youtube.com/feeds/api/users/<username>/playlists
967 'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
970 for prefix
, expansion
in PREFIXES
.items():
971 if url
.startswith(prefix
):
972 url
= expansion
% (url
[len(prefix
):],)
975 # Assume HTTP for URLs without scheme
977 url
= 'http://' + url
979 scheme
, netloc
, path
, query
, fragment
= urllib
.parse
.urlsplit(url
)
981 # Schemes and domain names are case insensitive
982 scheme
, netloc
= scheme
.lower(), netloc
.lower()
984 # encode non-encoded characters
985 path
= urllib
.parse
.quote(path
, '/%')
986 query
= urllib
.parse
.quote_plus(query
, ':&=')
988 # Remove authentication to protect users' privacy
989 netloc
= netloc
.rsplit('@', 1)[-1]
991 # Normalize empty paths to "/"
995 # feed://, itpc:// and itms:// are really http://
996 if scheme
in ('feed', 'itpc', 'itms'):
999 if scheme
not in ('http', 'https', 'ftp', 'file'):
1002 # urlunsplit might return "a slighty different, but equivalent URL"
1003 return urllib
.parse
.urlunsplit((scheme
, netloc
, path
, query
, fragment
))
def partition(items, predicate=bool):
    """Split ``items`` into two lazy iterators according to ``predicate``.

    Returns a tuple ``(rejected, accepted)``: the first iterator yields the
    items for which ``predicate(item)`` is false, the second one those for
    which it is true. The predicate is evaluated once per item; its results
    are shared between both iterators via ``itertools.tee``.

    >>> rejected, accepted = partition([0, 1, 2, 0, 3])
    >>> list(rejected), list(accepted)
    ([0, 0], [1, 2, 3])
    """
    verdicts = ((predicate(entry), entry) for entry in items)
    for_rejected, for_accepted = itertools.tee(verdicts)

    def _pick(tagged, keep_truthy):
        # yield the entries whose verdict has the requested truth value
        for verdict, entry in tagged:
            if bool(verdict) == keep_truthy:
                yield entry

    return _pick(for_rejected, False), _pick(for_accepted, True)
1012 def split_quoted(s
):
1013 """ Splits a quoted string
1015 >>> split_quoted('some "quoted text"') == ['some', 'quoted text']
1018 >>> split_quoted('"quoted text') == ['quoted', 'text']
1021 # 4 quotes here are 2 in the doctest is one in the actual string
1022 >>> split_quoted('text\\\\') == ['text']
1027 # split by whitespace, preserve quoted substrings
1028 keywords
= shlex
.split(s
)
1031 # No closing quotation (eg '"text')
1032 # No escaped character (eg '\')
1033 s
= s
.replace('"', '').replace("'", '').replace('\\', '')
1034 keywords
= shlex
.split(s
)
1040 """ Return the link to the Django Admin Edit page """
1041 return reverse('admin:%s_%s_change' % (obj
._meta
.app_label
,
1042 obj
._meta
.module_name
),
def random_token(length=32):
    """Return a random token of ``length`` ASCII letters and digits.

    The characters are drawn independently (repetitions are possible) from
    the operating system's cryptographically secure random source, so the
    token is suitable for secrets such as access tokens, and ``length`` is
    not limited by the size of the alphabet.
    """
    import random
    import string

    # string.letters only exists on Python 2; ascii_letters is available on
    # both Python 2 and 3.
    alphabet = string.ascii_letters + string.digits

    # random.sample() draws without replacement (at most 62 characters, no
    # repetitions) from a predictable generator; SystemRandom uses
    # os.urandom() instead.
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))
1052 def to_maxlength(cls
, field
, val
):
1053 """ Cut val to the maximum length of cls's field """
1057 max_length
= cls
._meta
.get_field(field
).max_length
1058 orig_length
= len(val
)
1059 if orig_length
> max_length
:
1060 val
= val
[:max_length
]
1061 logger
.warn('%s.%s length reduced from %d to %d',
1062 cls
.__name
__, field
, orig_length
, max_length
)
1067 def get_domain(url
):
1068 """ Returns the domain name of a URL
1070 >>> get_domain('http://example.com')
1073 >>> get_domain('https://example.com:80/my-podcast/feed.rss')
1076 netloc
= urllib
.parse
.urlparse(url
).netloc
1078 port_idx
= netloc
.index(':')
1079 return netloc
[:port_idx
]
1085 def set_ordered_entries(obj
, new_entries
, existing
, EntryClass
,
1086 value_name
, parent_name
):
1087 """ Update the object's entries to the given list
1089 'new_entries' should be a list of objects that are later wrapped in
1090 EntryClass instances. 'value_name' is the name of the EntryClass property
1091 that contains the values; 'parent_name' is the one that references obj.
1093 Entries that do not exist are created. Existing entries that are not in
1094 'new_entries' are deleted. """
1096 logger
.info('%d existing entries', len(existing
))
1098 logger
.info('%d new entries', len(new_entries
))
1100 with transaction
.atomic():
1101 max_order
= max([s
.order
for s
in list(existing
.values())] +
1103 logger
.info('Renumbering entries starting from %d', max_order
+1)
1104 for n
, entry
in enumerate(list(existing
.values()), max_order
+1):
1108 logger
.info('%d existing entries', len(existing
))
1110 for n
, entry
in enumerate(new_entries
):
1112 e
= existing
.pop(entry
)
1113 logger
.info('Updating existing entry %d: %s', n
, entry
)
1117 logger
.info('Creating new entry %d: %s', n
, entry
)
1123 from mygpo
.podcasts
.models
import ScopedModel
1124 if issubclass(EntryClass
, ScopedModel
):
1125 links
['scope'] = obj
.scope
1127 EntryClass
.objects
.create(order
=n
, **links
)
1128 except IntegrityError
as ie
:
1129 logger
.warn('Could not create enry for %s: %s', obj
, ie
)
1131 with transaction
.atomic():
1132 delete
= [s
.pk
for s
in list(existing
.values())]
1133 logger
.info('Deleting %d entries', len(delete
))
1134 EntryClass
.objects
.filter(id__in
=delete
).delete()