1 # -*- coding: utf-8 -*-
13 from datetime
import datetime
, timedelta
, date
17 import urllib
.request
, urllib
.parse
, urllib
.error
18 import urllib
.request
, urllib
.error
, urllib
.parse
22 from django
.db
import transaction
, IntegrityError
23 from django
.conf
import settings
24 from django
.urls
import reverse
27 logger
= logging
.getLogger(__name__
)
def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """Yield every value from ``from_date`` up to and including ``to_date``.

    Works with both ``date`` and ``datetime`` objects. If ``to_date`` is
    omitted, the range ends at the current UTC time (``datetime`` input)
    or at today's date (``date`` input). ``leap`` is the distance between
    two consecutive values.

    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """
    if to_date is None:
        if isinstance(from_date, datetime):
            # datetime.utcnow() is deprecated since Python 3.12; derive the
            # same naive UTC value from a timezone-aware "now" instead.
            from datetime import timezone
            to_date = datetime.now(timezone.utc).replace(tzinfo=None)
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value. Offsets shorter than
    one hour are rendered as MM:SS, all others as HH:MM:SS. A value that
    can not be interpreted as an offset results in an empty string.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    from datetime import timezone

    try:
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # aware variant produces the same hour/minute/second fields.
        dt = datetime.fromtimestamp(value, timezone.utc)
    except (ValueError, TypeError, OverflowError, OSError):
        # OverflowError / OSError are raised for offsets that are out of
        # the range supported by the platform
        return ''

    if dt.hour == 0:
        return dt.strftime('%M:%S')
    return dt.strftime('%H:%M:%S')
def parse_time(value):
    """Parse a time value to an offset in seconds

    Integers are returned unchanged. Strings are tried as HH:MM:SS and
    MM:SS; anything else is handed to int(). Raises ValueError for None,
    for the empty string and for strings that can not be parsed.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty string in parse_time')

    for time_format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, time_format)
        except ValueError:
            # does not match this format, try the next one
            continue
        return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec

    return int(value)
def parse_bool(val):
    """Interpret a value as a boolean

    Booleans are returned unchanged. Strings are True only if they spell
    "true" (in any capitalisation). Every other value, including None,
    is False.

    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    if isinstance(val, str):
        return val.lower() == 'true'
    # None and other non-string values have no .lower(); treat as False
    return False
def progress(val, max_val, status_str='', max_width=50, stream=sys.stdout):
    """Write a one-line text progress bar to ``stream``

    The line starts with a carriage return, so repeated calls overwrite
    each other on a terminal. It consists of a bar of ``max_width``
    characters filled with '#' in proportion to ``val / max_val``, the
    percentage embedded in the middle of the bar, the raw ``val`` and
    ``max_val``, and ``status_str``. A ``max_val`` of 0 (or less) is
    displayed as 0 %.
    """
    factor = float(val)/max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = '{val:.2%}'.format(val=factor)

    # progress bar filled with #s; clamped to [0, max_width] so that values
    # outside of 0..max_val can't change the width of the bar
    filled = max(0, min(int(factor*max_width), max_width))
    progress_str = '#' * filled + ' ' * (max_width-filled)

    # insert percentage into bar; the start is clamped to 0 because a
    # negative slice index (percentage wider than the bar) would garble it
    percentage_start = max(0, int((max_width-len(percentage_str))/2))
    percentage_end = percentage_start + len(percentage_str)
    progress_str = (progress_str[:percentage_start] +
                    percentage_str +
                    progress_str[percentage_end:])

    print('\r', end=' ', file=stream)
    print('[ %s ] %s / %s | %s' % (
        progress_str,
        val,
        max_val,
        status_str), end=' ', file=stream)
    # no newline is written, so flush explicitly to make the update visible
    stream.flush()
149 return list(set(a
) & set(b
))
152 def parse_range(s
, min, max, default
=None):
154 Parses the string and returns its value. If the value is outside the given
155 range, its closest number within the range is returned
157 >>> parse_range('5', 0, 10)
160 >>> parse_range('0', 5.0, 10)
163 >>> parse_range('15',0, 10)
166 >>> parse_range('x', 0., 20)
169 >>> parse_range('x', 0, 20, 20)
182 except (ValueError, TypeError):
183 return default
if default
is not None else out_type((max-min)/2)
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    The fields of the object are interpreted as UTC, independently of the
    time zone the process is running in.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    import calendar

    # time.mktime() would interpret the tuple as *local* time, so the
    # result would depend on the TZ of the machine; timegm() assumes UTC.
    return int(calendar.timegm(datetime_obj.timetuple()))
re_url = re.compile('^https?://')


def is_url(string):
    """ Tells whether a string starts like an HTTP(S) URL

    Only the prefix (http:// or https://) is checked, the rest of the
    string is not validated.

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return re_url.match(string) is not None
# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotical complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest of the given strings """
    return min(strings, key=len)


def longest_substr(strings):
    """
    Returns the longest common substring of the given strings

    An empty string is returned if ``strings`` is empty or if the strings
    have nothing in common. If several substrings share the maximum
    length, the one that occurs first in the shortest string wins.

    >>> longest_substr(['xabcy', 'zabcw', 'abc'])
    'abc'
    """
    substr = ""
    if not strings:
        return substr

    reference = shortest_of(strings)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates at least len(substr) + 1 long; the slice
        # end j is exclusive, so it has to run up to (and including) length
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if not all(candidate in text for text in strings):
                # a longer slice starting at i can't be common either
                break
            substr = candidate
    return substr
241 def file_hash(f
, h
=hashlib
.md5
, block_size
=2**20):
242 """ returns the hash of the contents of a file """
245 buf
= f
.read(block_size
)
def url_add_authentication(url, username, password):
    """
    Embeds the given credentials (username, password) into a URL and
    returns the resulting authenticated URL.

    The URL is returned unchanged if the username is None or empty.
    Credentials that are already part of the URL are replaced. A
    password of None results in a URL that carries the username only.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    credentials = [urllib.parse.quote(username, safe='@')]
    if password is not None:
        credentials.append(urllib.parse.quote(password, safe='@:'))

    # Drop credentials the URL may already contain, then put the new ones
    # in front of the host part
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(
        url_strip_authentication(url))
    netloc = '%s@%s' % (':'.join(credentials), netloc)

    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
def urlopen(url, headers=None, data=None):
    """
    Opens a URL with the User-agent header set to settings.USER_AGENT

    Credentials embedded in the URL are removed from it and used for HTTP
    Basic authentication instead. ``headers`` are sent in addition to the
    User-agent (the given object is not modified); ``data`` is passed on
    as the request body. Returns the response of the opener.
    """
    handlers = []

    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handlers.append(urllib.request.HTTPBasicAuthHandler(password_mgr))

    # without credentials this is the plain default opener
    opener = urllib.request.build_opener(*handlers)

    # work on a copy so that the caller's headers stay untouched
    request_headers = {} if headers is None else dict(headers)
    request_headers.update({'User-agent': settings.USER_AGENT})

    request = urllib.request.Request(url, data=data, headers=request_headers)
    return opener.open(request)
def username_password_from_url(url):
    """
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    Both values are returned percent-decoded. Raises ValueError if the
    URL is not a string.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    # isinstance() also accepts subclasses of str
    if not isinstance(url, str):
        raise ValueError('URL has to be a string or unicode object.')

    (username, password) = (None, None)

    netloc = urllib.parse.urlparse(url).netloc

    if '@' in netloc:
        authentication = netloc.rsplit('@', 1)[0]
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            username = urllib.parse.unquote(username)
            password = urllib.parse.unquote(password)
        else:
            username = urllib.parse.unquote(authentication)

    return (username, password)
def url_strip_authentication(url):
    """
    Returns the given URL without its authentication data (everything
    up to the last '@' of the host part). URLs that carry no
    authentication data are only passed through urlsplit/urlunsplit.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    parts = urllib.parse.urlsplit(url)

    # rpartition yields the part after the last '@', or the whole host
    # part if it does not contain any '@'
    host = parts.netloc.rpartition('@')[2]

    return urllib.parse.urlunsplit(parts._replace(netloc=host))
431 # Native filesystem encoding detection
432 encoding
= sys
.getfilesystemencoding()
436 """ returns the commit and message of the current git HEAD """
439 pr
= subprocess
.Popen('/usr/bin/git log -n 1 --oneline'.split(),
440 cwd
= settings
.BASE_DIR
,
441 stdout
= subprocess
.PIPE
,
442 stderr
= subprocess
.PIPE
,
448 (out
, err
) = pr
.communicate()
452 outs
= [o
.decode('utf-8') for o
in out
.split()]
454 msg
= ' ' .join(outs
[1:])
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    The body (``request.body``) is expected to be UTF-8 encoded JSON. If
    the request carries ``Content-Encoding: gzip``, the body is
    decompressed first. Errors of zlib (corrupt data), of the UTF-8
    decoding and of the JSON parser are passed on to the caller.
    """
    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # With the default wbits, zlib only understands zlib-framed data
        # and fails on real gzip streams. MAX_WBITS | 32 enables automatic
        # header detection, so both gzip and zlib streams are accepted.
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body.decode('utf-8'))
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned. This includes None,
    empty and very short (less than 8 characters) input.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    # None / empty input can't be normalized (and has no strip())
    if not url:
        return None

    url = url.strip()
    if len(url) < 8:
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
        'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.parse.quote(path, '/%')
    query = urllib.parse.quote_plus(query, ':&=')

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit('@', 1)[-1]

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slightly different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
583 """ Return the link to the Django Admin Edit page """
584 return reverse('admin:%s_%s_change' % (obj
._meta
.app_label
,
585 obj
._meta
.model_name
),
def random_token(length=32):
    """ Returns a random token of the given length

    The token consists of ASCII letters and digits. The characters are
    drawn independently from a cryptographically secure source, so they
    may repeat and the length is not limited by the size of the alphabet.
    """
    import secrets
    import string

    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))
def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field

    ``cls`` is a class whose ``_meta.get_field(field)`` provides the
    ``max_length`` of the field. None is passed through, as are values
    for fields that do not define a maximum length. A warning is logged
    whenever a value is actually shortened.
    """
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    if max_length is None:
        # no limit defined for this field; comparing against None would
        # raise a TypeError
        return val

    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warning('%s.%s length reduced from %d to %d',
                       cls.__name__, field, orig_length, max_length)

    return val
def get_domain(url):
    """ Returns the domain name of a URL

    The domain is the network location of the URL up to (and not
    including) its first colon, i.e. without the port.

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'
    """
    netloc = urllib.parse.urlparse(url).netloc

    # partition() returns the whole netloc if it does not contain a colon
    domain, _, _ = netloc.partition(':')
    return domain
628 def set_ordered_entries(obj
, new_entries
, existing
, EntryClass
,
629 value_name
, parent_name
):
630 """ Update the object's entries to the given list
632 'new_entries' should be a list of objects that are later wrapped in
633 EntryClass instances. 'value_name' is the name of the EntryClass property
634 that contains the values; 'parent_name' is the one that references obj.
636 Entries that do not exist are created. Existing entries that are not in
637 'new_entries' are deleted. """
639 logger
.info('%d existing entries', len(existing
))
641 logger
.info('%d new entries', len(new_entries
))
643 with transaction
.atomic():
644 max_order
= max([s
.order
for s
in existing
.values()] +
646 logger
.info('Renumbering entries starting from %d', max_order
+1)
647 for n
, entry
in enumerate(existing
.values(), max_order
+1):
651 logger
.info('%d existing entries', len(existing
))
653 for n
, entry
in enumerate(new_entries
):
655 e
= existing
.pop(entry
)
656 logger
.info('Updating existing entry %d: %s', n
, entry
)
660 logger
.info('Creating new entry %d: %s', n
, entry
)
666 from mygpo
.podcasts
.models
import ScopedModel
667 if issubclass(EntryClass
, ScopedModel
):
668 links
['scope'] = obj
.scope
670 EntryClass
.objects
.create(order
=n
, **links
)
671 except IntegrityError
as ie
:
672 logger
.warn('Could not create enry for %s: %s', obj
, ie
)
674 with transaction
.atomic():
675 delete
= [s
.pk
for s
in existing
.values()]
676 logger
.info('Deleting %d entries', len(delete
))
677 EntryClass
.objects
.filter(id__in
=delete
).delete()