1 # -*- coding: utf-8 -*-
13 from datetime
import datetime
, timedelta
, date
17 import urllib
.request
, urllib
.parse
, urllib
.error
18 import urllib
.request
, urllib
.error
, urllib
.parse
22 from django
.db
import transaction
, IntegrityError
23 from django
.conf
import settings
24 from django
.urls
import reverse
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
31 def daterange(from_date
, to_date
=None, leap
=timedelta(days
=1)):
33 >>> from_d = datetime(2010, 1, 1)
34 >>> to_d = datetime(2010, 1, 5)
35 >>> list(daterange(from_d, to_d))
36 [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
40 if isinstance(from_date
, datetime
):
41 to_date
= datetime
.utcnow()
43 to_date
= date
.today()
45 while from_date
<= to_date
:
47 from_date
= from_date
+ leap
51 def format_time(value
):
52 """Format an offset (in seconds) to a string
54 The offset should be an integer or float value.
62 >>> format_time(10921)
66 dt
= datetime
.utcfromtimestamp(value
)
67 except (ValueError, TypeError):
71 return dt
.strftime('%M:%S')
73 return dt
.strftime('%H:%M:%S')
76 def parse_time(value
):
81 >>> parse_time('05:10') #5*60+10
84 >>> parse_time('1:05:10') #60*60+5*60+10
88 raise ValueError('None value in parse_time')
90 if isinstance(value
, int):
91 # Don't need to parse already-converted time value
95 raise ValueError('Empty valueing in parse_time')
97 for format
in ('%H:%M:%S', '%M:%S'):
99 t
= time
.strptime(value
, format
)
100 return t
.tm_hour
* 60 * 60 + t
.tm_min
* 60 + t
.tm_sec
101 except ValueError as e
:
109 >>> parse_bool('True')
112 >>> parse_bool('true')
118 if isinstance(val
, bool):
120 if val
.lower() == 'true':
125 def progress(val
, max_val
, status_str
='', max_width
=50, stream
=sys
.stdout
):
127 factor
= float(val
) / max_val
if max_val
> 0 else 0
129 # progress as percentage
130 percentage_str
= '{val:.2%}'.format(val
=factor
)
132 # progress bar filled with #s
133 factor
= min(int(factor
* max_width
), max_width
)
134 progress_str
= '#' * factor
+ ' ' * (max_width
- factor
)
136 # insert percentage into bar
137 percentage_start
= int((max_width
- len(percentage_str
)) / 2)
139 progress_str
[:percentage_start
]
141 + progress_str
[percentage_start
+ len(percentage_str
) :]
144 print('\r', end
=' ', file=stream
)
146 '[ %s ] %s / %s | %s' % (progress_str
, val
, max_val
, status_str
),
154 return list(set(a
) & set(b
))
157 def parse_range(s
, min, max, default
=None):
159 Parses the string and returns its value. If the value is outside the given
160 range, its closest number within the range is returned
162 >>> parse_range('5', 0, 10)
165 >>> parse_range('0', 5.0, 10)
168 >>> parse_range('15',0, 10)
171 >>> parse_range('x', 0., 20)
174 >>> parse_range('x', 0, 20, 20)
187 except (ValueError, TypeError):
188 return default
if default
is not None else out_type((max - min) / 2)
def get_timestamp(datetime_obj):
    """ Returns the timestamp as an int for the given datetime object

    Naive datetime objects are interpreted as UTC, which matches how the
    rest of this module produces them (``datetime.utcnow()``,
    ``datetime.utcfromtimestamp()``). Timezone-aware objects are converted
    to UTC first. The result therefore does not depend on the timezone
    the process is running in.

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    # Imported locally so the block does not rely on a module-level import.
    import calendar

    # time.mktime() would interpret the tuple as *local* time; timegm()
    # is its UTC counterpart and already returns an int.
    return calendar.timegm(datetime_obj.utctimetuple())
# Pre-compiled pattern matching strings that begin with "http://" or
# "https://" (lower-case scheme only, anchored at the start).
re_url = re.compile('^https?://')
207 """ Returns true if a string looks like an URL
209 >>> is_url('http://example.com/some-path/file.xml')
212 >>> is_url('something else')
216 return bool(re_url
.match(string
))
219 # from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
220 # this does not increase asymptotic complexity
221 # but can still waste more time than it saves.
def shortest_of(strings):
    """ Returns the shortest item of the given strings

    When several items share the minimal length, the one that comes first
    in iteration order is returned. An empty iterable raises ValueError.
    """
    shortest = min(strings, key=len)
    return shortest
226 def longest_substr(strings
):
228 Returns the longest common substring of the given strings
234 reference
= shortest_of(strings
)
235 length
= len(reference
)
236 # find a suitable slice i:j
237 for i
in range(length
):
238 # only consider strings long at least len(substr) + 1
239 for j
in range(i
+ len(substr
) + 1, length
):
240 candidate
= reference
[i
:j
]
241 if all(candidate
in text
for text
in strings
):
246 def file_hash(f
, h
=hashlib
.md5
, block_size
=2 ** 20):
247 """ returns the hash of the contents of a file """
250 buf
= f
.read(block_size
)
258 def url_add_authentication(url
, username
, password
):
260 Adds authentication data (username, password) to a given
261 URL in order to construct an authenticated URL.
263 >>> url_add_authentication('https://host.com/', '', None)
265 >>> url_add_authentication('http://example.org/', None, None)
266 'http://example.org/'
267 >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
268 'telnet://foo:bar@host.com/'
269 >>> url_add_authentication('ftp://example.org', 'billy', None)
270 'ftp://billy@example.org'
271 >>> url_add_authentication('ftp://example.org', 'billy', '')
272 'ftp://billy:@example.org'
273 >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
274 'http://aa:bc@localhost/x'
275 >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
276 'http://i%2Fo:P@ss:@blubb.lan/u.html'
277 >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
279 >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
280 'http://P@x:i%2F@cx.lan'
281 >>> url_add_authentication('http://x.org/', 'a b', 'c d')
282 'http://a%20b:c%20d@x.org/'
284 if username
is None or username
== '':
287 # Relaxations of the strict quoting rules (bug 1521):
288 # 1. Accept '@' in username and password
289 # 2. Accept ':' in password only
290 username
= urllib
.parse
.quote(username
, safe
='@')
292 if password
is not None:
293 password
= urllib
.parse
.quote(password
, safe
='@:')
294 auth_string
= ':'.join((username
, password
))
296 auth_string
= username
298 url
= url_strip_authentication(url
)
300 url_parts
= list(urllib
.parse
.urlsplit(url
))
301 # url_parts[1] is the HOST part of the URL
302 url_parts
[1] = '@'.join((auth_string
, url_parts
[1]))
304 return urllib
.parse
.urlunsplit(url_parts
)
307 def urlopen(url
, headers
=None, data
=None):
309 An URL opener with the User-agent set to gPodder (with version)
311 username
, password
= username_password_from_url(url
)
312 if username
is not None or password
is not None:
313 url
= url_strip_authentication(url
)
314 password_mgr
= urllib
.request
.HTTPPasswordMgrWithDefaultRealm()
315 password_mgr
.add_password(None, url
, username
, password
)
316 handler
= urllib
.request
.HTTPBasicAuthHandler(password_mgr
)
317 opener
= urllib
.request
.build_opener(handler
)
319 opener
= urllib
.request
.build_opener()
324 headers
= dict(headers
)
326 headers
.update({'User-agent': settings
.USER_AGENT
})
327 request
= urllib
.request
.Request(url
, data
=data
, headers
=headers
)
328 return opener
.open(request
)
331 def username_password_from_url(url
):
333 Returns a tuple (username,password) containing authentication
334 data from the specified URL or (None,None) if no authentication
335 data can be found in the URL.
337 See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
339 >>> username_password_from_url('https://@host.com/')
341 >>> username_password_from_url('telnet://host.com/')
343 >>> username_password_from_url('ftp://foo:@host.com/')
345 >>> username_password_from_url('http://a:b@host.com/')
347 >>> username_password_from_url(1)
348 Traceback (most recent call last):
350 ValueError: URL has to be a string or unicode object.
351 >>> username_password_from_url(None)
352 Traceback (most recent call last):
354 ValueError: URL has to be a string or unicode object.
355 >>> username_password_from_url('http://a@b:c@host.com/')
357 >>> username_password_from_url('ftp://a:b:c@host.com/')
359 >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
361 >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
363 >>> username_password_from_url('http://w%20x:y%20z@example.org/')
365 >>> username_password_from_url('http://example.com/x@y:z@test.com/')
368 if type(url
) not in (str, str):
369 raise ValueError('URL has to be a string or unicode object.')
371 (username
, password
) = (None, None)
373 (scheme
, netloc
, path
, params
, query
, fragment
) = urllib
.parse
.urlparse(url
)
376 (authentication
, netloc
) = netloc
.rsplit('@', 1)
377 if ':' in authentication
:
378 (username
, password
) = authentication
.split(':', 1)
380 # RFC1738 dictates that we should not allow ['/', '@', ':']
381 # characters in the username and password field (Section 3.1):
383 # 1. The "/" can't be in there at this point because of the way
384 # urlparse (which we use above) works.
385 # 2. Due to gPodder bug 1521, we allow "@" in the username and
386 # password field. We use netloc.rsplit('@', 1), which will
387 # make sure that we split it at the last '@' in netloc.
388 # 3. The colon must be excluded (RFC2617, Section 2) in the
389 # username, but is apparently allowed in the password. This
390 # is handled by the authentication.split(':', 1) above, and
391 # will cause any extraneous ':'s to be part of the password.
393 username
= urllib
.parse
.unquote(username
)
394 password
= urllib
.parse
.unquote(password
)
396 username
= urllib
.parse
.unquote(authentication
)
398 return (username
, password
)
def url_strip_authentication(url):
    """
    Removes the "user:password@" part from an URL, if there is one, and
    returns the resulting URL. URLs without authentication data are
    returned as reassembled by urlunsplit.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    pieces = urllib.parse.urlsplit(url)

    # Everything up to and including the LAST '@' of the network location
    # is authentication data; rpartition leaves the netloc untouched when
    # it contains no '@' at all.
    host = pieces.netloc.rpartition('@')[2]

    return urllib.parse.urlunsplit(pieces._replace(netloc=host))
435 # Native filesystem encoding detection
436 encoding
= sys
.getfilesystemencoding()
440 """ returns the commit and message of the current git HEAD """
443 pr
= subprocess
.Popen(
444 '/usr/bin/git log -n 1 --oneline'.split(),
445 cwd
=settings
.BASE_DIR
,
446 stdout
=subprocess
.PIPE
,
447 stderr
=subprocess
.PIPE
,
453 (out
, err
) = pr
.communicate()
457 outs
= [o
.decode('utf-8') for o
in out
.split()]
459 msg
= ' '.join(outs
[1:])
def parse_request_body(request):
    """ returns the parsed request body, handles gzip encoding

    ``request`` needs to provide ``body`` (the raw bytes) and ``META`` (a
    mapping that is queried for 'HTTP_CONTENT_ENCODING').

    If the content encoding is 'gzip', the body is decompressed before
    it is parsed. Both gzip-framed data (what "Content-Encoding: gzip"
    actually denotes) and zlib-framed data (the only format that was
    accepted previously) are supported.

    The (decompressed) body is decoded as UTF-8 and parsed as JSON.

    Raises zlib.error for data that can not be decompressed,
    UnicodeDecodeError for data that is not valid UTF-8 and ValueError
    (json.JSONDecodeError) for invalid JSON.
    """

    raw_body = request.body
    content_enc = request.META.get('HTTP_CONTENT_ENCODING')

    if content_enc == 'gzip':
        # MAX_WBITS | 32 enables automatic detection of the gzip or zlib
        # header; the default wbits only accepts zlib-framed data and
        # fails on real gzip streams ("incorrect header check").
        raw_body = zlib.decompress(raw_body, zlib.MAX_WBITS | 32)

    return json.loads(raw_body.decode('utf-8'))
475 def normalize_feed_url(url
):
477 Converts any URL to http:// or ftp:// so that it can be
478 used with "wget". If the URL cannot be converted (invalid
479 or unknown scheme), "None" is returned.
481 This will also normalize feed:// and itpc:// to http://.
483 >>> normalize_feed_url('itpc://example.org/podcast.rss')
484 'http://example.org/podcast.rss'
486 If no URL scheme is defined (e.g. "curry.com"), we will
487 simply assume the user intends to add a http:// feed.
489 >>> normalize_feed_url('curry.com')
492 There are even some more shortcuts for advanced users
493 and lazy typists (see the source for details).
495 >>> normalize_feed_url('fb:43FPodcast')
496 'http://feeds.feedburner.com/43FPodcast'
498 It will also take care of converting the domain name to
499 all-lowercase (because domains are not case sensitive):
501 >>> normalize_feed_url('http://Example.COM/')
502 'http://example.com/'
504 Some other minimalistic changes are also taken care of,
505 e.g. a ? with an empty query is removed:
507 >>> normalize_feed_url('http://example.org/test?')
508 'http://example.org/test'
510 Leading and trailing whitespace is removed
512 >>> normalize_feed_url(' http://example.com/podcast.rss ')
513 'http://example.com/podcast.rss'
515 HTTP Authentication is removed to protect users' privacy
517 >>> normalize_feed_url('http://a@b:c@host.com/')
519 >>> normalize_feed_url('ftp://a:b:c@host.com/')
521 >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
523 >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
525 >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
526 'http://example.org/'
527 >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
528 'http://example.com/x%40y%3Az%40test.com/'
529 >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
530 'http://en.wikipedia.org/wiki/%C3%84'
531 >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
532 'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
535 if not url
or len(url
) < 8:
538 # This is a list of prefixes that you can use to minimize the amount of
539 # keystrokes that you have to use.
540 # Feel free to suggest other useful prefixes, and I'll add them here.
542 'fb:': 'http://feeds.feedburner.com/%s',
543 'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
544 'sc:': 'http://soundcloud.com/%s',
545 'fm4od:': 'http://onapp1.orf.at/webcam/fm4/fod/%s.xspf',
546 # YouTube playlists. To get a list of playlists per-user, use:
547 # https://gdata.youtube.com/feeds/api/users/<username>/playlists
548 'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
551 for prefix
, expansion
in PREFIXES
.items():
552 if url
.startswith(prefix
):
553 url
= expansion
% (url
[len(prefix
) :],)
556 # Assume HTTP for URLs without scheme
558 url
= 'http://' + url
560 scheme
, netloc
, path
, query
, fragment
= urllib
.parse
.urlsplit(url
)
562 # Schemes and domain names are case insensitive
563 scheme
, netloc
= scheme
.lower(), netloc
.lower()
565 # encode non-encoded characters
566 path
= urllib
.parse
.quote(path
, '/%')
567 query
= urllib
.parse
.quote_plus(query
, ':&=')
569 # Remove authentication to protect users' privacy
570 netloc
= netloc
.rsplit('@', 1)[-1]
572 # Normalize empty paths to "/"
576 # feed://, itpc:// and itms:// are really http://
577 if scheme
in ('feed', 'itpc', 'itms'):
580 if scheme
not in ('http', 'https', 'ftp', 'file'):
583 # urlunsplit might return "a slightly different, but equivalent URL"
584 return urllib
.parse
.urlunsplit((scheme
, netloc
, path
, query
, fragment
))
588 """ Return the link to the Django Admin Edit page """
590 'admin:%s_%s_change' % (obj
._meta
.app_label
, obj
._meta
.model_name
),
def random_token(length=32):
    """ Returns a random token consisting of ASCII letters and digits

    ``length`` is the number of characters of the token. Every character
    is drawn independently (with replacement) from the operating system's
    secure random source. Characters may therefore repeat, and ``length``
    is not limited by the size of the alphabet (62 characters).
    """
    # Imported locally so the block does not rely on module-level imports.
    import random
    import string

    alphabet = string.ascii_letters + string.digits

    # random.sample() draws without replacement (no repeated characters,
    # ValueError for length > 62) and uses the predictable Mersenne
    # Twister; SystemRandom is backed by os.urandom().
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))
602 def to_maxlength(cls
, field
, val
):
603 """ Cut val to the maximum length of cls's field """
607 max_length
= cls
._meta
.get_field(field
).max_length
608 orig_length
= len(val
)
609 if orig_length
> max_length
:
610 val
= val
[:max_length
]
612 '%s.%s length reduced from %d to %d',
623 """ Returns the domain name of a URL
625 >>> get_domain('http://example.com')
628 >>> get_domain('https://example.com:80/my-podcast/feed.rss')
631 netloc
= urllib
.parse
.urlparse(url
).netloc
633 port_idx
= netloc
.index(':')
634 return netloc
[:port_idx
]
640 def set_ordered_entries(
641 obj
, new_entries
, existing
, EntryClass
, value_name
, parent_name
643 """ Update the object's entries to the given list
645 'new_entries' should be a list of objects that are later wrapped in
646 EntryClass instances. 'value_name' is the name of the EntryClass property
647 that contains the values; 'parent_name' is the one that references obj.
649 Entries that do not exist are created. Existing entries that are not in
650 'new_entries' are deleted. """
652 logger
.info('%d existing entries', len(existing
))
654 logger
.info('%d new entries', len(new_entries
))
656 with transaction
.atomic():
657 max_order
= max([s
.order
for s
in existing
.values()] + [len(new_entries
)])
658 logger
.info('Renumbering entries starting from %d', max_order
+ 1)
659 for n
, entry
in enumerate(existing
.values(), max_order
+ 1):
663 logger
.info('%d existing entries', len(existing
))
665 for n
, entry
in enumerate(new_entries
):
667 e
= existing
.pop(entry
)
668 logger
.info('Updating existing entry %d: %s', n
, entry
)
672 logger
.info('Creating new entry %d: %s', n
, entry
)
674 links
= {value_name
: entry
, parent_name
: obj
}
675 from mygpo
.podcasts
.models
import ScopedModel
677 if issubclass(EntryClass
, ScopedModel
):
678 links
['scope'] = obj
.scope
680 EntryClass
.objects
.create(order
=n
, **links
)
681 except IntegrityError
as ie
:
682 logger
.warn('Could not create enry for %s: %s', obj
, ie
)
684 with transaction
.atomic():
685 delete
= [s
.pk
for s
in existing
.values()]
686 logger
.info('Deleting %d entries', len(delete
))
687 EntryClass
.objects
.filter(id__in
=delete
).delete()