# -*- coding: utf-8 -*-

import json
import functools
import types
import subprocess
import os
import operator
import sys
import re
import collections
import itertools
from datetime import datetime, timedelta, date
import time
import hashlib
import urllib.error
import urllib.parse
import urllib.request
import zlib
import shlex

from django.db import transaction, IntegrityError
from django.conf import settings
from django.urls import reverse

import logging

logger = logging.getLogger(__name__)


def daterange(from_date, to_date=None, leap=timedelta(days=1)):
    """
    >>> from_d = datetime(2010, 1, 1)
    >>> to_d = datetime(2010, 1, 5)
    >>> list(daterange(from_d, to_d))
    [datetime.datetime(2010, 1, 1, 0, 0), datetime.datetime(2010, 1, 2, 0, 0), datetime.datetime(2010, 1, 3, 0, 0), datetime.datetime(2010, 1, 4, 0, 0), datetime.datetime(2010, 1, 5, 0, 0)]
    """

    if to_date is None:
        if isinstance(from_date, datetime):
            to_date = datetime.utcnow()
        else:
            to_date = date.today()

    while from_date <= to_date:
        yield from_date
        from_date = from_date + leap
    return


def format_time(value):
    """Format an offset (in seconds) to a string

    The offset should be an integer or float value.

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    try:
        dt = datetime.utcfromtimestamp(value)
    except (ValueError, TypeError):
        return ""

    if dt.hour == 0:
        return dt.strftime("%M:%S")
    else:
        return dt.strftime("%H:%M:%S")


def parse_time(value):
    """
    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError("None value in parse_time")

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == "":
        raise ValueError("Empty value string in parse_time")

    for format in ("%H:%M:%S", "%M:%S"):
        try:
            t = time.strptime(value, format)
            return t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
        except ValueError:
            continue

    return int(value)


def parse_bool(val):
    """
    >>> parse_bool('True')
    True

    >>> parse_bool('true')
    True

    >>> parse_bool('')
    False
    """
    if isinstance(val, bool):
        return val
    if val.lower() == "true":
        return True
    return False


def progress(val, max_val, status_str="", max_width=50, stream=sys.stdout):

    factor = float(val) / max_val if max_val > 0 else 0

    # progress as percentage
    percentage_str = "{val:.2%}".format(val=factor)

    # progress bar filled with #s
    factor = min(int(factor * max_width), max_width)
    progress_str = "#" * factor + " " * (max_width - factor)

    # insert percentage into bar
    percentage_start = int((max_width - len(percentage_str)) / 2)
    progress_str = (
        progress_str[:percentage_start]
        + percentage_str
        + progress_str[percentage_start + len(percentage_str) :]
    )

    print("\r", end=" ", file=stream)
    print(
        "[ %s ] %s / %s | %s" % (progress_str, val, max_val, status_str),
        end=" ",
        file=stream,
    )
    stream.flush()
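
# Illustrative use of progress() (not part of the original module): it redraws a
# single console line of the form "[ <bar with percentage> ] <val> / <max> | <status>".
# The 'items' iterable and 'handle' function below are hypothetical.
#
#     for i, item in enumerate(items, start=1):
#         handle(item)
#         progress(i, len(items), "items")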


def intersect(a, b):
    return list(set(a) & set(b))
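
# Example (illustrative, not from the original source): the result has set
# semantics, so duplicates are dropped and the order is not guaranteed.
#
#     >>> sorted(intersect([1, 2, 2, 3], [2, 3, 4]))
#     [2, 3]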


def parse_range(s, min, max, default=None):
    """
    Parses the string and returns its value. If the value is outside the given
    range, its closest number within the range is returned

    >>> parse_range('5', 0, 10)
    5

    >>> parse_range('0', 5.0, 10)
    5.0

    >>> parse_range('15', 0, 10)
    10

    >>> parse_range('x', 0., 20)
    10.0

    >>> parse_range('x', 0, 20, 20)
    20
    """
    out_type = type(min)

    try:
        val = int(s)
        if val < min:
            return min
        if val > max:
            return max
        return val

    except (ValueError, TypeError):
        return default if default is not None else out_type((max - min) / 2)


def get_timestamp(datetime_obj):
    """Returns the timestamp as an int for the given datetime object

    >>> get_timestamp(datetime(2011, 4, 7, 9, 30, 6))
    1302168606

    >>> get_timestamp(datetime(1970, 1, 1, 0, 0, 0))
    0
    """
    return int(time.mktime(datetime_obj.timetuple()))


re_url = re.compile("^https?://")


def is_url(string):
    """Returns true if a string looks like a URL

    >>> is_url('http://example.com/some-path/file.xml')
    True

    >>> is_url('something else')
    False
    """
    return bool(re_url.match(string))


# from http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
# this does not increase asymptotic complexity
# but can still waste more time than it saves.
def shortest_of(strings):
    return min(strings, key=len)


def longest_substr(strings):
    """
    Returns the longest common substring of the given strings
    """
    substr = ""
    if not strings:
        return substr
    reference = shortest_of(strings)
    length = len(reference)
    # find a suitable slice i:j
    for i in range(length):
        # only consider candidates that are at least len(substr) + 1 characters long
        for j in range(i + len(substr) + 1, length + 1):
            candidate = reference[i:j]
            if all(candidate in text for text in strings):
                substr = candidate
    return substr
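
# Example (illustrative, not from the original source):
#
#     >>> longest_substr(["flows", "flowing", "reflows"])
#     'flow'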


def file_hash(f, h=hashlib.md5, block_size=2**20):
    """returns the hash of the contents of a file"""
    f_hash = h()
    while True:
        buf = f.read(block_size)
        if not buf:
            break
        f_hash.update(buf)

    return f_hash
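
# Illustrative usage (the file name is hypothetical): the function returns the
# hash object itself, so call hexdigest() or digest() on the result.
#
#     with open("episode.mp3", "rb") as f:       # binary mode, so read() yields bytes
#         checksum = file_hash(f).hexdigest()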


def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P@ss:@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@x', 'i/')
    'http://P@x:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == "":
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    username = urllib.parse.quote(username, safe="@")

    if password is not None:
        password = urllib.parse.quote(password, safe="@:")
        auth_string = ":".join((username, password))
    else:
        auth_string = username

    url = url_strip_authentication(url)

    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = "@".join((auth_string, url_parts[1]))

    return urllib.parse.urlunsplit(url_parts)


def urlopen(url, headers=None, data=None):
    """
    A URL opener with the User-agent set to gPodder (with version)
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(handler)
    else:
        opener = urllib.request.build_opener()

    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    headers.update({"User-agent": settings.USER_AGENT})
    request = urllib.request.Request(url, data=data, headers=headers)
    return opener.open(request)


def username_password_from_url(url):
    r"""
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if not isinstance(url, str):
        raise ValueError("URL has to be a string or unicode object.")

    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urllib.parse.urlparse(url)

    if "@" in netloc:
        (authentication, netloc) = netloc.rsplit("@", 1)
        if ":" in authentication:
            (username, password) = authentication.split(":", 1)

            # RFC1738 dictates that we should not allow ['/', '@', ':']
            # characters in the username and password field (Section 3.1):
            #
            # 1. The "/" can't be in there at this point because of the way
            #    urlparse (which we use above) works.
            # 2. Due to gPodder bug 1521, we allow "@" in the username and
            #    password field. We use netloc.rsplit('@', 1), which will
            #    make sure that we split it at the last '@' in netloc.
            # 3. The colon must be excluded (RFC2617, Section 2) in the
            #    username, but is apparently allowed in the password. This
            #    is handled by the authentication.split(':', 1) above, and
            #    will cause any extraneous ':'s to be part of the password.

            username = urllib.parse.unquote(username)
            password = urllib.parse.unquote(password)
        else:
            username = urllib.parse.unquote(authentication)

    return (username, password)


def url_strip_authentication(url):
    """
    Strips authentication data from a URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    url_parts = list(urllib.parse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data
    if "@" in url_parts[1]:
        url_parts[1] = url_parts[1].rsplit("@", 1)[1]

    return urllib.parse.urlunsplit(url_parts)


# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()


def get_git_head():
    """returns the commit and message of the current git HEAD"""

    try:
        pr = subprocess.Popen(
            "/usr/bin/git log -n 1 --oneline".split(),
            cwd=settings.BASE_DIR,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

    except OSError:
        return None, None

    (out, err) = pr.communicate()
    if err:
        return None, None

    outs = [o.decode("utf-8") for o in out.split()]
    commit = outs[0]
    msg = " ".join(outs[1:])
    return commit, msg
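
# Illustrative return value (hash and message are made up); on error or when
# git is unavailable the function returns (None, None):
#
#     commit, msg = get_git_head()    # e.g. ('1a2b3c4', 'Fix feed update')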


def parse_request_body(request):
    """returns the parsed request body, handles gzip encoding"""

    raw_body = request.body
    content_enc = request.META.get("HTTP_CONTENT_ENCODING")

    if content_enc == "gzip":
        raw_body = zlib.decompress(raw_body)

    return json.loads(raw_body.decode("utf-8"))


def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    HTTP Authentication is removed to protect users' privacy

    >>> normalize_feed_url('http://a@b:c@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://a:b:c@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://i%2Fo:P%40ss%3A@host.com/')
    'http://host.com/'
    >>> normalize_feed_url('ftp://%C3%B6sterreich@host.com/')
    'ftp://host.com/'
    >>> normalize_feed_url('http://w%20x:y%20z@example.org/')
    'http://example.org/'
    >>> normalize_feed_url('http://example.com/x@y:z@test.com/')
    'http://example.com/x%40y%3Az%40test.com/'
    >>> normalize_feed_url('http://en.wikipedia.org/wiki/Ä')
    'http://en.wikipedia.org/wiki/%C3%84'
    >>> normalize_feed_url('http://en.wikipedia.org/w/index.php?title=Ä&action=edit')
    'http://en.wikipedia.org/w/index.php?title=%C3%84&action=edit'
    """
    url = url.strip()
    if not url or len(url) < 8:
        return None

    # This is a list of prefixes that you can use to minimize the amount of
    # keystrokes that you have to use.
    # Feel free to suggest other useful prefixes, and I'll add them here.
    PREFIXES = {
        "fb:": "http://feeds.feedburner.com/%s",
        "yt:": "http://www.youtube.com/rss/user/%s/videos.rss",
        "sc:": "http://soundcloud.com/%s",
        "fm4od:": "http://onapp1.orf.at/webcam/fm4/fod/%s.xspf",
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        "ytpl:": "http://gdata.youtube.com/feeds/api/playlists/%s",
    }

    for prefix, expansion in PREFIXES.items():
        if url.startswith(prefix):
            url = expansion % (url[len(prefix) :],)
            break

    # Assume HTTP for URLs without scheme
    if "://" not in url:
        url = "http://" + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # encode non-encoded characters
    path = urllib.parse.quote(path, "/%")
    query = urllib.parse.quote_plus(query, ":&=")

    # Remove authentication to protect users' privacy
    netloc = netloc.rsplit("@", 1)[-1]

    # Normalize empty paths to "/"
    if path == "":
        path = "/"

    # feed://, itpc:// and itms:// are really http://
    if scheme in ("feed", "itpc", "itms"):
        scheme = "http"

    if scheme not in ("http", "https", "ftp", "file"):
        return None

    # urlunsplit might return "a slightly different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))


def edit_link(obj):
    """Return the link to the Django Admin Edit page"""
    return reverse(
        "admin:%s_%s_change" % (obj._meta.app_label, obj._meta.model_name),
        args=(obj.pk,),
    )


def random_token(length=32):
    import random
    import string

    return "".join(random.sample(string.ascii_letters + string.digits, length))
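
# Note: random.sample() draws without replacement from the 62 letters and digits,
# so a token never repeats a character and `length` must not exceed 62.
# Illustrative usage:
#
#     token = random_token()      # a 32-character alphanumeric string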


def to_maxlength(cls, field, val):
    """Cut val to the maximum length of cls's field"""
    if val is None:
        return None

    max_length = cls._meta.get_field(field).max_length
    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warning(
            "%s.%s length reduced from %d to %d",
            cls.__name__,
            field,
            orig_length,
            max_length,
        )

    return val
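
# Illustrative usage (model, field and variable names are hypothetical): truncate
# a value before assigning it to a Django model field with a max_length constraint.
#
#     podcast.title = to_maxlength(Podcast, "title", fetched_title)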


def get_domain(url):
    """Returns the domain name of a URL

    >>> get_domain('http://example.com')
    'example.com'

    >>> get_domain('https://example.com:80/my-podcast/feed.rss')
    'example.com'
    """
    netloc = urllib.parse.urlparse(url).netloc
    try:
        port_idx = netloc.index(":")
        return netloc[:port_idx]

    except ValueError:
        return netloc


def set_ordered_entries(
    obj, new_entries, existing, EntryClass, value_name, parent_name
):
    """Update the object's entries to the given list

    'new_entries' should be a list of objects that are later wrapped in
    EntryClass instances. 'value_name' is the name of the EntryClass property
    that contains the values; 'parent_name' is the one that references obj.

    Entries that do not exist are created. Existing entries that are not in
    'new_entries' are deleted."""

    logger.info("%d existing entries", len(existing))
    logger.info("%d new entries", len(new_entries))

    with transaction.atomic():
        max_order = max([s.order for s in existing.values()] + [len(new_entries)])
        logger.info("Renumbering entries starting from %d", max_order + 1)
        for n, entry in enumerate(existing.values(), max_order + 1):
            entry.order = n
            entry.save()

    logger.info("%d existing entries", len(existing))

    for n, entry in enumerate(new_entries):
        try:
            e = existing.pop(entry)
            logger.info("Updating existing entry %d: %s", n, entry)
            e.order = n
            e.save()
        except KeyError:
            logger.info("Creating new entry %d: %s", n, entry)

            try:
                links = {value_name: entry, parent_name: obj}
                from mygpo.podcasts.models import ScopedModel

                if issubclass(EntryClass, ScopedModel):
                    links["scope"] = obj.scope

                EntryClass.objects.create(order=n, **links)
            except IntegrityError as ie:
                logger.warning("Could not create entry for %s: %s", obj, ie)

    with transaction.atomic():
        delete = [s.pk for s in existing.values()]
        logger.info("Deleting %d entries", len(delete))
        EntryClass.objects.filter(id__in=delete).delete()
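
# Illustrative call (Tag, its fields and the related manager are hypothetical):
# sync an object's ordered Tag entries so that exactly ["news", "tech"] remain,
# numbered 0 and 1. 'existing' must map each current value to its entry instance.
#
#     existing = {t.name: t for t in obj.tag_set.all()}
#     set_ordered_entries(obj, ["news", "tech"], existing, Tag, "name", "owner")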