Patch #402 (new genkan_io provider)
[manga-dl.git] / manga_py / providers / helpers / std.py
blob 0356c235c5d89e4020e05b8a21899ec75015ee35
1 import re
2 from logging import error
3 from time import sleep
4 from typing import Optional
6 from requests import get
class Std:
    """Shared helper methods for provider implementations: DOM selection,
    cover/image extraction, name parsing and plain-file downloading.

    NOTE(review): intended as a mix-in — relies on the host provider exposing
    ``content``, ``document_fromstring``, ``http()``, ``http_get``,
    ``get_url``, ``domain``, ``manga_name`` and ``_storage``; confirm against
    the provider base class.
    """

    # Cookie dict used by _download(); None means "no extra cookies".
    _download_cookies = None
    # Header dict used by _download(); None means "no extra headers".
    _download_headers = None
13 def _elements(self, selector, content=None) -> list:
14 if not content:
15 content = self.content
16 return self.document_fromstring(content, selector)
18 def _cover_from_content(self, selector, attr='src') -> str:
19 image = self._elements(selector)
20 if image is not None and len(image):
21 return self.http().normalize_uri(image[0].get(attr))
23 @staticmethod
24 def _first_select_options(parser, selector, skip_first=True) -> list:
25 options = 'option'
26 if skip_first:
27 options = 'option + option'
28 select = parser.cssselect(selector)
29 if select:
30 return select[0].cssselect(options)
31 return []
33 @classmethod
34 def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
35 image = parser.cssselect(selector)
36 images = []
37 for i in image:
38 src = i.get(attr) or i.get(alternative_attr)
39 images.append(src.strip(' \r\n\t\0'))
40 return images
42 @classmethod
43 def _idx_to_x2(cls, idx, default=0) -> list:
44 return [
45 str(idx[0]),
46 str(default if len(idx) < 2 or not idx[1] else idx[1])
49 @staticmethod
50 def _join_groups(idx, glue='-') -> str:
51 result = []
52 for i in idx:
53 if i:
54 result.append(i)
55 return glue.join(result)
57 def _get_name(self, selector, url=None) -> str:
58 if url is None:
59 url = self.get_url()
60 return re.search(selector, url).group(1)
62 def _get_content(self, tpl, **kwargs) -> str:
63 try:
64 _kw = kwargs.copy()
65 _kw.setdefault('domain', self.domain)
66 _kw.setdefault('manga_name', self.manga_name)
67 return self.http_get(tpl.format(**_kw))
68 except Exception:
69 return self.http_get(tpl.format(self.domain, self.manga_name))
71 def _base_cookies(self, url=None):
72 if url is None:
73 url = self.get_url()
74 cookies = self.http().get_base_cookies(url)
75 self._storage['cookies'] = cookies.get_dict()
77 def parse_background(self, image) -> str:
78 url = re.search(
79 r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
80 image.get('style')
82 return self.http().normalize_uri(url.group(1))
84 def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
85 doc = self.document_fromstring(content, selector)
86 if not doc:
87 return None
88 return self.element_text_content(doc[idx], strip)
90 def element_text_content(self, element, strip: bool = True) -> str:
91 text = element.text_content()
92 if strip:
93 text = text.strip()
94 return text
96 def _download(self, file_name, url, method):
97 # clean file downloader
98 cookies = self._download_cookies or {}
99 headers = self._download_headers or {}
101 now_try_count = 0
102 while now_try_count < 5:
103 with open(file_name, 'wb') as out_file:
104 now_try_count += 1
105 response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
106 if response.status_code >= 400:
107 error('ERROR! Code {}\nUrl: {}'.format(
108 response.status_code,
109 url,
111 sleep(2)
112 continue
113 out_file.write(response.content)
114 response.close()
115 out_file.close()
116 break
118 @staticmethod
119 def _test_url(url: str, path: str = None) -> bool:
120 _path = r'https?://.+?\.\w{2,7}'
121 if path is not None:
122 _path += path
123 _re = re.compile(_path)
124 return _re.search(url) is not None