Update workflows/publish_pypi.yml
[manga-dl.git] / manga_py / providers / helpers / std.py
blob69cea1fc86a93a9cdde5fbc0f02e9670c091b253
1 import re
2 from logging import error
3 from time import sleep
4 from typing import Optional
6 from requests import get
class Std:
    """Mixin of shared helper methods for manga provider implementations.

    Relies on attributes/methods supplied by the provider base class
    (``self.content``, ``document_fromstring``, ``normalize_uri``,
    ``http_get``, ``http``, ``get_url``, ``_storage``, ``domain``,
    ``manga_name``, ``name``) — assumed contract, defined elsewhere.
    """

    # Optional cookies/headers used by _download(); providers may override.
    _download_cookies = None
    _download_headers = None

    def _elements(self, selector, content=None) -> list:
        """Return elements matching *selector* in *content*.

        Falls back to ``self.content`` when *content* is falsy (None or '').
        """
        if not content:
            content = self.content
        return self.document_fromstring(content, selector)

    def _cover_from_content(self, selector, attr='src') -> Optional[str]:
        """Return the normalized URI taken from the first matched element's
        *attr*, or ``''`` when nothing matches (historical behavior: empty
        string, not None).
        """
        image = self._elements(selector)
        if image is not None and len(image):
            return self.normalize_uri(image[0].get(attr))
        return ''

    @staticmethod
    def _first_select_options(parser, selector, skip_first=True) -> list:
        """Return the <option> elements of the first <select>-like node
        matched by *selector*.

        With ``skip_first=True`` the leading (placeholder) option is
        excluded via the ``option + option`` sibling selector.
        """
        options = 'option + option' if skip_first else 'option'
        select = parser.cssselect(selector)
        if select:
            return select[0].cssselect(options)
        return []

    @classmethod
    def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
        """Collect image URLs from all elements matched by *selector*.

        Falls back to *alternative_attr* (typical lazy-load attribute) when
        *attr* is missing or empty.  NOTE(review): still raises
        AttributeError if both attributes are absent on an element —
        preserved historical behavior.
        """
        images = []
        for element in parser.cssselect(selector):
            src = element.get(attr) or element.get(alternative_attr)
            images.append(src.strip(' \r\n\t\0'))
        return images

    @classmethod
    def _idx_to_x2(cls, idx, default=0) -> list:
        """Normalize an index pair to exactly two string items.

        A missing or falsy second element is replaced with *default*.

        :param idx: sequence with at least one element
        :param default: substitute for a missing/falsy second element
        :return: list of two strings
        """
        return [
            str(idx[0]),
            str(default if len(idx) < 2 or not idx[1] else idx[1]),
        ]

    @staticmethod
    def _join_groups(idx, glue='-') -> str:
        """Join the truthy items of *idx* with *glue*, skipping empty ones."""
        return glue.join([i for i in idx if i])

    def _get_name(self, selector, url=None) -> str:
        """Extract ``group(1)`` of the regex *selector* from *url*
        (defaults to the current URL).

        Raises AttributeError when the pattern does not match — preserved
        historical behavior.
        """
        if url is None:
            url = self.get_url()
        return re.search(selector, url).group(1)

    def _get_content(self, tpl, domain=None, manga_name=None, name=None, **kwargs) -> str:
        """Fetch a page whose URL is built from the template *tpl*.

        :param tpl: format string using {domain}/{manga_name}/{name} keys
        :param domain: overrides ``self.domain`` when given
        :param manga_name: overrides ``self.manga_name`` when given
        :param name: overrides ``self.name`` when given
        :param kwargs: extra template substitutions
        :return: response body from ``self.http_get``
        """
        try:
            return self.http_get(tpl.format(
                domain=(domain or self.domain),
                manga_name=(manga_name or self.manga_name),
                name=(name or self.name),
                **kwargs
            ))
        except Exception:
            # Fallback for legacy templates with positional placeholders.
            return self.http_get(tpl.format(self.domain, self.manga_name))

    def _base_cookies(self, url=None):
        """Fetch base cookies for *url* (default: current URL) and store
        them in ``self._storage['cookies']`` as a plain dict."""
        if url is None:
            url = self.get_url()
        cookies = self.http().get_base_cookies(url)
        self._storage['cookies'] = cookies.get_dict()

    def parse_background(self, image) -> str:
        """Extract and normalize the URL from an element's inline CSS
        ``background`` style.

        Raises AttributeError when the style holds no background URL —
        preserved historical behavior.
        """
        url = re.search(
            r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
            image.get('style')
        )
        return self.normalize_uri(url.group(1))

    def text_content_full(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
        """Full text (including descendants) of the *idx*-th match of
        *selector* in *content*, or None when nothing matches."""
        doc = self.document_fromstring(content, selector)
        if not doc:
            return None
        return self.element_text_content_full(doc[idx], strip)

    def element_text_content_full(self, element, strip: bool = True) -> str:
        """Text of *element* including all descendants; stripped by default."""
        text = element.text_content()
        return text.strip() if strip else text

    def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
        """Direct ``.text`` of the *idx*-th match of *selector* in
        *content*, or None when nothing matches."""
        doc = self.document_fromstring(content, selector)
        if not doc:
            return None
        return self.element_text_content(doc[idx], strip)

    def element_text_content(self, element, strip: bool = True) -> str:
        """Direct text of *element* (no descendants); stripped by default.

        NOTE(review): ``element.text`` may be None for empty elements, in
        which case ``strip=True`` raises AttributeError — preserved
        historical behavior.
        """
        text = element.text
        return text.strip() if strip else text

    def _download(self, file_name, url, method):
        """Plain file downloader: save *url* to *file_name*.

        Up to five attempts; an HTTP error (status >= 400) is logged and
        retried after a 2 s pause.  *method* is unused but kept for API
        compatibility.

        Fix vs. the original: the output file is opened once (the old code
        reopened it in 'wb' mode every attempt, truncating it on each
        failure) and the HTTP response is always closed, including on the
        retry path (the old code leaked the connection on errors).
        """
        cookies = self._download_cookies or {}
        headers = self._download_headers or {}
        with open(file_name, 'wb') as out_file:
            for _ in range(5):
                response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
                try:
                    if response.status_code >= 400:
                        error('ERROR! Code {}\nUrl: {}'.format(
                            response.status_code,
                            url,
                        ))
                        sleep(2)
                        continue
                    out_file.write(response.content)
                    break
                finally:
                    # Release the connection on every path, not just success.
                    response.close()

    @staticmethod
    def _test_url(url: str, path: str = None) -> bool:
        """True when *url* looks like an http(s) URL with a dotted host,
        optionally followed by the regex *path*."""
        _path = r'https?://.+?\.\w{2,7}'
        if path is not None:
            _path += path
        return re.search(_path, url) is not None