import re
from logging import error
from typing import Optional

from requests import get
# Optional cookies for raw file downloads; `_download` reads this as
# `self._download_cookies or {}`, so None behaves like an empty mapping.
_download_cookies = None
# Optional headers for raw file downloads; `_download` reads this as
# `self._download_headers or {}`, so None behaves like an empty mapping.
_download_headers = None
def _elements(self, selector, content=None) -> list:
    """Select the elements of ``content`` that match ``selector``.

    :param selector: selector forwarded to ``self.document_fromstring``.
    :param content: markup to search; ``self.content`` is used when omitted.
    :return: the result of ``self.document_fromstring(content, selector)``.
    """
    # Fall back to the stored content only when nothing was passed in;
    # assigning unconditionally would silently discard the argument.
    # NOTE(review): the guard line was missing from the reviewed excerpt;
    # `is None` is assumed (could have been a truthiness test) - confirm.
    if content is None:
        content = self.content
    return self.document_fromstring(content, selector)
def _cover_from_content(self, selector, attr='src') -> Optional[str]:
    """Return the normalized URI held by the first element matching ``selector``.

    :param selector: selector passed to ``self._elements``.
    :param attr: attribute of the first matched element that holds the URI.
    :return: ``self.http().normalize_uri(...)`` of that attribute, or ``None``
        when ``self._elements`` yields ``None`` or an empty result.
    """
    image = self._elements(selector)
    if image is not None and len(image):
        return self.http().normalize_uri(image[0].get(attr))
    # Made explicit: the original fell off the end here and returned None.
    return None
def _first_select_options(parser, selector, skip_first=True) -> list:
    """Return the option elements of the first node matching ``selector``.

    :param parser: object exposing ``cssselect(selector)``.
    :param selector: CSS selector locating the container (first match is used).
    :param skip_first: when true, query ``'option + option'`` (an option
        immediately preceded by a sibling option, so the leading option is
        left out); otherwise query plain ``'option'``.
    :return: the matched option elements, or ``[]`` when ``selector`` matches
        nothing.
    """
    # NOTE(review): the lines that read `skip_first` and the empty-match
    # fallback were missing from the reviewed excerpt; reconstructed - confirm.
    options = 'option + option' if skip_first else 'option'
    select = parser.cssselect(selector)
    if not select:
        # Avoid IndexError on select[0] when nothing matched.
        return []
    return select[0].cssselect(options)
def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
    """Collect image addresses from the elements matching ``selector``.

    :param cls: not used by this helper.
    :param parser: object exposing ``cssselect(selector)``.
    :param selector: CSS selector for the image elements.
    :param attr: attribute read first on each element.
    :param alternative_attr: attribute read when ``attr`` is missing or empty.
    :return: one string per matched element, with spaces, CR, LF, tab and NUL
        characters stripped from both ends.
    """
    # NOTE(review): the accumulator, the loop header and the return were
    # missing from the reviewed excerpt; reconstructed around the two
    # surviving statements - confirm.
    images = []
    for i in parser.cssselect(selector):
        src = i.get(attr) or i.get(alternative_attr)
        images.append(src.strip(' \r\n\t\0'))
    return images
def _idx_to_x2(cls, idx, default=0) -> list:
    """Normalize ``idx`` to a two-item list of strings.

    :param cls: not used by this helper.
    :param idx: indexable sequence with at least one item.
    :param default: value used for the second item when ``idx`` has fewer than
        two items or ``idx[1]`` is falsy.
    :return: ``[str(idx[0]), str(second)]``.
    """
    # NOTE(review): only the second item's expression survived in the reviewed
    # excerpt; the enclosing list and the first item (str(idx[0])) are
    # reconstructed - confirm.
    return [
        str(idx[0]),
        str(default if len(idx) < 2 or not idx[1] else idx[1]),
    ]
def _join_groups(idx, glue='-') -> str:
    """Join the non-empty items of ``idx`` with ``glue``.

    :param idx: iterable of strings; falsy items (``''``, ``None``) are left out.
    :param glue: separator placed between the kept items.
    :return: the joined string (``''`` when nothing is kept).
    """
    # NOTE(review): the statements that build `result` were missing from the
    # reviewed excerpt; dropping falsy items is an assumption - confirm.
    result = [i for i in idx if i]
    return glue.join(result)
def _get_name(self, selector, url=None) -> str:
    """Return capture group 1 of ``selector`` searched in ``url``.

    :param selector: regular expression with at least one capture group.
    :param url: text to search.
    :return: the first capture group.
    :raises AttributeError: when ``selector`` does not match (``re.search``
        returns ``None``).
    """
    # Passing None straight to re.search would raise TypeError.
    # NOTE(review): the lines resolving the default were missing from the
    # reviewed excerpt; self.get_url() is assumed to supply it - confirm.
    if url is None:
        url = self.get_url()
    return re.search(selector, url).group(1)
def _get_content(self, tpl, **kwargs) -> str:
    """Format ``tpl`` into an address and fetch it with ``self.http_get``.

    :param tpl: ``str.format`` template. With keyword arguments it is filled
        by name; without them it is filled positionally with
        ``self.domain`` and ``self.manga_name``.
    :param kwargs: named values for the template; ``domain`` and
        ``manga_name`` default to the instance attributes when not given.
    :return: the result of ``self.http_get`` for the formatted address.
    """
    # NOTE(review): the branch header and the creation of `_kw` were missing
    # from the reviewed excerpt; reconstructed - confirm.
    if kwargs:
        _kw = dict(kwargs)
        _kw.setdefault('domain', self.domain)
        _kw.setdefault('manga_name', self.manga_name)
        return self.http_get(tpl.format(**_kw))
    return self.http_get(tpl.format(self.domain, self.manga_name))
def _base_cookies(self, url=None):
    """Store the cookies obtained for ``url`` under ``self._storage['cookies']``.

    :param url: address handed to ``self.http().get_base_cookies``.
    :return: ``None``; the result is written to ``self._storage``.
    """
    # NOTE(review): two lines between the signature and the request were
    # missing from the reviewed excerpt; a self.get_url() fallback for the
    # None default is assumed - confirm.
    if url is None:
        url = self.get_url()
    cookies = self.http().get_base_cookies(url)
    # get_dict() result is stored rather than the cookies object itself.
    self._storage['cookies'] = cookies.get_dict()
def parse_background(self, image) -> str:
    """Extract the ``url(...)`` target of a CSS ``background`` declaration.

    :param image: either the style text itself or an object whose
        ``get('style')`` returns it.
    :return: ``self.http().normalize_uri`` applied to the captured address.
    :raises AttributeError: when the style text has no ``background ... url(...)``
        part (the search returns ``None``).
    """
    # NOTE(review): the head of the search call and its second argument were
    # missing from the reviewed excerpt; reading the `style` attribute is an
    # assumption, so plain strings are accepted as well - confirm.
    style = image if isinstance(image, str) else image.get('style')
    url = re.search(
        r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
        style,
    )
    return self.http().normalize_uri(url.group(1))
def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
    """Return the text of the ``idx``-th element matching ``selector``.

    :param content: markup passed to ``self.document_fromstring``.
    :param selector: selector passed to ``self.document_fromstring``.
    :param idx: position of the wanted element among the matches.
    :param strip: forwarded to ``self.element_text_content``.
    :return: the element's text, or ``None`` when nothing matches.
    """
    doc = self.document_fromstring(content, selector)
    # NOTE(review): the None branch implied by the Optional[str] return type
    # was missing from the reviewed excerpt; reconstructed - confirm.
    if not doc:
        return None
    return self.element_text_content(doc[idx], strip)
def element_text_content(self, element, strip: bool = True) -> str:
    """Return ``element.text_content()``, optionally stripped.

    :param element: object exposing ``text_content()``.
    :param strip: when true, surrounding whitespace is removed from the text.
    :return: the element's text.
    """
    text = element.text_content()
    # NOTE(review): the use of `strip` and the return statement were missing
    # from the reviewed excerpt; reconstructed from the signature - confirm.
    if strip:
        text = text.strip()
    return text
def _download(self, file_name, url, method):
    """Fetch ``url`` with a plain GET and write the response body to ``file_name``.

    At most five attempts are made. An attempt whose HTTP status is 400 or
    higher is logged through ``error`` and retried; the first other response
    is written to the file and ends the loop.

    :param file_name: path opened in binary write mode for every attempt, so
        an existing file is truncated even when the request fails.
    :param url: address to fetch.
    :param method: not read by any statement in this function.
    :return: ``None``.
    """
    # clean file downloader
    cookies = self._download_cookies or {}
    headers = self._download_headers or {}
    # NOTE(review): the counter's initialisation/increment, the tail of the
    # error(...) call and everything after the write were missing from the
    # reviewed excerpt; the minimal terminating retry loop is reconstructed
    # here (no delay between attempts, no return value) - confirm.
    now_try_count = 0
    while now_try_count < 5:
        now_try_count += 1
        with open(file_name, 'wb') as out_file:
            response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
            if response.status_code >= 400:
                error('ERROR! Code {}\nUrl: {}'.format(
                    response.status_code,
                    url,
                ))
                # Leaving the `with` block closes the file before retrying.
                continue
            out_file.write(response.content)
        break
def _test_url(url: str, path: Optional[str] = None) -> bool:
    """Tell whether ``url`` contains an http(s) address, optionally followed by ``path``.

    :param url: text to test.
    :param path: optional regular-expression fragment appended to the base
        pattern, i.e. it must follow directly after the matched host part.
    :return: ``True`` when the pattern is found anywhere in ``url``.
    """
    # scheme, "://", lazily matched host text, a dot, then 2-7 word characters
    _path = r'https?://.+?\.\w{2,7}'
    # NOTE(review): the lines that use `path` were missing from the reviewed
    # excerpt; appending it to the pattern is an assumption - confirm.
    if path is not None:
        _path += path
    _re = re.compile(_path)
    return _re.search(url) is not None