import re
from logging import error
from typing import Optional

from requests import get
# Cookies sent by ``_download``; ``None`` is treated there as "no cookies".
_download_cookies = None
# Headers sent by ``_download``; ``None`` is treated there as "no headers".
_download_headers = None
def _elements(self, selector, content=None) -> list:
    """Return the elements of ``content`` matched by ``selector``.

    Args:
        selector: Selector forwarded unchanged to ``self.document_fromstring``.
        content: Markup to search. When falsy (``None`` or empty),
            ``self.content`` is searched instead.

    Returns:
        Whatever ``self.document_fromstring(content, selector)`` returns.
    """
    # Fall back to the object's own content only when the caller supplied
    # nothing; an explicit ``content`` must never be overwritten.
    if not content:
        content = self.content
    return self.document_fromstring(content, selector)
def _cover_from_content(self, selector, attr='src') -> Optional[str]:
    """Return the normalized URI held by the first element matching ``selector``.

    Args:
        selector: Selector passed to ``self._elements``.
        attr: Attribute read from the first matched element.

    Returns:
        ``self.normalize_uri(...)`` of that attribute, or ``None`` when
        ``self._elements`` yields ``None`` or an empty result.
    """
    found = self._elements(selector)
    if found is None or len(found) == 0:
        return None
    return self.normalize_uri(found[0].get(attr))
def _first_select_options(parser, selector, skip_first=True) -> list:
    """Return the ``option`` elements of the first node matching ``selector``.

    Args:
        parser: Object exposing ``cssselect(selector)``.
        selector: CSS selector locating the container node(s).
        skip_first: When true, use ``'option + option'`` so the leading
            option is left out; otherwise select every ``option``.

    Returns:
        The matched options, or an empty list when ``selector`` matches
        nothing.
    """
    # 'option + option' matches only options preceded by another option,
    # which drops the first one.
    options = 'option + option' if skip_first else 'option'
    select = parser.cssselect(selector)
    if not select:
        return []
    return select[0].cssselect(options)
def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
    """Collect image sources from the elements matching ``selector``.

    Args:
        parser: Object exposing ``cssselect(selector)``.
        selector: CSS selector locating the image elements.
        attr: Attribute read first from each element.
        alternative_attr: Attribute used when ``attr`` is missing or empty.

    Returns:
        The source strings in match order, stripped of surrounding spaces,
        CR/LF, tabs and NUL characters. Elements carrying neither attribute
        are skipped.
    """
    images = []
    for element in parser.cssselect(selector):
        src = element.get(attr) or element.get(alternative_attr)
        # Neither attribute present: nothing to strip, so skip the element
        # instead of failing on ``None.strip``.
        if src:
            images.append(src.strip(' \r\n\t\0'))
    return images
def _idx_to_x2(cls, idx, default=0) -> list:
    """Return a two-item list of strings built from ``idx``.

    Args:
        idx: Sequence of index parts.
        default: Value used for the second item when ``idx`` has fewer than
            two parts or its second part is falsy.

    Returns:
        ``[first, second]``, both converted with ``str``.
    """
    # NOTE(review): only the second item's expression is visible in the
    # damaged source; ``str(idx[0])`` for the first item is assumed — confirm.
    first = str(idx[0])
    second = str(default if len(idx) < 2 or not idx[1] else idx[1])
    return [first, second]
def _join_groups(idx, glue='-') -> str:
    """Join the parts of ``idx`` into one string separated by ``glue``.

    Args:
        idx: Iterable of string parts.
        glue: Separator placed between the joined parts.

    Returns:
        The joined string; empty when there is nothing to join.
    """
    # NOTE(review): how ``result`` is built is not visible in the damaged
    # source; keeping only the non-empty parts, in order, is assumed — confirm.
    result = [part for part in idx if part]
    return glue.join(result)
def _get_name(self, selector, url=None) -> str:
    """Return the first capture group of ``selector`` found in ``url``.

    Args:
        selector: Regular expression with at least one capture group.
        url: Text to search.

    Returns:
        ``group(1)`` of the first match.

    Raises:
        AttributeError: when ``selector`` does not match (``re.search``
            returned ``None``).
    """
    if url is None:
        # NOTE(review): the fallback for a missing ``url`` is not visible in
        # the damaged source; ``self.get_url()`` is assumed — confirm.
        url = self.get_url()
    return re.search(selector, url).group(1)
def _get_content(self, tpl, domain=None, manga_name=None, name=None, **kwargs) -> str:
    """Build a URL from ``tpl`` and fetch it with ``self.http_get``.

    The template is first filled by keyword (``domain``, ``manga_name``,
    ``name`` plus any extra ``kwargs``); each of the three named values falls
    back to the attribute of the same name on ``self`` when falsy. If the
    template cannot be filled by keyword, it is filled positionally with
    ``self.domain`` and ``self.manga_name``.

    Args:
        tpl: ``str.format`` template of the URL.
        domain: Overrides ``self.domain``.
        manga_name: Overrides ``self.manga_name``.
        name: Overrides ``self.name``.
        **kwargs: Extra named fields for the template.

    Returns:
        The result of ``self.http_get`` for the built URL.
    """
    # NOTE(review): the statement between the two visible ``return`` lines is
    # missing from the damaged source; a positional fallback when keyword
    # formatting fails is assumed — confirm.
    try:
        url = tpl.format(
            domain=(domain or self.domain),
            manga_name=(manga_name or self.manga_name),
            name=(name or self.name),
            **kwargs
        )
    except (IndexError, KeyError):
        # IndexError: template uses positional ``{}`` fields.
        # KeyError: template names a field that was not supplied.
        url = tpl.format(self.domain, self.manga_name)
    return self.http_get(url)
def _base_cookies(self, url=None):
    """Fetch the base cookies for ``url`` and store them as a plain dict.

    Calls ``self.http().get_base_cookies(url)`` and saves the result's
    ``get_dict()`` under ``self._storage['cookies']``.

    Args:
        url: Forwarded unchanged to ``get_base_cookies``.
    """
    # NOTE(review): any handling of ``url is None`` before this call is not
    # visible in the damaged source; the value is passed through as given.
    cookies = self.http().get_base_cookies(url)
    self._storage['cookies'] = cookies.get_dict()
def parse_background(self, image) -> str:
    """Extract the ``url(...)`` of a CSS ``background`` rule and normalize it.

    Args:
        image: Either the style text itself, or an object whose
            ``get('style')`` returns it.

    Returns:
        ``self.normalize_uri`` applied to the address inside ``url(...)``,
        with optional surrounding quotes removed.

    Raises:
        AttributeError: when the style text holds no ``background ... url()``.
    """
    # NOTE(review): the text that is searched is not visible in the damaged
    # source; an element exposing its CSS through ``get('style')`` is
    # assumed — confirm. A plain string is accepted as well.
    style = image if isinstance(image, str) else image.get('style')
    url = re.search(
        r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
        style,
    )
    return self.normalize_uri(url.group(1))
def text_content_full(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
    """Return the full text of the ``idx``-th element matching ``selector``.

    Args:
        content: Markup handed to ``self.document_fromstring``.
        selector: Selector handed to ``self.document_fromstring``.
        idx: Position of the wanted element among the matches.
        strip: Forwarded to ``self.element_text_content_full``.

    Returns:
        The result of ``self.element_text_content_full`` for that element,
        or ``None`` when nothing matches or ``idx`` is out of range.
    """
    doc = self.document_fromstring(content, selector)
    if not doc:
        return None
    try:
        element = doc[idx]
    except IndexError:
        return None
    return self.element_text_content_full(element, strip)
def element_text_content_full(self, element, strip: bool = True) -> str:
    """Return ``element.text_content()``, optionally stripped.

    Args:
        element: Object exposing ``text_content()``.
        strip: When true, surrounding whitespace is removed.

    Returns:
        The element's text content.
    """
    text = element.text_content()
    # NOTE(review): what ``strip`` does is not visible in the damaged source;
    # a plain ``str.strip()`` is assumed — confirm.
    if strip:
        text = text.strip()
    return text
def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
    """Return the text of the ``idx``-th element matching ``selector``.

    Args:
        content: Markup handed to ``self.document_fromstring``.
        selector: Selector handed to ``self.document_fromstring``.
        idx: Position of the wanted element among the matches.
        strip: Forwarded to ``self.element_text_content``.

    Returns:
        The result of ``self.element_text_content`` for that element, or
        ``None`` when nothing matches or ``idx`` is out of range.
    """
    doc = self.document_fromstring(content, selector)
    if not doc:
        return None
    try:
        element = doc[idx]
    except IndexError:
        return None
    return self.element_text_content(element, strip)
113 def element_text_content(self, element, strip: bool = True) -> str:
def _download(self, file_name, url, method):
    """Download ``url`` into ``file_name``, trying at most five times.

    Uses ``self._download_cookies`` and ``self._download_headers`` (each
    replaced by an empty dict when unset) for a GET request with a
    60-second timeout that follows redirects. A response status of 400 or
    above is logged and the request is tried again.

    Args:
        file_name: Path of the file to write; opened in binary write mode.
        url: Address to fetch.
        method: Not used by the visible code.

    Returns:
        ``True`` once a response has been written, ``False`` when every
        attempt failed.
        NOTE(review): the return statements are not visible in the damaged
        source; this True/False contract is assumed — confirm.
    """
    # clean file downloader
    cookies = self._download_cookies or {}
    headers = self._download_headers or {}

    for _attempt in range(5):
        with open(file_name, 'wb') as out_file:
            response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
            if response.status_code >= 400:
                error('ERROR! Code {}\nUrl: {}'.format(
                    response.status_code,
                    url,
                ))
                continue
            out_file.write(response.content)
            return True
    return False
def _test_url(url: str, path: str = None) -> bool:
    """Tell whether ``url`` contains an http(s) address.

    The base pattern requires ``http://`` or ``https://``, a host part, a
    dot and a 2-7 character word (the top-level domain).

    Args:
        url: Text to check.
        path: Optional regular-expression fragment that must follow the
            domain.

    Returns:
        ``True`` when the pattern is found anywhere in ``url``.
    """
    _path = r'https?://.+?\.\w{2,7}'
    # NOTE(review): how ``path`` is used is not visible in the damaged
    # source; appending it to the base pattern is assumed — confirm.
    if path:
        _path += path
    _re = re.compile(_path)
    return _re.search(url) is not None