Patch #402 (new genkan_io provider)
[manga-dl.git] / manga_py / providers / helpers / std.py
blob 0356c235c5d89e4020e05b8a21899ec75015ee35
1 import re
2 from logging import error
3 from time import sleep
4 from typing import Optional
6 from requests import get
class Std:
    """Shared helper methods for provider implementations: DOM selection,
    cover/image extraction, name parsing and plain-file downloading.

    NOTE(review): intended as a mix-in — relies on the host provider exposing
    ``content``, ``document_fromstring``, ``http()``, ``http_get``,
    ``get_url``, ``domain``, ``manga_name`` and ``_storage``; confirm against
    the provider base class.
    """

    # Cookie dict used by _download(); None means "no extra cookies".
    _download_cookies = None
    # Header dict used by _download(); None means "no extra headers".
    _download_headers = None
13 def _elements(self, selector, content=None) -> list:
14 if not content:
15 content = self.content
16 return self.document_fromstring(content, selector)
18 def _cover_from_content(self, selector, attr='src') -> str:
19 image = self._elements(selector)
20 if image is not None and len(image):
21 return self.http().normalize_uri(image[0].get(attr))
23 @staticmethod
24 def _first_select_options(parser, selector, skip_first=True) -> list:
25 options = 'option'
26 if skip_first:
27 options = 'option + option'
28 select = parser.cssselect(selector)
29 if select:
30 return select[0].cssselect(options)
31 return []
33 @classmethod
34 def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
35 image = parser.cssselect(selector)
36 images = []
37 for i in image:
38 src = i.get(attr) or i.get(alternative_attr)
39 images.append(src.strip(' \r\n\t\0'))
40 return images
42 @classmethod
43 def _idx_to_x2(cls, idx, default=0) -> list:
44 return [
45 str(idx[0]),
46 str(default if len(idx) < 2 or not idx[1] else idx[1])
49 @staticmethod
50 def _join_groups(idx, glue='-') -> str:
51 result = []
52 for i in idx:
53 if i:
54 result.append(i)
55 return glue.join(result)
57 def _get_name(self, selector, url=None) -> str:
58 if url is None:
59 url = self.get_url()
60 return re.search(selector, url).group(1)
62 def _get_content(self, tpl, **kwargs) -> str:
63 try:
64 _kw = kwargs.copy()
65 _kw.setdefault('domain', self.domain)
66 _kw.setdefault('manga_name', self.manga_name)
67 return self.http_get(tpl.format(**_kw))
68 except Exception:
69 return self.http_get(tpl.format(self.domain, self.manga_name))
71 def _base_cookies(self, url=None):
72 if url is None:
73 url = self.get_url()
74 cookies = self.http().get_base_cookies(url)
75 self._storage['cookies'] = cookies.get_dict()
77 def parse_background(self, image) -> str:
78 url = re.search(
79 r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
80 image.get('style')
82 return self.http().normalize_uri(url.group(1))
84 def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
85 doc = self.document_fromstring(content, selector)
86 if not doc:
87 return None
88 return self.element_text_content(doc[idx], strip)
90 def element_text_content(self, element, strip: bool = True) -> str:
91 text = element.text_content()
92 if strip:
93 text = text.strip()
94 return text
96 def _download(self, file_name, url, method):
97 # clean file downloader
98 cookies = self._download_cookies or {}
99 headers = self._download_headers or {}
101 now_try_count = 0
102 while now_try_count < 5:
103 with open(file_name, 'wb') as out_file:
104 now_try_count += 1
105 response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
106 if response.status_code >= 400:
107 error('ERROR! Code {}\nUrl: {}'.format(
108 response.status_code,
109 url,
111 sleep(2)
112 continue
113 out_file.write(response.content)
114 response.close()
115 out_file.close()
116 break
118 @staticmethod
119 def _test_url(url: str, path: str = None) -> bool:
120 _path = r'https?://.+?\.\w{2,7}'
121 if path is not None:
122 _path += path
123 _re = re.compile(_path)
124 return _re.search(url) is not None