Update workflows/publish_pypi.yml
[manga-dl.git] / manga_py / providers / helpers / std.py
blob69cea1fc86a93a9cdde5fbc0f02e9670c091b253
1 import re
2 from logging import error
3 from time import sleep
4 from typing import Optional
6 from requests import get
class Std:
    """Mixin of shared helper methods for manga provider implementations.

    Relies on attributes/methods supplied by the provider base class
    (``self.content``, ``document_fromstring``, ``normalize_uri``,
    ``http_get``, ``http``, ``get_url``, ``_storage``, ``domain``,
    ``manga_name``, ``name``) — assumed contract, defined elsewhere.
    """

    # Optional cookies/headers used by _download(); providers may override.
    _download_cookies = None
    _download_headers = None

    def _elements(self, selector, content=None) -> list:
        """Return elements matching *selector* in *content*.

        Falls back to ``self.content`` when *content* is falsy (None or '').
        """
        if not content:
            content = self.content
        return self.document_fromstring(content, selector)

    def _cover_from_content(self, selector, attr='src') -> Optional[str]:
        """Return the normalized URI taken from the first matched element's
        *attr*, or ``''`` when nothing matches (historical behavior: empty
        string, not None).
        """
        image = self._elements(selector)
        if image is not None and len(image):
            return self.normalize_uri(image[0].get(attr))
        return ''

    @staticmethod
    def _first_select_options(parser, selector, skip_first=True) -> list:
        """Return the <option> elements of the first <select>-like node
        matched by *selector*.

        With ``skip_first=True`` the leading (placeholder) option is
        excluded via the ``option + option`` sibling selector.
        """
        options = 'option + option' if skip_first else 'option'
        select = parser.cssselect(selector)
        if select:
            return select[0].cssselect(options)
        return []

    @classmethod
    def _images_helper(cls, parser, selector, attr='src', alternative_attr='data-src') -> list:
        """Collect image URLs from all elements matched by *selector*.

        Falls back to *alternative_attr* (typical lazy-load attribute) when
        *attr* is missing or empty.  NOTE(review): still raises
        AttributeError if both attributes are absent on an element —
        preserved historical behavior.
        """
        images = []
        for element in parser.cssselect(selector):
            src = element.get(attr) or element.get(alternative_attr)
            images.append(src.strip(' \r\n\t\0'))
        return images

    @classmethod
    def _idx_to_x2(cls, idx, default=0) -> list:
        """Normalize an index pair to exactly two string items.

        A missing or falsy second element is replaced with *default*.

        :param idx: sequence with at least one element
        :param default: substitute for a missing/falsy second element
        :return: list of two strings
        """
        return [
            str(idx[0]),
            str(default if len(idx) < 2 or not idx[1] else idx[1]),
        ]

    @staticmethod
    def _join_groups(idx, glue='-') -> str:
        """Join the truthy items of *idx* with *glue*, skipping empty ones."""
        return glue.join([i for i in idx if i])

    def _get_name(self, selector, url=None) -> str:
        """Extract ``group(1)`` of the regex *selector* from *url*
        (defaults to the current URL).

        Raises AttributeError when the pattern does not match — preserved
        historical behavior.
        """
        if url is None:
            url = self.get_url()
        return re.search(selector, url).group(1)

    def _get_content(self, tpl, domain=None, manga_name=None, name=None, **kwargs) -> str:
        """Fetch a page whose URL is built from the template *tpl*.

        :param tpl: format string using {domain}/{manga_name}/{name} keys
        :param domain: overrides ``self.domain`` when given
        :param manga_name: overrides ``self.manga_name`` when given
        :param name: overrides ``self.name`` when given
        :param kwargs: extra template substitutions
        :return: response body from ``self.http_get``
        """
        try:
            return self.http_get(tpl.format(
                domain=(domain or self.domain),
                manga_name=(manga_name or self.manga_name),
                name=(name or self.name),
                **kwargs
            ))
        except Exception:
            # Fallback for legacy templates with positional placeholders.
            return self.http_get(tpl.format(self.domain, self.manga_name))

    def _base_cookies(self, url=None):
        """Fetch base cookies for *url* (default: current URL) and store
        them in ``self._storage['cookies']`` as a plain dict."""
        if url is None:
            url = self.get_url()
        cookies = self.http().get_base_cookies(url)
        self._storage['cookies'] = cookies.get_dict()

    def parse_background(self, image) -> str:
        """Extract and normalize the URL from an element's inline CSS
        ``background`` style.

        Raises AttributeError when the style holds no background URL —
        preserved historical behavior.
        """
        url = re.search(
            r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)',
            image.get('style')
        )
        return self.normalize_uri(url.group(1))

    def text_content_full(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
        """Full text (including descendants) of the *idx*-th match of
        *selector* in *content*, or None when nothing matches."""
        doc = self.document_fromstring(content, selector)
        if not doc:
            return None
        return self.element_text_content_full(doc[idx], strip)

    def element_text_content_full(self, element, strip: bool = True) -> str:
        """Text of *element* including all descendants; stripped by default."""
        text = element.text_content()
        return text.strip() if strip else text

    def text_content(self, content, selector, idx: int = 0, strip: bool = True) -> Optional[str]:
        """Direct ``.text`` of the *idx*-th match of *selector* in
        *content*, or None when nothing matches."""
        doc = self.document_fromstring(content, selector)
        if not doc:
            return None
        return self.element_text_content(doc[idx], strip)

    def element_text_content(self, element, strip: bool = True) -> str:
        """Direct text of *element* (no descendants); stripped by default.

        NOTE(review): ``element.text`` may be None for empty elements, in
        which case ``strip=True`` raises AttributeError — preserved
        historical behavior.
        """
        text = element.text
        return text.strip() if strip else text

    def _download(self, file_name, url, method):
        """Plain file downloader: save *url* to *file_name*.

        Up to five attempts; an HTTP error (status >= 400) is logged and
        retried after a 2 s pause.  *method* is unused but kept for API
        compatibility.

        Fix vs. the original: the output file is opened once (the old code
        reopened it in 'wb' mode every attempt, truncating it on each
        failure) and the HTTP response is always closed, including on the
        retry path (the old code leaked the connection on errors).
        """
        cookies = self._download_cookies or {}
        headers = self._download_headers or {}
        with open(file_name, 'wb') as out_file:
            for _ in range(5):
                response = get(url, timeout=60, allow_redirects=True, headers=headers, cookies=cookies)
                try:
                    if response.status_code >= 400:
                        error('ERROR! Code {}\nUrl: {}'.format(
                            response.status_code,
                            url,
                        ))
                        sleep(2)
                        continue
                    out_file.write(response.content)
                    break
                finally:
                    # Release the connection on every path, not just success.
                    response.close()

    @staticmethod
    def _test_url(url: str, path: str = None) -> bool:
        """True when *url* looks like an http(s) URL with a dotted host,
        optionally followed by the regex *path*."""
        _path = r'https?://.+?\.\w{2,7}'
        if path is not None:
            _path += path
        return re.search(_path, url) is not None