Prvotní pokusy, stahujeme dětské pořady
[rozhlaspirat.git] / detske_porady.py
blob12f62ffda8cba7ba1b1b30d76034bfb49bd0cfd6
1 #!/usr/bin/env python3
3 # viz dokumentaci k api https://rapidoc.croapp.cz/
5 import requests
6 import logging
7 import io
8 import os
10 import mutagen
11 from mutagen.mp3 import MP3, EasyMP3
12 from mutagen.easyid3 import EasyID3
13 from mutagen.id3 import ID3
14 from mutagen.apev2 import APEv2, APEv2File, APENoHeaderError
class SessionL(requests.Session):
    """A ``requests.Session`` that logs every GET request it issues.

    Attributes:
        prodleva: Base delay in seconds. After a connection error the request
            is retried once, following a pause of ``prodleva * 5`` seconds.
    """

    # Base delay (seconds) for the pause before the single retry. The original
    # code referenced an undefined global of this name, so the retry path
    # crashed with NameError instead of retrying.
    prodleva = 1

    def get_logged(self, url, *aargs, **kwargs):
        """GET *url*, logging the request and the response status code.

        A failure raised as ``requests.exceptions.RequestException`` is
        retried exactly once after a pause; if the retry fails as well, its
        exception propagates to the caller.

        Args:
            url: Address to fetch.
            *aargs: Extra positional arguments forwarded to ``Session.get``.
            **kwargs: Extra keyword arguments forwarded to ``Session.get``.

        Returns:
            The response object returned by ``Session.get``. The status code
            is only logged, not checked.
        """
        # Local import: the file's import block does not provide ``time``.
        import time

        logging.info("GET " + url)
        try:
            r = self.get(url, *aargs, **kwargs)
        except requests.exceptions.RequestException:
            logging.warning('Chyba spojení, zkouším znova...')
            time.sleep(self.prodleva * 5)
            # Second and last attempt; an exception here is not caught.
            r = self.get(url, *aargs, **kwargs)
        logging.info(r.status_code)
        return r

    def vyžer_vše(self, url, *aargs, **kwargs):
        """Fetch *url* and every follow-up page, merging their ``data`` lists.

        The first response must be a JSON object with ``data`` and ``meta``
        keys. Further pages are fetched for as long as the most recent
        response carries a non-null ``links.next`` address; the same extra
        arguments are forwarded to every request.

        Args:
            url: Address of the first page.
            *aargs: Extra positional arguments forwarded to ``get_logged``.
            **kwargs: Extra keyword arguments forwarded to ``get_logged``.

        Returns:
            A dict ``{'meta': ..., 'data': ...}`` where ``meta`` comes from
            the first page and ``data`` is the concatenation of the ``data``
            lists of all pages.

        Raises:
            KeyError: If the first page lacks ``data`` or ``meta``.
        """
        rj = self.get_logged(url, *aargs, **kwargs).json()
        data = rj['data']
        meta = rj['meta']
        while True:
            # A missing or null "links" object simply means there is no next page.
            next_url = (rj.get('links') or {}).get('next')
            if next_url is None:
                break
            rj = self.get_logged(next_url, *aargs, **kwargs).json()
            # A follow-up page without "data" contributes nothing.
            data.extend(rj.get('data', []))
        return {'meta': meta, 'data': data}
# Browser-like request headers attached to every request made through the
# shared session below.
headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0',
    'accept-language': 'cs',
    'cache-control': 'max-age=0',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
}

# Shared session used by the rest of the script.
s = SessionL()
s.headers.update(headers)
# Deliberately skip TLS certificate verification.
# NOTE(review): this leaves the downloads open to tampering in transit.
s.verify = False
# Silence urllib3 warnings (presumably the insecure-request warning that
# verify=False would otherwise emit on every request).
requests.urllib3.disable_warnings()
# The topic link for children's programmes is hard-coded here; for other
# options see https://api.mujrozhlas.cz/episodes/
episody = s.vyžer_vše(
    "https://api.mujrozhlas.cz/topics/8ed08518-9d92-437c-a96f-e5df046aff4e/episodes"
)

# Output directories are hard-coded, relative to the working directory:
#   by_origfilename - downloads stored under their original file name
#   pořady          - human-readable tree of links to the downloads
#   meta            - one JSON metadata file per downloaded episode
# (A 'by_episode_id' directory, dir_ep_uuid, is currently disabled.)
prefix = '.'
dir_origfilename, dir_human, dir_meta = (
    os.path.join(prefix, name)
    for name in ('by_origfilename', 'pořady', 'meta')
)
for directory in (dir_origfilename, dir_human, dir_meta):
    os.makedirs(directory, exist_ok=True)
# Download every episode that has not been fetched yet, move its ID3 tags
# into APEv2 tags enriched with API metadata, hard-link it into the
# human-readable tree and finally store the episode's metadata as JSON.
for ep in episody['data']:
    attrs = ep['attributes']
    al = attrs['audioLinks']
    if not al:
        # Nothing to download; skipping avoids an IndexError on al[0].
        logging.warning("Episoda %s nemá žádný audio odkaz, přeskakuji" % (ep.get('id'),))
        continue
    if len(al) > 1:
        logging.warning("Nalezeno víc audio odkazů, používám první" + str(al))
    al_url = al[0]['url']
    orig_filename = al_url.split('/')[-1]
    dlpath = os.path.join(dir_origfilename, orig_filename)
    metapath = os.path.join(dir_meta, orig_filename + '.json')
    # The metadata file is written last, so its presence marks a finished download.
    if os.path.exists(metapath):
        logging.info(metapath + " existuje, už zjevně staženo")
        continue

    r = s.get_logged(al_url)
    if r.status_code != 200:
        logging.warning("Divná odpověď %d, ignoruji a jedu dál (%s)" % (r.status_code, al_url))
        continue
    with open(dlpath, 'wb') as fil:
        fil.write(r.content)

    # Remember the ID3 tags shipped with the file, then clear them from the
    # file; they are re-added below as APEv2 items prefixed with "id3_".
    id3 = EasyMP3(dlpath)
    id3tags = dict(id3)
    id3.clear()
    id3.save(dlpath)

    try:
        ape = APEv2File(dlpath)
    except APENoHeaderError:
        ape = APEv2File()
    title = attrs['shortTitle']
    ape['Title'] = title
    part = attrs.get('part')
    if part is not None:
        # Without this guard the literal string "None" was stored as the track.
        ape['Track'] = str(part)
    # "mirroredSerial" may be missing or null for episodes outside a serial.
    total_parts = (attrs.get('mirroredSerial') or {}).get('totalParts')
    if total_parts is not None:
        ape['TotalParts'] = str(total_parts)
    show = attrs['mirroredShow']['title']
    ape['Album'] = show
    ape['ShowUrl'] = ep['relationships']['show']['links']['related']
    ape['EpisodeId'] = ep['id']
    ape['File'] = al_url
    for key, val in id3tags.items():
        ape['id3_' + key] = val
    ape.save(dlpath)

    # A path separator inside a title would split it into extra path
    # components and make the link target unreachable, so neutralise it.
    safe_show = show.replace('/', '_').replace(os.sep, '_')
    safe_title = title.replace('/', '_').replace(os.sep, '_')
    if part is not None:
        # Multi-part episodes get a directory of their own.
        targetdir = os.path.join(dir_human, safe_show, safe_title)
        target_name = '%s_%d.mp3' % (safe_title, part)
    else:
        targetdir = os.path.join(dir_human, safe_show)
        target_name = '%s.mp3' % (safe_title,)
    os.makedirs(targetdir, exist_ok=True)
    targetpath = os.path.join(targetdir, target_name)
    try:
        os.link(dlpath, targetpath)
    except FileExistsError:
        # A link left behind by an interrupted earlier run already points at
        # this download and is fine; anything else is a name collision that
        # is reported but must not abort the remaining downloads.
        if not os.path.samefile(dlpath, targetpath):
            logging.warning("%s už existuje a patří jinému souboru, odkaz nevytvářím" % (targetpath,))

    # Finally store the metadata (which also records that this episode has
    # already been downloaded).
    with open(metapath, 'w') as meta_file:
        requests.compat.json.dump(ep, meta_file, indent=' ')