Merge branch 'stable' into devel
[tails.git] / bin / copy-iuks-to-rsync-server-and-verify
blob2d4b9ad6a1868a8f7ed4e5c9b0dd4b3388046746
1 #!/usr/bin/python3
import argparse
import logging
import os
import re
import shlex
import subprocess
import sys
import urllib.error
from pathlib import Path
from typing import List
from urllib.parse import urlparse
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup  # type: ignore
# Base URL under which get_jenkins_iuks_urls() looks for the IUKs archived
# by Jenkins.
JENKINS_IUKS_BASE_URL = "https://nightly.tails.boum.org/build_IUKs"
# Host the hashes file is copied to (scp) and on which the IUKs are
# downloaded and verified (ssh).
RSYNC_SERVER_HOSTNAME = "rsync.lizard"
# Record format handed to logging.basicConfig() in main().
LOG_FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
# Root logger (getLogger() without a name), used by every function below.
log = logging.getLogger()
def main():
    """Parse the command line, then send, download and verify the IUKs.

    Exit status:
      0 -- every step succeeded;
      1 -- the hashes file given on the command line does not exist;
      3 -- the IUKs on the rsync server do not match the hashes file;
      4 -- the final success check failed.
    """
    cli = argparse.ArgumentParser(
        description=("Copy IUKs from Jenkins to our rsync server "
                     "        and verify that they match those built locally"),
    )
    cli.add_argument("--hashes-file", required=True)
    cli.add_argument("--jenkins-build-id", type=int, required=True)
    cli.add_argument("--work-dir", default=".")
    cli.add_argument("-q", "--quiet", action="store_true",
                     help="quiet output")
    cli.add_argument("--debug", action="store_true", help="debug output")
    cli.add_argument("--skip-sending-hashes-file", action="store_true",
                     help="Assume the hashes file was uploaded already")
    cli.add_argument("--skip-downloading-iuks", action="store_true",
                     help="Assume the IUKs were already downloaded")
    cli.add_argument("--ignore-404", action="store_true",
                     help=("If a IUK is not found, ignore this. "
                           "This is useful when we are maintaining Tails "
                           "N.x (N+1).y at the same time"))
    cli.add_argument("--dry-run", action="store_true",
                     help=("Don't change any file on the server; "
                           "this is mostly for development purposes"))
    options = cli.parse_args()

    # --debug wins over --quiet when both are given.
    if options.debug:
        verbosity = logging.DEBUG
    elif options.quiet:
        verbosity = logging.WARNING
    else:
        verbosity = logging.INFO
    logging.basicConfig(level=verbosity, format=LOG_FORMAT)

    local_hashes = options.hashes_file
    remote_dir = options.work_dir

    if not Path(local_hashes).exists():
        log.error("%s does not exist", local_hashes)
        sys.exit(1)

    worker = CopyAndVerify(options)

    if not options.skip_sending_hashes_file:
        worker.send_hashes_file(hashes_file=local_hashes,
                                desthost=RSYNC_SERVER_HOSTNAME,
                                destdir=remote_dir)

    if not options.skip_downloading_iuks:
        worker.download_iuks_from_jenkins(
            hashes_file=local_hashes,
            desthost=RSYNC_SERVER_HOSTNAME,
            destdir=remote_dir,
            jenkins_iuks_base_url=JENKINS_IUKS_BASE_URL,
            jenkins_build_id=options.jenkins_build_id)

    # On the server the hashes file sits directly in the work directory,
    # so only its base name is relevant there.
    verified = worker.verify_iuks(
        desthost=RSYNC_SERVER_HOSTNAME,
        iuks_dir=remote_dir,
        hashes_file=Path(remote_dir, local_hashes).name)
    if verified is False:
        sys.exit(3)

    if not worker.check_success():
        sys.exit(4)
    sys.exit(0)
class CopyAndVerify:
    """Copy the IUKs built by Jenkins to the rsync server and verify them.

    The instance is configured from the parsed command-line arguments:

    * ``dry_run``: commands run with ``skip_if_dry_run=True`` are only
      logged, not executed;
    * ``ignore_404``: an HTTP 404 on a build's artifacts index is skipped
      (and counted in ``ignored_404``) instead of being fatal.
    """

    def __init__(self, args):
        self.dry_run = args.dry_run
        self.ignore_404 = args.ignore_404

        # Number of 404 responses skipped because of --ignore-404.
        self.ignored_404 = 0

    def run(self, *args, **kwargs):
        """Wrapper around subprocess.run() that honours --dry-run.

        Accepts the same arguments as subprocess.run(), plus the
        keyword-only ``skip_if_dry_run`` (default False). When that flag
        is set and the instance is in dry-run mode, the command is only
        logged and None is returned; otherwise the result of
        subprocess.run() is returned.
        """
        skip_if_dry_run = kwargs.pop('skip_if_dry_run', False)
        if skip_if_dry_run and self.dry_run:
            cmd = str(args[0]) if args else str(args)
            log.info('Would run %s, but --dry-run prevents this', cmd)
            return None
        return subprocess.run(*args, **kwargs)

    def send_hashes_file(self, hashes_file: str, desthost: str, destdir: str) -> None:
        """Copy hashes_file into destdir on desthost using scp.

        Raises subprocess.CalledProcessError if scp fails.
        """
        log.info("Sending %s to %s on %s…", hashes_file, destdir, desthost)
        self.run(["scp", hashes_file, "%s:%s" % (desthost, destdir)],
                 check=True,
                 skip_if_dry_run=True)

    def iuks_listed_in(self, hashes_file: str) -> List[str]:
        """Return the file names listed in hashes_file, in file order.

        Each non-blank line is expected to look like
        ``<hash>  <file name>`` (two spaces); the text after the last
        double space is taken as the file name. Blank lines are skipped
        so that, e.g., a trailing empty line does not yield a bogus
        empty file name.
        """
        names: List[str] = []
        with Path(hashes_file).open() as f:
            for line in f:
                entry = line.rstrip()
                if not entry:
                    continue
                names.append(entry.split('  ')[-1])
        return names

    @staticmethod
    def _fetch_page(url: str) -> "BeautifulSoup":
        """Download url and return its content parsed as HTML.

        The HTTP response is closed as soon as it has been parsed.
        urllib.error.HTTPError is propagated to the caller.
        """
        with urlopen(Request(url)) as response:  # nosec B310
            return BeautifulSoup(response, 'html.parser')

    def get_jenkins_iuks_urls(self, jenkins_iuks_base_url: str,
                              jenkins_build_id: int) -> List[str]:
        """Return the URL of the IUK archived by each source version's build.

        For every source version linked from
        ``<base>/configurations/axis-SOURCE_VERSION``, the most recently
        modified label directory is picked and the archive of build
        jenkins_build_id is searched for a ``.iuk`` link. Source versions
        without any label or without any IUK are skipped.

        With ``ignore_404`` set, a 404 on the archive index is counted in
        ``ignored_404`` and that source version is skipped; any other
        HTTP error is re-raised. The process exits with status 1 if a
        build archives more than one IUK.
        """
        urls: List[str] = []
        source_version_index_url = jenkins_iuks_base_url + \
            "/configurations/axis-SOURCE_VERSION"
        # When requesting the label index we want the directory list to be
        # sorted by modification date (C=M) in descending order (O=D)
        # so we easily can determine the latest IUK build.
        sort_query = '?C=M&O=D'

        source_version_links = self._fetch_page(
            source_version_index_url).find_all(href=re.compile('^[1-9]'))
        for source_version_link in source_version_links:
            source_version_url = (source_version_index_url + '/'
                                  + source_version_link.get('href'))
            axis_label_index_url = source_version_url + "axis-label_exp/"
            log.debug("Looking at %s", axis_label_index_url)
            label_links = self._fetch_page(
                axis_label_index_url + sort_query).find_all(
                    href=re.compile('^[a-z]'))
            label_urls = [
                axis_label_index_url + link.get('href').removesuffix(sort_query)
                for link in label_links
            ]
            if not label_urls:
                log.debug("Found no label URL in %s, ignoring this source version",
                          axis_label_index_url)
                continue
            # The first element is the latest build given how we sort
            label_url = label_urls[0]

            artifacts_index_url = label_url + '/builds/' + str(
                jenkins_build_id) + '/archive/'
            log.debug("Looking at %s", artifacts_index_url)
            try:
                page = self._fetch_page(artifacts_index_url)
            except urllib.error.HTTPError as exc:
                if self.ignore_404 and exc.code == 404:
                    log.info("Error %d on %s, skipping", exc.code, artifacts_index_url)
                    self.ignored_404 += 1
                    continue
                raise
            iuk_urls = [
                artifacts_index_url + link.get('href')
                for link in page.find_all(href=re.compile('[.]iuk$'))
            ]
            if not iuk_urls:
                log.debug("Found no IUK URL in %s, ignoring this source version",
                          artifacts_index_url)
                continue
            if len(iuk_urls) > 1:
                log.error("Found too many IUK URLs in %s: %s", artifacts_index_url,
                          iuk_urls)
                sys.exit(1)
            urls.append(iuk_urls[0])
        log.debug("Found IUK URLs: %s", urls)
        return urls

    def download_iuks_from_jenkins(self, hashes_file: str, desthost: str, destdir: str,
                                   jenkins_iuks_base_url: str,
                                   jenkins_build_id: int) -> None:
        """Make desthost download into destdir the IUKs found on Jenkins.

        The set of IUKs found on Jenkins must be exactly the set listed in
        hashes_file; otherwise an error is logged and the process exits
        with status 1 before anything is downloaded.

        Raises subprocess.CalledProcessError if a remote wget fails.
        """
        log.info("Downloading IUKs from Jenkins to %s…", desthost)
        expected_iuks = self.iuks_listed_in(hashes_file)
        log.debug("IUKS: %s", ', '.join(expected_iuks))
        jenkins_iuks_urls = self.get_jenkins_iuks_urls(jenkins_iuks_base_url,
                                                       jenkins_build_id)
        jenkins_iuks = [
            os.path.basename(urlparse(url).path) for url in jenkins_iuks_urls
        ]
        if set(expected_iuks) != set(jenkins_iuks):
            log.error(
                "Jenkins' set of IUKs differs from local one:\n"
                " - locally: %s\n"
                " - Jenkins: %s\n",
                expected_iuks, jenkins_iuks)
            sys.exit(1)
        for iuk_url in jenkins_iuks_urls:
            log.debug("Downloading %s to %s", iuk_url, destdir)
            # ssh joins its arguments with spaces and hands the result to
            # the remote shell, so everything interpolated here (including
            # URLs scraped from HTML) must be quoted for that shell.
            self.run([
                "ssh", desthost, "wget", "--quiet", "--no-clobber",
                "--directory-prefix=%s" % shlex.quote(destdir),
                shlex.quote(iuk_url)
                ], check=True, skip_if_dry_run=True)

    def verify_iuks(self, desthost: str, iuks_dir: str, hashes_file: str) -> bool:
        """Check on desthost the IUKs in iuks_dir against the hashes file.

        Only the base name of hashes_file is used; the file is looked up
        inside iuks_dir on desthost. This runs even in dry-run mode.

        Returns True if ``sha256sum --check --strict`` succeeds, False
        (after printing an error on stderr) if the remote command fails.
        """
        log.info("Verifying that IUKs built on Jenkins match those you've built…")
        # Quote for the remote shell so that spaces or quotes in the
        # directory or file name cannot break or alter the command.
        remote_command = "cd %s && sha256sum --check --strict %s" % (
            shlex.quote(iuks_dir),
            shlex.quote(Path(hashes_file).name),
        )
        try:
            self.run(["ssh", desthost, remote_command], check=True)
        except subprocess.CalledProcessError:
            print("\nERROR: IUKs built on Jenkins don't match yours\n",
                  file=sys.stderr)
            return False
        return True

    def check_success(self):
        '''This always returns True, but leaves us some room to define more validation

        A warning is logged if any 404 was ignored while looking for IUKs.
        '''
        if self.ignored_404 > 0:
            log.warning('%d 404s have been found while looking for IUKs. ' +
                        'Please inspect the log to be sure that this is correct',
                        self.ignored_404)
        return True
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()