testing/web-platform/interop.py

   1 # This Source Code Form is subject to the terms of the Mozilla Public
   2 # License, v. 2.0. If a copy of the MPL was not distributed with this
   3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
   4
   5 import argparse
   6 import csv
   7 import math
   8 import os
   9 import re
  10 import shutil
  11 import sys
  12 import tempfile
  13 from typing import Callable, Iterable, List, Mapping, Optional, Set, Tuple
  14
  15 repos = ["autoland", "mozilla-central", "try", "mozilla-central", "mozilla-beta", "wpt"]
  16
  17 default_fetch_task_filters = ["-web-platform-tests-|-spidermonkey-"]
  18 default_interop_task_filters = {
  19     "wpt": ["-firefox-"],
  20     None: [
  21         "web-platform-tests",
  22         "linux.*-64",
  23         "/opt",
  24         "!-nofis|-headless|-asan|-tsan|-ccov",
  25     ],
  26 }
  27
  28
  29 def get_parser_fetch_logs() -> argparse.Namespace:
  30     parser = argparse.ArgumentParser()
  31     parser.add_argument(
  32         "--log-dir", action="store", help="Directory into which to download logs"
  33     )
  34     parser.add_argument(
  35         "--task-filter",
  36         dest="task_filters",
  37         action="append",
  38         help="Regex filter applied to task names. Filters starting ! must not match. Filters starting ^ (after any !) match the entire task name, otherwise any substring can match. Multiple filters must all match",
  39     )
  40     parser.add_argument(
  41         "--check-complete",
  42         action="store_true",
  43         help="Only download logs if the task is complete",
  44     )
  45     parser.add_argument(
  46         "commits",
  47         nargs="+",
  48         help="repo:commit e.g. mozilla-central:fae24810aef1 for the runs to include",
  49     )
  50     return parser
  51
  52
  53 def get_parser_interop_score() -> argparse.Namespace:
  54     parser = get_parser_fetch_logs()
  55     parser.add_argument(
  56         "--year",
  57         action="store",
  58         default=2023,
  59         type=int,
  60         help="Interop year to score against",
  61     )
  62     parser.add_argument(
  63         "--category-filter",
  64         action="append",
  65         dest="category_filters",
  66         help="Regex filter applied to category names. Filters starting ! must not match. Filters starting ^ (after any !) match the entire task name, otherwise any substring can match. Multiple filters must all match",
  67     )
  68     parser.add_argument(
  69         "--expected-failures",
  70         help="Path to a file containing a list of tests which are not expected to pass",
  71     )
  72     return parser
  73
  74
  75 def print_scores(
  76     runs: Iterable[Tuple[str, str]],
  77     results_by_category: Mapping[str, List[int]],
  78     expected_failures_by_category: Optional[Mapping[str, List[Tuple[int, int]]]],
  79     include_total: bool,
  80 ):
  81     include_expected_failures = expected_failures_by_category is not None
  82
  83     writer = csv.writer(sys.stdout, delimiter="\t")
  84
  85     headers = ["Category"]
  86     for repo, commit in runs:
  87         prefix = f"{repo}:{commit}"
  88         headers.append(f"{prefix}-score")
  89         if include_expected_failures:
  90             headers.append(f"{prefix}-expected-failures")
  91             headers.append(f"{prefix}-adjusted-score")
  92
  93     writer.writerow(headers)
  94
  95     totals = {"score": [0.0] * len(runs)}
  96     if include_expected_failures:
  97         totals["expected_failures"] = [0.0] * len(runs)
  98         totals["adjusted_score"] = [0.0] * len(runs)
  99
 100     for category, category_results in results_by_category.items():
 101         category_row = []
 102         category_row.append(category)
 103         for category_index, result in enumerate(category_results):
 104             for run_index, run_score in enumerate(category_results):
 105                 category_row.append(f"{run_score / 10:.1f}")
 106                 totals["score"][run_index] += run_score
 107                 if include_expected_failures:
 108                     expected_failures, adjusted_score = expected_failures_by_category[
 109                         category
 110                     ][run_index]
 111                     category_row.append(f"{expected_failures / 10:.1f}")
 112                     category_row.append(f"{adjusted_score / 10:.1f}")
 113                     totals["expected_failures"][run_index] += expected_failures
 114                     totals["adjusted_score"][run_index] += adjusted_score
 115         writer.writerow(category_row)
 116
 117     if include_total:
 118
 119         def get_total(score, floor=True):
 120             total = float(score) / (len(results_by_category))
 121             if floor:
 122                 total = math.floor(total)
 123             total /= 10.0
 124             return total
 125
 126         totals_row = ["Total"]
 127         for i in range(len(runs)):
 128             totals_row.append(f"{get_total(totals['score'][i]):.1f}")
 129             if include_expected_failures:
 130                 totals_row.append(
 131                     f"{get_total(totals['expected_failures'][i], floor=False):.1f}"
 132                 )
 133                 totals_row.append(f"{get_total(totals['adjusted_score'][i]):.1f}")
 134         writer.writerow(totals_row)
 135
 136
 137 def get_wptreports(
 138     repo: str, commit: str, task_filters: List[str], log_dir: str, check_complete: bool
 139 ) -> List[str]:
 140     import tcfetch
 141
 142     return tcfetch.download_artifacts(
 143         repo,
 144         commit,
 145         task_filters=task_filters,
 146         check_complete=check_complete,
 147         out_dir=log_dir,
 148     )
 149
 150
 151 def get_runs(commits: List[str]) -> List[Tuple[str, str]]:
 152     runs = []
 153     for item in commits:
 154         if ":" not in item:
 155             raise ValueError(f"Expected commits of the form repo:commit, got {item}")
 156         repo, commit = item.split(":", 1)
 157         if repo not in repos:
 158             raise ValueError(f"Unsupported repo {repo}")
 159         runs.append((repo, commit))
 160     return runs
 161
 162
 163 def get_category_filter(
 164     category_filters: Optional[List[str]],
 165 ) -> Optional[Callable[[str], bool]]:
 166     if category_filters is None:
 167         return None
 168
 169     filters = []
 170     for item in category_filters:
 171         if not item:
 172             continue
 173         invert = item[0] == "!"
 174         if invert:
 175             item = item[1:]
 176         if item[0] == "^":
 177             regex = re.compile(item)
 178         else:
 179             regex = re.compile(f"^(.*)(?:{item})")
 180         filters.append((regex, invert))
 181
 182     def match_filters(category):
 183         for regex, invert in filters:
 184             matches = regex.match(category) is not None
 185             if invert:
 186                 matches = not matches
 187             if not matches:
 188                 return False
 189         return True
 190
 191     return match_filters
 192
 193
 194 def fetch_logs(
 195     commits: List[str],
 196     task_filters: List[str],
 197     log_dir: Optional[str],
 198     check_complete: bool,
 199     **kwargs,
 200 ):
 201     runs = get_runs(commits)
 202
 203     if not task_filters:
 204         task_filters = default_fetch_task_filters
 205
 206     if log_dir is None:
 207         log_dir = os.path.abspath(os.curdir)
 208
 209     for repo, commit in runs:
 210         get_wptreports(repo, commit, task_filters, log_dir, check_complete)
 211
 212
 213 def get_expected_failures(path: str) -> Mapping[str, Set[Optional[str]]]:
 214     expected_failures = {}
 215     with open(path) as f:
 216         for i, entry in enumerate(csv.reader(f)):
 217             entry = [item.strip() for item in entry]
 218             if not any(item for item in entry) or entry[0][0] == "#":
 219                 continue
 220             if len(entry) > 2:
 221                 raise ValueError(
 222                     f"{path}:{i+1} expected at most two columns, got {len(entry)}"
 223                 )
 224             if entry[0][0] != "/":
 225                 raise ValueError(
 226                     f'{path}:{i+1} "{entry[0]}" is not a valid test id (must start with "/")'
 227                 )
 228             test_id = entry[0]
 229             if test_id not in expected_failures:
 230                 expected_failures[test_id] = set()
 231             if len(entry) == 2:
 232                 subtest_id = entry[1]
 233                 if subtest_id == "":
 234                     print(
 235                         f"Warning: {path}:{i+1} got empty string subtest id, remove the trailing comma to make this apply to the full test"
 236                     )
 237                 expected_failures[test_id].add(subtest_id)
 238             else:
 239                 expected_failures[test_id].add(None)
 240     return expected_failures
 241
 242
 243 def score_runs(
 244     commits: List[str],
 245     task_filters: List[str],
 246     log_dir: Optional[str],
 247     year: int,
 248     check_complete: bool,
 249     category_filters: Optional[List[str]],
 250     expected_failures: Optional[str],
 251     **kwargs,
 252 ):
 253     from wpt_interop import score
 254
 255     runs = get_runs(commits)
 256
 257     temp_dir = None
 258     if log_dir is None:
 259         temp_dir = tempfile.mkdtemp()
 260         log_dir = temp_dir
 261
 262     try:
 263         if expected_failures is not None:
 264             expected_failures_data = get_expected_failures(expected_failures)
 265         else:
 266             expected_failures_data = None
 267
 268         run_logs = []
 269         for repo, commit in runs:
 270             if not task_filters:
 271                 if repo in default_interop_task_filters:
 272                     filters = default_interop_task_filters[repo]
 273                 else:
 274                     filters = default_interop_task_filters[None]
 275             else:
 276                 filters = task_filters
 277
 278             log_paths = get_wptreports(repo, commit, filters, log_dir, check_complete)
 279             if not log_paths:
 280                 print(f"Failed to get any logs for {repo}:{commit}", file=sys.stderr)
 281             else:
 282                 run_logs.append(log_paths)
 283
 284         if not run_logs:
 285             print("No logs to process", file=sys.stderr)
 286
 287         include_total = category_filters is None
 288
 289         category_filter = (
 290             get_category_filter(category_filters) if category_filters else None
 291         )
 292
 293         scores, expected_failure_scores = score.score_wptreports(
 294             run_logs,
 295             year=year,
 296             category_filter=category_filter,
 297             expected_failures=expected_failures_data,
 298         )
 299         print_scores(runs, scores, expected_failure_scores, include_total)
 300     finally:
 301         if temp_dir is not None:
 302             shutil.rmtree(temp_dir, True)