# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import logging
import os
import shutil
import sys
import time
from collections import defaultdict

import yaml
from redo import retry
from taskgraph import create
from taskgraph.create import create_tasks

# TODO: Let standalone taskgraph generate parameters instead of calling internals
from taskgraph.decision import (
    _determine_more_accurate_base_ref,
    _determine_more_accurate_base_rev,
    _get_env_prefix,
)
from taskgraph.generator import TaskGraphGenerator
from taskgraph.parameters import Parameters
from taskgraph.taskgraph import TaskGraph
from taskgraph.util.python_path import find_object
from taskgraph.util.taskcluster import get_artifact
from taskgraph.util.vcs import get_repository
from taskgraph.util.yaml import load_yaml

from . import GECKO
from .actions import render_actions_json
from .files_changed import get_changed_files
from .parameters import get_app_version, get_version
from .try_option_syntax import parse_message
from .util.backstop import BACKSTOP_INDEX, is_backstop
from .util.bugbug import push_schedules
from .util.chunking import resolver
from .util.hg import get_hg_commit_message, get_hg_revision_branch
from .util.partials import populate_release_history
from .util.taskcluster import insert_index
from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds

logger = logging.getLogger(__name__)

ARTIFACTS_DIR = "artifacts"

# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
PER_PROJECT_PARAMETERS = {
    "try": {
        "enable_always_target": True,
        "target_tasks_method": "try_tasks",
        "release_type": "nightly",
    },
    "kaios-try": {
        "target_tasks_method": "try_tasks",
    },
    "ash": {
        "target_tasks_method": "default",
    },
    "cedar": {
        "target_tasks_method": "default",
    },
    "holly": {
        "enable_always_target": True,
        "target_tasks_method": "holly_tasks",
    },
    "oak": {
        "target_tasks_method": "default",
        "release_type": "nightly-oak",
    },
    "graphics": {
        "target_tasks_method": "graphics_tasks",
    },
    "autoland": {
        "optimize_strategies": "gecko_taskgraph.optimize:project.autoland",
        "target_tasks_method": "autoland_tasks",
        "test_manifest_loader": "bugbug",  # Remove this line to disable "manifest scheduling".
    },
    "mozilla-central": {
        "target_tasks_method": "mozilla_central_tasks",
        "release_type": "nightly",
    },
    "mozilla-beta": {
        "target_tasks_method": "mozilla_beta_tasks",
        "release_type": "beta",
    },
    "mozilla-release": {
        "target_tasks_method": "mozilla_release_tasks",
        "release_type": "release",
    },
    "mozilla-esr115": {
        "target_tasks_method": "mozilla_esr115_tasks",
        "release_type": "esr115",
    },
    "pine": {
        "target_tasks_method": "pine_tasks",
        "release_type": "nightly-pine",
    },
    "larch": {
        "target_tasks_method": "larch_tasks",
        "release_type": "nightly-larch",
    },
    "kaios": {
        "target_tasks_method": "kaios_tasks",
    },
    "toolchains": {
        "target_tasks_method": "mozilla_central_tasks",
    },
    # the default parameters are used for projects that do not match above.
    "default": {
        "target_tasks_method": "default",
    },
}
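# Note: get_decision_parameters() below merges a project's entry over the
# defaults it sets up, so each entry only lists what differs for that project.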


def full_task_graph_to_runnable_jobs(full_task_json):
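    """
    Reduce the full task-graph JSON to the runnable-jobs.json artifact: a map
    of task label to Treeherder info (symbol, group, collection, platform).
    Tasks without Treeherder metadata are skipped.
    """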
    runnable_jobs = {}
    for label, node in full_task_json.items():
        if not ("extra" in node["task"] and "treeherder" in node["task"]["extra"]):
            continue

        th = node["task"]["extra"]["treeherder"]
        runnable_jobs[label] = {"symbol": th["symbol"]}

        for i in ("groupName", "groupSymbol", "collection"):
            if i in th:
                runnable_jobs[label][i] = th[i]
        if th.get("machine", {}).get("platform"):
            runnable_jobs[label]["platform"] = th["machine"]["platform"]
    return runnable_jobs


def full_task_graph_to_manifests_by_task(full_task_json):
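    """
    Map each task label to the list of test manifests it runs, taken from the
    task's `test_manifests` attribute; tasks without manifests are omitted.
    """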
    manifests_by_task = defaultdict(list)
    for label, node in full_task_json.items():
        manifests = node["attributes"].get("test_manifests")
        if not manifests:
            continue

        manifests_by_task[label].extend(manifests)
    return manifests_by_task


def try_syntax_from_message(message):
    """
    Parse the try syntax out of a commit message, returning '' if none is
    found.
    """
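    # e.g. "Bug 123 - fix foo\ntry: -b do -p all" -> "try: -b do -p all"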
    try_idx = message.find("try:")
    if try_idx == -1:
        return ""
    return message[try_idx:].split("\n", 1)[0]


def taskgraph_decision(options, parameters=None):
    """
    Run the decision task. This function implements `mach taskgraph decision`,
    and is responsible for

    * processing decision task command-line options into parameters
    * running task-graph generation exactly the same way the other `mach
      taskgraph` commands do
    * generating a set of artifacts to memorialize the graph
    * calling TaskCluster APIs to create the graph
    """
    parameters = parameters or (
        lambda graph_config: get_decision_parameters(graph_config, options)
    )

    decision_task_id = os.environ["TASK_ID"]

    # create a TaskGraphGenerator instance
    tgg = TaskGraphGenerator(
        root_dir=options.get("root"),
        parameters=parameters,
        decision_task_id=decision_task_id,
        write_artifacts=True,
    )

    if not create.testing:
        # set additional index paths for the decision task
        set_decision_indexes(decision_task_id, tgg.parameters, tgg.graph_config)

    # write out the parameters used to generate this graph
    write_artifact("parameters.yml", dict(**tgg.parameters))

    # write out the public/actions.json file
    write_artifact(
        "actions.json",
        render_actions_json(tgg.parameters, tgg.graph_config, decision_task_id),
    )

    # write out the full graph for reference
    full_task_json = tgg.full_task_graph.to_json()
    write_artifact("full-task-graph.json", full_task_json)

    # write out the public/runnable-jobs.json file
    write_artifact(
        "runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json)
    )

    # write out the public/manifests-by-task.json file
    write_artifact(
        "manifests-by-task.json.gz",
        full_task_graph_to_manifests_by_task(full_task_json),
    )

    # write out the public/tests-by-manifest.json file
    write_artifact("tests-by-manifest.json.gz", resolver.tests_by_manifest)

    # this is just a test to check whether the from_json() function is working
    _, _ = TaskGraph.from_json(full_task_json)

    # write out the target task set to allow reproducing this as input
    write_artifact("target-tasks.json", list(tgg.target_task_set.tasks.keys()))

    # write out the optimized task graph to describe what will actually happen,
    # and the map of labels to taskids
    write_artifact("task-graph.json", tgg.morphed_task_graph.to_json())
    write_artifact("label-to-taskid.json", tgg.label_to_taskid)

    # write bugbug scheduling information if it was invoked
    if len(push_schedules) > 0:
        write_artifact("bugbug-push-schedules.json", push_schedules.popitem()[1])

    # cache run-task & misc/fetch-content
    scripts_root_dir = os.path.join(GECKO, "taskcluster/scripts")
    run_task_file_path = os.path.join(scripts_root_dir, "run-task")
    fetch_content_file_path = os.path.join(scripts_root_dir, "misc/fetch-content")
    shutil.copy2(run_task_file_path, ARTIFACTS_DIR)
    shutil.copy2(fetch_content_file_path, ARTIFACTS_DIR)

    # actually create the graph
    create_tasks(
        tgg.graph_config,
        tgg.morphed_task_graph,
        tgg.label_to_taskid,
        tgg.parameters,
        decision_task_id=decision_task_id,
    )


def get_decision_parameters(graph_config, options):
    """
    Load parameters from the command-line options for 'taskgraph decision'.
    This also applies per-project parameters, based on the given project.
    """
    product_dir = graph_config["product-dir"]

    parameters = {
        n: options[n]
        for n in [
            "base_repository",
            "base_ref",
            "base_rev",
            "head_repository",
            "head_rev",
            "head_ref",
            "head_tag",
            "project",
            "pushlog_id",
            "pushdate",
            "owner",
            "level",
            "repository_type",
            "target_tasks_method",
            "tasks_for",
        ]
        if n in options
    }

    commit_message = get_hg_commit_message(os.path.join(GECKO, product_dir))

    repo_path = os.getcwd()
    repo = get_repository(repo_path)
    parameters["base_ref"] = _determine_more_accurate_base_ref(
        repo,
        candidate_base_ref=options.get("base_ref"),
        head_ref=options.get("head_ref"),
        base_rev=options.get("base_rev"),
    )

    parameters["base_rev"] = _determine_more_accurate_base_rev(
        repo,
        base_ref=parameters["base_ref"],
        candidate_base_rev=options.get("base_rev"),
        head_rev=options.get("head_rev"),
        env_prefix=_get_env_prefix(graph_config),
    )

    # Define default filter list, as most configurations shouldn't need
    # custom filters.
    parameters["filters"] = [
        "target_tasks_method",
    ]
    parameters["enable_always_target"] = ["docker-image"]
    parameters["existing_tasks"] = {}
    parameters["do_not_optimize"] = []
    parameters["build_number"] = 1
    parameters["version"] = get_version(product_dir)
    parameters["app_version"] = get_app_version(product_dir)
    parameters["message"] = try_syntax_from_message(commit_message)
    parameters["hg_branch"] = get_hg_revision_branch(
        GECKO, revision=parameters["head_rev"]
    )
    parameters["files_changed"] = sorted(
        get_changed_files(parameters["head_repository"], parameters["head_rev"])
    )
    parameters["next_version"] = None
    parameters["optimize_strategies"] = None
    parameters["optimize_target_tasks"] = True
    parameters["phabricator_diff"] = None
    parameters["release_type"] = ""
    parameters["release_eta"] = ""
    parameters["release_enable_partner_repack"] = False
    parameters["release_enable_partner_attribution"] = False
    parameters["release_partners"] = []
    parameters["release_partner_config"] = {}
    parameters["release_partner_build_number"] = 1
    parameters["release_enable_emefree"] = False
    parameters["release_product"] = None
    parameters["required_signoffs"] = []
    parameters["signoff_urls"] = {}
    parameters["test_manifest_loader"] = "default"
    parameters["try_mode"] = None
    parameters["try_task_config"] = {}
    parameters["try_options"] = None

    # owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
    # case, fake it
    if "@" not in parameters["owner"]:
        parameters["owner"] += "@noreply.mozilla.org"

    # use the pushdate as build_date if given, else use current time
    parameters["build_date"] = parameters["pushdate"] or int(time.time())
    # moz_build_date is the build identifier based on build_date
    parameters["moz_build_date"] = time.strftime(
        "%Y%m%d%H%M%S", time.gmtime(parameters["build_date"])
    )
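    # e.g. a build_date of 1700000000 yields a moz_build_date of "20231114221320"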

    project = parameters["project"]
    try:
        parameters.update(PER_PROJECT_PARAMETERS[project])
    except KeyError:
        logger.warning(
            "using default project parameters; add {} to "
            "PER_PROJECT_PARAMETERS in {} to customize behavior "
            "for this project".format(project, __file__)
        )
        parameters.update(PER_PROJECT_PARAMETERS["default"])

    # `target_tasks_method` has higher precedence than `project` parameters
    if options.get("target_tasks_method"):
        parameters["target_tasks_method"] = options["target_tasks_method"]

    # ..but can be overridden by the commit message: if it contains the special
    # string "DONTBUILD" and this is an on-push decision task, then use the
    # special 'nothing' target task method.
    if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
        parameters["target_tasks_method"] = "nothing"

    if options.get("include_push_tasks"):
        get_existing_tasks(options.get("rebuild_kinds", []), parameters, graph_config)

    # If the target method is nightly, we should build partials. This means
    # knowing what has been released previously.
    # An empty release_history is fine, it just means no partials will be built
    parameters.setdefault("release_history", dict())
    if "nightly" in parameters.get("target_tasks_method", ""):
        parameters["release_history"] = populate_release_history("Firefox", project)

    if options.get("try_task_config_file"):
        task_config_file = os.path.abspath(options.get("try_task_config_file"))
    else:
        # if try_task_config.json is present, load it
        task_config_file = os.path.join(os.getcwd(), "try_task_config.json")

    # load try settings
    if "try" in project and options["tasks_for"] == "hg-push":
        set_try_config(parameters, task_config_file)

    if options.get("optimize_target_tasks") is not None:
        parameters["optimize_target_tasks"] = options["optimize_target_tasks"]

    # Determine if this should be a backstop push.
    parameters["backstop"] = is_backstop(parameters)

    if "decision-parameters" in graph_config["taskgraph"]:
        find_object(graph_config["taskgraph"]["decision-parameters"])(
            graph_config, parameters
        )

    result = Parameters(**parameters)
    result.check()
    return result


def get_existing_tasks(rebuild_kinds, parameters, graph_config):
    """
    Find the decision task corresponding to the on-push graph, and return
    a mapping of labels to task-ids from it. This will skip the kinds specified
    by `rebuild_kinds`.
    """
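    # The on-push decision task may not be findable right away; retry a few
    # times (5 minutes apart) before giving up.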
    try:
        decision_task = retry(
            find_decision_task,
            args=(parameters, graph_config),
            attempts=4,
            sleeptime=5 * 60,
        )
    except Exception:
        logger.exception("Didn't find existing push task.")
        sys.exit(1)
    _, task_graph = TaskGraph.from_json(
        get_artifact(decision_task, "public/full-task-graph.json")
    )
    parameters["existing_tasks"] = find_existing_tasks_from_previous_kinds(
        task_graph, [decision_task], rebuild_kinds
    )


def set_try_config(parameters, task_config_file):
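    """
    Load try settings into `parameters`. A version 1 config file is stored
    whole under `try_task_config`; a version 2 file supplies a `parameters`
    dict that is merged in. Legacy try option syntax in the commit message
    (a "try:" line) takes effect afterwards, if present.
    """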
    if os.path.isfile(task_config_file):
        logger.info(f"using try tasks from {task_config_file}")
        with open(task_config_file) as fh:
            task_config = json.load(fh)
        task_config_version = task_config.pop("version", 1)
        if task_config_version == 1:
            parameters["try_mode"] = "try_task_config"
            parameters["try_task_config"] = task_config
        elif task_config_version == 2:
            parameters.update(task_config["parameters"])
            parameters["try_mode"] = "try_task_config"
        else:
            raise Exception(
                f"Unknown `try_task_config.json` version: {task_config_version}"
            )

    if "try:" in parameters["message"]:
        parameters["try_mode"] = "try_option_syntax"
        parameters.update(parse_message(parameters["message"]))
    else:
        parameters["try_options"] = None


def set_decision_indexes(decision_task_id, params, graph_config):
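    """Insert extra Taskcluster index paths pointing at this decision task."""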
    index_paths = []
    if params["backstop"]:
        # When two Decision tasks run at nearly the same time, it's possible
        # they both end up being backstops if the second checks the backstop
        # index before the first inserts it. Insert this index first to reduce
        # the chances of that happening.
        index_paths.insert(0, BACKSTOP_INDEX)

    subs = params.copy()
    subs["trust-domain"] = graph_config["trust-domain"]

    for index_path in index_paths:
        insert_index(index_path.format(**subs), decision_task_id, use_proxy=True)


def write_artifact(filename, data):
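    """
    Write `data` to ARTIFACTS_DIR/`filename`, serializing according to the
    extension: YAML for .yml, JSON for .json, gzipped JSON for .json.gz.
    """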
    logger.info(f"writing artifact file `{filename}`")
    if not os.path.isdir(ARTIFACTS_DIR):
        os.mkdir(ARTIFACTS_DIR)
    path = os.path.join(ARTIFACTS_DIR, filename)
    if filename.endswith(".yml"):
        with open(path, "w") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
    elif filename.endswith(".json"):
        with open(path, "w") as f:
            json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
    elif filename.endswith(".json.gz"):
        import gzip

        with gzip.open(path, "wb") as f:
            f.write(json.dumps(data).encode("utf-8"))
    else:
        raise TypeError(f"Don't know how to write to {filename}")


def read_artifact(filename):
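    """Read an artifact back from ARTIFACTS_DIR; the inverse of write_artifact()."""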
    path = os.path.join(ARTIFACTS_DIR, filename)
    if filename.endswith(".yml"):
        return load_yaml(path, filename)
    if filename.endswith(".json"):
        with open(path) as f:
            return json.load(f)
    if filename.endswith(".json.gz"):
        import gzip

        # json.load accepts the binary file object returned by gzip.open directly
        with gzip.open(path, "rb") as f:
            return json.load(f)
    else:
        raise TypeError(f"Don't know how to read {filename}")


def rename_artifact(src, dest):
    os.rename(os.path.join(ARTIFACTS_DIR, src), os.path.join(ARTIFACTS_DIR, dest))