taskcluster/taskgraph/decision.py

   1 # -*- coding: utf-8 -*-
   2 # This Source Code Form is subject to the terms of the Mozilla Public
   3 # License, v. 2.0. If a copy of the MPL was not distributed with this
   4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
   5
   6 from __future__ import absolute_import, print_function, unicode_literals
   7
   8 import os
   9 import json
  10 import logging
  11 import time
  12 import sys
  13 from collections import defaultdict
  14
  15 import six
  16 from six import text_type
  17 from redo import retry
  18 import yaml
  19
  20 from . import GECKO
  21 from .actions import render_actions_json
  22 from .create import create_tasks
  23 from .generator import TaskGraphGenerator
  24 from .parameters import Parameters, get_version, get_app_version
  25 from .taskgraph import TaskGraph
  26 from taskgraph.util.python_path import find_object
  27 from .try_option_syntax import parse_message
  28 from .util.backstop import is_backstop
  29 from .util.bugbug import push_schedules
  30 from .util.chunking import resolver
  31 from .util.hg import get_hg_revision_branch, get_hg_commit_message
  32 from .util.partials import populate_release_history
  33 from .util.schema import validate_schema, Schema
  34 from .util.taskcluster import get_artifact, insert_index
  35 from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds
  36 from .util.yaml import load_yaml
  37 from voluptuous import Required, Optional, Any
  38
  39
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Directory that `write_artifact`, `read_artifact` and `rename_artifact`
# join artifact file names onto.  It is a relative path, so it resolves
# against the current working directory.
ARTIFACTS_DIR = "artifacts"
  43
# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
#
# `get_decision_parameters` merges the entry matching the `project` parameter
# on top of the built-in defaults; projects without an entry get the
# `default` entry (and a warning is logged).  A `target_tasks_method` given
# explicitly in the decision options still takes precedence over the value
# listed here.
PER_PROJECT_PARAMETERS = {
    "try": {
        "target_tasks_method": "try_tasks",
    },
    "try-comm-central": {
        "target_tasks_method": "try_tasks",
    },
    "kaios-try": {
        "target_tasks_method": "try_tasks",
    },
    "ash": {
        "target_tasks_method": "default",
    },
    "cedar": {
        "target_tasks_method": "default",
    },
    "oak": {
        "target_tasks_method": "nightly_desktop",
        "release_type": "nightly-oak",
    },
    "graphics": {
        "target_tasks_method": "graphics_tasks",
    },
    "autoland": {
        "optimize_strategies": "taskgraph.optimize:project.autoland",
        "target_tasks_method": "autoland_tasks",
        "test_manifest_loader": "bugbug",  # Remove this line to disable "manifest scheduling".
    },
    "mozilla-central": {
        "target_tasks_method": "mozilla_central_tasks",
        "release_type": "nightly",
    },
    "mozilla-beta": {
        "target_tasks_method": "mozilla_beta_tasks",
        "release_type": "beta",
    },
    "mozilla-release": {
        "target_tasks_method": "mozilla_release_tasks",
        "release_type": "release",
    },
    "mozilla-esr78": {
        "target_tasks_method": "mozilla_esr78_tasks",
        "release_type": "esr78",
    },
    "comm-central": {
        "target_tasks_method": "default",
        "release_type": "nightly",
    },
    "comm-beta": {
        "target_tasks_method": "mozilla_beta_tasks",
        "release_type": "beta",
    },
    "comm-esr78": {
        "target_tasks_method": "mozilla_esr78_tasks",
        "release_type": "release",
    },
    "pine": {
        "target_tasks_method": "pine_tasks",
    },
    "kaios": {
        "target_tasks_method": "kaios_tasks",
    },
    # the default parameters are used for projects that do not match above.
    "default": {
        "target_tasks_method": "default",
    },
}
 113
# Schema for version 1 `try_task_config.json` files.  `set_try_config`
# validates the loaded file against it (after removing the `version` key)
# whenever the file declares version 1 or declares no version at all.
# Only `tasks` is required; every other key is optional.
try_task_config_schema = Schema(
    {
        Required("tasks"): [text_type],
        Optional("browsertime"): bool,
        Optional("chemspill-prio"): bool,
        Optional("disable-pgo"): bool,
        Optional("env"): {text_type: text_type},
        Optional("gecko-profile"): bool,
        Optional(
            "perftest-options",
            description="Options passed from `mach perftest` to try.",
        ): object,
        Optional(
            "optimize-strategies",
            description="Alternative optimization strategies to use instead of the default. "
            "A module path pointing to a dict to be use as the `strategy_override` "
            "argument in `taskgraph.optimize.optimize_task_graph`.",
        ): text_type,
        Optional("rebuild"): int,
        Optional("tasks-regex"): {
            "include": Any(None, [text_type]),
            "exclude": Any(None, [text_type]),
        },
        Optional("use-artifact-builds"): bool,
        Optional(
            "worker-overrides",
            description="Mapping of worker alias to worker pools to use for those aliases.",
        ): {text_type: text_type},
        Optional("routes"): [text_type],
    }
)
"""
Schema for try_task_config.json files.
"""
 148
# Schema for version 2 `try_task_config.json` files.  The only recognised
# key is the optional `parameters` mapping, which `set_try_config` merges
# directly into the decision parameters.
try_task_config_schema_v2 = Schema(
    {
        Optional("parameters"): {text_type: object},
    }
)
 154
 155
 156 def full_task_graph_to_runnable_jobs(full_task_json):
 157     runnable_jobs = {}
 158     for label, node in six.iteritems(full_task_json):
 159         if not ("extra" in node["task"] and "treeherder" in node["task"]["extra"]):
 160             continue
 161
 162         th = node["task"]["extra"]["treeherder"]
 163         runnable_jobs[label] = {"symbol": th["symbol"]}
 164
 165         for i in ("groupName", "groupSymbol", "collection"):
 166             if i in th:
 167                 runnable_jobs[label][i] = th[i]
 168         if th.get("machine", {}).get("platform"):
 169             runnable_jobs[label]["platform"] = th["machine"]["platform"]
 170     return runnable_jobs
 171
 172
 173 def full_task_graph_to_manifests_by_task(full_task_json):
 174     manifests_by_task = defaultdict(list)
 175     for label, node in six.iteritems(full_task_json):
 176         manifests = node["attributes"].get("test_manifests")
 177         if not manifests:
 178             continue
 179
 180         manifests_by_task[label].extend(manifests)
 181     return manifests_by_task
 182
 183
 184 def try_syntax_from_message(message):
 185     """
 186     Parse the try syntax out of a commit message, returning '' if none is
 187     found.
 188     """
 189     try_idx = message.find("try:")
 190     if try_idx == -1:
 191         return ""
 192     return message[try_idx:].split("\n", 1)[0]
 193
 194
 195 def taskgraph_decision(options, parameters=None):
 196     """
 197     Run the decision task.  This function implements `mach taskgraph decision`,
 198     and is responsible for
 199
 200      * processing decision task command-line options into parameters
 201      * running task-graph generation exactly the same way the other `mach
 202        taskgraph` commands do
 203      * generating a set of artifacts to memorialize the graph
 204      * calling TaskCluster APIs to create the graph
 205     """
 206
 207     parameters = parameters or (
 208         lambda graph_config: get_decision_parameters(graph_config, options)
 209     )
 210
 211     decision_task_id = os.environ["TASK_ID"]
 212
 213     # create a TaskGraphGenerator instance
 214     tgg = TaskGraphGenerator(
 215         root_dir=options.get("root"),
 216         parameters=parameters,
 217         decision_task_id=decision_task_id,
 218         write_artifacts=True,
 219     )
 220
 221     # set additional index paths for the decision task
 222     set_decision_indexes(decision_task_id, tgg.parameters, tgg.graph_config)
 223
 224     # write out the parameters used to generate this graph
 225     write_artifact("parameters.yml", dict(**tgg.parameters))
 226
 227     # write out the public/actions.json file
 228     write_artifact(
 229         "actions.json",
 230         render_actions_json(tgg.parameters, tgg.graph_config, decision_task_id),
 231     )
 232
 233     # write out the full graph for reference
 234     full_task_json = tgg.full_task_graph.to_json()
 235     write_artifact("full-task-graph.json", full_task_json)
 236
 237     # write out the public/runnable-jobs.json file
 238     write_artifact(
 239         "runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json)
 240     )
 241
 242     # write out the public/manifests-by-task.json file
 243     write_artifact(
 244         "manifests-by-task.json.gz",
 245         full_task_graph_to_manifests_by_task(full_task_json),
 246     )
 247
 248     # write out the public/tests-by-manifest.json file
 249     write_artifact("tests-by-manifest.json.gz", resolver.tests_by_manifest)
 250
 251     # this is just a test to check whether the from_json() function is working
 252     _, _ = TaskGraph.from_json(full_task_json)
 253
 254     # write out the target task set to allow reproducing this as input
 255     write_artifact("target-tasks.json", list(tgg.target_task_set.tasks.keys()))
 256
 257     # write out the optimized task graph to describe what will actually happen,
 258     # and the map of labels to taskids
 259     write_artifact("task-graph.json", tgg.morphed_task_graph.to_json())
 260     write_artifact("label-to-taskid.json", tgg.label_to_taskid)
 261
 262     # write bugbug scheduling information if it was invoked
 263     if len(push_schedules) > 0:
 264         write_artifact("bugbug-push-schedules.json", push_schedules.popitem()[1])
 265
 266     # actually create the graph
 267     create_tasks(
 268         tgg.graph_config,
 269         tgg.morphed_task_graph,
 270         tgg.label_to_taskid,
 271         tgg.parameters,
 272         decision_task_id=decision_task_id,
 273     )
 274
 275
 276 def get_decision_parameters(graph_config, options):
 277     """
 278     Load parameters from the command-line options for 'taskgraph decision'.
 279     This also applies per-project parameters, based on the given project.
 280
 281     """
 282     product_dir = graph_config["product-dir"]
 283
 284     parameters = {
 285         n: options[n]
 286         for n in [
 287             "base_repository",
 288             "head_repository",
 289             "head_rev",
 290             "head_ref",
 291             "project",
 292             "pushlog_id",
 293             "pushdate",
 294             "owner",
 295             "level",
 296             "target_tasks_method",
 297             "tasks_for",
 298         ]
 299         if n in options
 300     }
 301
 302     for n in (
 303         "comm_base_repository",
 304         "comm_head_repository",
 305         "comm_head_rev",
 306         "comm_head_ref",
 307     ):
 308         if n in options and options[n] is not None:
 309             parameters[n] = options[n]
 310
 311     commit_message = get_hg_commit_message(os.path.join(GECKO, product_dir))
 312
 313     # Define default filter list, as most configurations shouldn't need
 314     # custom filters.
 315     parameters["filters"] = [
 316         "target_tasks_method",
 317     ]
 318     parameters["existing_tasks"] = {}
 319     parameters["do_not_optimize"] = []
 320     parameters["build_number"] = 1
 321     parameters["version"] = get_version(product_dir)
 322     parameters["app_version"] = get_app_version(product_dir)
 323     parameters["message"] = try_syntax_from_message(commit_message)
 324     parameters["hg_branch"] = get_hg_revision_branch(
 325         GECKO, revision=parameters["head_rev"]
 326     )
 327     parameters["next_version"] = None
 328     parameters["optimize_strategies"] = None
 329     parameters["optimize_target_tasks"] = True
 330     parameters["phabricator_diff"] = None
 331     parameters["release_type"] = ""
 332     parameters["release_eta"] = ""
 333     parameters["release_enable_partner_repack"] = False
 334     parameters["release_enable_partner_attribution"] = False
 335     parameters["release_partners"] = []
 336     parameters["release_partner_config"] = {}
 337     parameters["release_partner_build_number"] = 1
 338     parameters["release_enable_emefree"] = False
 339     parameters["release_product"] = None
 340     parameters["required_signoffs"] = []
 341     parameters["signoff_urls"] = {}
 342     parameters["test_manifest_loader"] = "default"
 343     parameters["try_mode"] = None
 344     parameters["try_task_config"] = {}
 345     parameters["try_options"] = None
 346
 347     # owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
 348     # case, fake it
 349     if "@" not in parameters["owner"]:
 350         parameters["owner"] += "@noreply.mozilla.org"
 351
 352     # use the pushdate as build_date if given, else use current time
 353     parameters["build_date"] = parameters["pushdate"] or int(time.time())
 354     # moz_build_date is the build identifier based on build_date
 355     parameters["moz_build_date"] = six.ensure_text(
 356         time.strftime("%Y%m%d%H%M%S", time.gmtime(parameters["build_date"]))
 357     )
 358
 359     project = parameters["project"]
 360     try:
 361         parameters.update(PER_PROJECT_PARAMETERS[project])
 362     except KeyError:
 363         logger.warning(
 364             "using default project parameters; add {} to "
 365             "PER_PROJECT_PARAMETERS in {} to customize behavior "
 366             "for this project".format(project, __file__)
 367         )
 368         parameters.update(PER_PROJECT_PARAMETERS["default"])
 369
 370     # `target_tasks_method` has higher precedence than `project` parameters
 371     if options.get("target_tasks_method"):
 372         parameters["target_tasks_method"] = options["target_tasks_method"]
 373
 374     # ..but can be overridden by the commit message: if it contains the special
 375     # string "DONTBUILD" and this is an on-push decision task, then use the
 376     # special 'nothing' target task method.
 377     if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
 378         parameters["target_tasks_method"] = "nothing"
 379
 380     if options.get("include_push_tasks"):
 381         get_existing_tasks(options.get("rebuild_kinds", []), parameters, graph_config)
 382
 383     # If the target method is nightly, we should build partials. This means
 384     # knowing what has been released previously.
 385     # An empty release_history is fine, it just means no partials will be built
 386     parameters.setdefault("release_history", dict())
 387     if "nightly" in parameters.get("target_tasks_method", ""):
 388         parameters["release_history"] = populate_release_history("Firefox", project)
 389
 390     if options.get("try_task_config_file"):
 391         task_config_file = os.path.abspath(options.get("try_task_config_file"))
 392     else:
 393         # if try_task_config.json is present, load it
 394         task_config_file = os.path.join(os.getcwd(), "try_task_config.json")
 395
 396     # load try settings
 397     if "try" in project and options["tasks_for"] == "hg-push":
 398         set_try_config(parameters, task_config_file)
 399
 400     if options.get("optimize_target_tasks") is not None:
 401         parameters["optimize_target_tasks"] = options["optimize_target_tasks"]
 402
 403     if "decision-parameters" in graph_config["taskgraph"]:
 404         find_object(graph_config["taskgraph"]["decision-parameters"])(
 405             graph_config, parameters
 406         )
 407
 408     # Determine if this should be a backstop push.
 409     parameters["backstop"] = is_backstop(parameters)
 410
 411     result = Parameters(**parameters)
 412     result.check()
 413     return result
 414
 415
 416 def get_existing_tasks(rebuild_kinds, parameters, graph_config):
 417     """
 418     Find the decision task corresponding to the on-push graph, and return
 419     a mapping of labels to task-ids from it. This will skip the kinds specificed
 420     by `rebuild_kinds`.
 421     """
 422     try:
 423         decision_task = retry(
 424             find_decision_task,
 425             args=(parameters, graph_config),
 426             attempts=4,
 427             sleeptime=5 * 60,
 428         )
 429     except Exception:
 430         logger.exception("Didn't find existing push task.")
 431         sys.exit(1)
 432     _, task_graph = TaskGraph.from_json(
 433         get_artifact(decision_task, "public/full-task-graph.json")
 434     )
 435     parameters["existing_tasks"] = find_existing_tasks_from_previous_kinds(
 436         task_graph, [decision_task], rebuild_kinds
 437     )
 438
 439
 440 def set_try_config(parameters, task_config_file):
 441     if os.path.isfile(task_config_file):
 442         logger.info("using try tasks from {}".format(task_config_file))
 443         with open(task_config_file, "r") as fh:
 444             task_config = json.load(fh)
 445         task_config_version = task_config.pop("version", 1)
 446         if task_config_version == 1:
 447             validate_schema(
 448                 try_task_config_schema,
 449                 task_config,
 450                 "Invalid v1 `try_task_config.json`.",
 451             )
 452             parameters["try_mode"] = "try_task_config"
 453             parameters["try_task_config"] = task_config
 454         elif task_config_version == 2:
 455             validate_schema(
 456                 try_task_config_schema_v2,
 457                 task_config,
 458                 "Invalid v2 `try_task_config.json`.",
 459             )
 460             parameters.update(task_config["parameters"])
 461             return
 462         else:
 463             raise Exception(
 464                 "Unknown `try_task_config.json` version: {}".format(task_config_version)
 465             )
 466
 467     if "try:" in parameters["message"]:
 468         parameters["try_mode"] = "try_option_syntax"
 469         parameters.update(parse_message(parameters["message"]))
 470     else:
 471         parameters["try_options"] = None
 472
 473     if parameters["try_mode"] == "try_task_config":
 474         # The user has explicitly requested a set of jobs, so run them all
 475         # regardless of optimization.  Their dependencies can be optimized,
 476         # though.
 477         parameters["optimize_target_tasks"] = False
 478     else:
 479         # For a try push with no task selection, apply the default optimization
 480         # process to all of the tasks.
 481         parameters["optimize_target_tasks"] = True
 482
 483
 484 def set_decision_indexes(decision_task_id, params, graph_config):
 485     index_paths = []
 486     if params["backstop"]:
 487         index_paths.append("{trust-domain}.v2.{project}.latest.taskgraph.backstop")
 488
 489     subs = params.copy()
 490     subs["trust-domain"] = graph_config["trust-domain"]
 491
 492     index_paths = [i.format(**subs) for i in index_paths]
 493     for index_path in index_paths:
 494         insert_index(index_path, decision_task_id, use_proxy=True)
 495
 496
 497 def write_artifact(filename, data):
 498     logger.info("writing artifact file `{}`".format(filename))
 499     if not os.path.isdir(ARTIFACTS_DIR):
 500         os.mkdir(ARTIFACTS_DIR)
 501     path = os.path.join(ARTIFACTS_DIR, filename)
 502     if filename.endswith(".yml"):
 503         with open(path, "w") as f:
 504             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
 505     elif filename.endswith(".json"):
 506         with open(path, "w") as f:
 507             json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
 508     elif filename.endswith(".json.gz"):
 509         import gzip
 510
 511         with gzip.open(path, "wb") as f:
 512             f.write(json.dumps(data).encode("utf-8"))
 513     else:
 514         raise TypeError("Don't know how to write to {}".format(filename))
 515
 516
 517 def read_artifact(filename):
 518     path = os.path.join(ARTIFACTS_DIR, filename)
 519     if filename.endswith(".yml"):
 520         return load_yaml(path, filename)
 521     elif filename.endswith(".json"):
 522         with open(path, "r") as f:
 523             return json.load(f)
 524     elif filename.endswith(".json.gz"):
 525         import gzip
 526
 527         with gzip.open(path, "rb") as f:
 528             return json.load(f.decode("utf-8"))
 529     else:
 530         raise TypeError("Don't know how to read {}".format(filename))
 531
 532
 533 def rename_artifact(src, dest):
 534     os.rename(os.path.join(ARTIFACTS_DIR, src), os.path.join(ARTIFACTS_DIR, dest))