# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import logging
import os
import shutil
import sys
import time
from collections import defaultdict

import yaml
from redo import retry
from taskgraph import create
from taskgraph.create import create_tasks

# TODO: Let standalone taskgraph generate parameters instead of calling internals
from taskgraph.decision import (
    _determine_more_accurate_base_ref,
    _determine_more_accurate_base_rev,
    _get_env_prefix,
)
from taskgraph.generator import TaskGraphGenerator
from taskgraph.parameters import Parameters
from taskgraph.taskgraph import TaskGraph
from taskgraph.util.python_path import find_object
from taskgraph.util.taskcluster import get_artifact
from taskgraph.util.vcs import get_repository
from taskgraph.util.yaml import load_yaml

from . import GECKO
from .actions import render_actions_json
from .files_changed import get_changed_files
from .parameters import get_app_version, get_version
from .try_option_syntax import parse_message
from .util.backstop import BACKSTOP_INDEX, is_backstop
from .util.bugbug import push_schedules
from .util.chunking import resolver
from .util.hg import get_hg_commit_message, get_hg_revision_branch
from .util.partials import populate_release_history
from .util.taskcluster import insert_index
from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds

logger = logging.getLogger(__name__)

ARTIFACTS_DIR = "artifacts"

# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
PER_PROJECT_PARAMETERS = {
    "try": {
        "enable_always_target": True,
        "target_tasks_method": "try_tasks",
        "release_type": "nightly",
    },
    "kaios-try": {
        "target_tasks_method": "try_tasks",
    },
    "ash": {
        "target_tasks_method": "default",
    },
    "cedar": {
        "target_tasks_method": "default",
    },
    "holly": {
        "enable_always_target": True,
        "target_tasks_method": "holly_tasks",
    },
    "oak": {
        "target_tasks_method": "default",
        "release_type": "nightly-oak",
    },
    "graphics": {
        "target_tasks_method": "graphics_tasks",
    },
    "autoland": {
        "optimize_strategies": "gecko_taskgraph.optimize:project.autoland",
        "target_tasks_method": "autoland_tasks",
        "test_manifest_loader": "bugbug",  # Remove this line to disable "manifest scheduling".
    },
    "mozilla-central": {
        "target_tasks_method": "mozilla_central_tasks",
        "release_type": "nightly",
    },
    "mozilla-beta": {
        "target_tasks_method": "mozilla_beta_tasks",
        "release_type": "beta",
    },
    "mozilla-release": {
        "target_tasks_method": "mozilla_release_tasks",
        "release_type": "release",
    },
    "mozilla-esr115": {
        "target_tasks_method": "mozilla_esr115_tasks",
        "release_type": "esr115",
    },
    "pine": {
        "target_tasks_method": "pine_tasks",
        "release_type": "nightly-pine",
    },
    "larch": {
        "target_tasks_method": "larch_tasks",
        "release_type": "nightly-larch",
    },
    "kaios": {
        "target_tasks_method": "kaios_tasks",
    },
    "toolchains": {
        "target_tasks_method": "mozilla_central_tasks",
    },
    # the default parameters are used for projects that do not match above.
    "default": {
        "target_tasks_method": "default",
    },
}
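# Note: get_decision_parameters() below merges a project's entry over the
# defaults it sets up, so each entry only lists what differs for that project.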


def full_task_graph_to_runnable_jobs(full_task_json):
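    """
    Reduce the full task-graph JSON to the runnable-jobs.json artifact: a map
    of task label to Treeherder info (symbol, group, collection, platform).
    Tasks without Treeherder metadata are skipped.
    """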
    runnable_jobs = {}
    for label, node in full_task_json.items():
        if not ("extra" in node["task"] and "treeherder" in node["task"]["extra"]):
            continue

        th = node["task"]["extra"]["treeherder"]
        runnable_jobs[label] = {"symbol": th["symbol"]}

        for i in ("groupName", "groupSymbol", "collection"):
            if i in th:
                runnable_jobs[label][i] = th[i]
        if th.get("machine", {}).get("platform"):
            runnable_jobs[label]["platform"] = th["machine"]["platform"]
    return runnable_jobs


def full_task_graph_to_manifests_by_task(full_task_json):
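    """
    Map each task label to the list of test manifests it runs, taken from the
    task's `test_manifests` attribute; tasks without manifests are omitted.
    """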
    manifests_by_task = defaultdict(list)
    for label, node in full_task_json.items():
        manifests = node["attributes"].get("test_manifests")
        if not manifests:
            continue

        manifests_by_task[label].extend(manifests)
    return manifests_by_task


def try_syntax_from_message(message):
    """
    Parse the try syntax out of a commit message, returning '' if none is
    found.
    """
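    # e.g. "Bug 123 - fix foo\ntry: -b do -p all" -> "try: -b do -p all"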
    try_idx = message.find("try:")
    if try_idx == -1:
        return ""
    return message[try_idx:].split("\n", 1)[0]


def taskgraph_decision(options, parameters=None):
    """
    Run the decision task. This function implements `mach taskgraph decision`,
    and is responsible for

    * processing decision task command-line options into parameters
    * running task-graph generation exactly the same way the other `mach
      taskgraph` commands do
    * generating a set of artifacts to memorialize the graph
    * calling TaskCluster APIs to create the graph
    """
    parameters = parameters or (
        lambda graph_config: get_decision_parameters(graph_config, options)
    )

    decision_task_id = os.environ["TASK_ID"]

    # create a TaskGraphGenerator instance
    tgg = TaskGraphGenerator(
        root_dir=options.get("root"),
        parameters=parameters,
        decision_task_id=decision_task_id,
        write_artifacts=True,
    )

    if not create.testing:
        # set additional index paths for the decision task
        set_decision_indexes(decision_task_id, tgg.parameters, tgg.graph_config)

    # write out the parameters used to generate this graph
    write_artifact("parameters.yml", dict(**tgg.parameters))

    # write out the public/actions.json file
    write_artifact(
        "actions.json",
        render_actions_json(tgg.parameters, tgg.graph_config, decision_task_id),
    )

    # write out the full graph for reference
    full_task_json = tgg.full_task_graph.to_json()
    write_artifact("full-task-graph.json", full_task_json)

    # write out the public/runnable-jobs.json file
    write_artifact(
        "runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json)
    )

    # write out the public/manifests-by-task.json file
    write_artifact(
        "manifests-by-task.json.gz",
        full_task_graph_to_manifests_by_task(full_task_json),
    )

    # write out the public/tests-by-manifest.json file
    write_artifact("tests-by-manifest.json.gz", resolver.tests_by_manifest)

    # this is just a test to check whether the from_json() function is working
    _, _ = TaskGraph.from_json(full_task_json)

    # write out the target task set to allow reproducing this as input
    write_artifact("target-tasks.json", list(tgg.target_task_set.tasks.keys()))

    # write out the optimized task graph to describe what will actually happen,
    # and the map of labels to taskids
    write_artifact("task-graph.json", tgg.morphed_task_graph.to_json())
    write_artifact("label-to-taskid.json", tgg.label_to_taskid)

    # write bugbug scheduling information if it was invoked
    if len(push_schedules) > 0:
        write_artifact("bugbug-push-schedules.json", push_schedules.popitem()[1])

    # cache run-task & misc/fetch-content
    scripts_root_dir = os.path.join(GECKO, "taskcluster/scripts")
    run_task_file_path = os.path.join(scripts_root_dir, "run-task")
    fetch_content_file_path = os.path.join(scripts_root_dir, "misc/fetch-content")
    shutil.copy2(run_task_file_path, ARTIFACTS_DIR)
    shutil.copy2(fetch_content_file_path, ARTIFACTS_DIR)

    # actually create the graph
    create_tasks(
        tgg.graph_config,
        tgg.morphed_task_graph,
        tgg.label_to_taskid,
        tgg.parameters,
        decision_task_id=decision_task_id,
    )


def get_decision_parameters(graph_config, options):
    """
    Load parameters from the command-line options for 'taskgraph decision'.
    This also applies per-project parameters, based on the given project.
    """
    product_dir = graph_config["product-dir"]

    parameters = {
        n: options[n]
        for n in [
            "base_repository",
            "base_ref",
            "base_rev",
            "head_repository",
            "head_rev",
            "head_ref",
            "head_tag",
            "project",
            "pushlog_id",
            "pushdate",
            "owner",
            "level",
            "repository_type",
            "target_tasks_method",
            "tasks_for",
        ]
        if n in options
    }

    commit_message = get_hg_commit_message(os.path.join(GECKO, product_dir))

    repo_path = os.getcwd()
    repo = get_repository(repo_path)
    parameters["base_ref"] = _determine_more_accurate_base_ref(
        repo,
        candidate_base_ref=options.get("base_ref"),
        head_ref=options.get("head_ref"),
        base_rev=options.get("base_rev"),
    )

    parameters["base_rev"] = _determine_more_accurate_base_rev(
        repo,
        base_ref=parameters["base_ref"],
        candidate_base_rev=options.get("base_rev"),
        head_rev=options.get("head_rev"),
        env_prefix=_get_env_prefix(graph_config),
    )

    # Define default filter list, as most configurations shouldn't need
    # custom filters.
    parameters["filters"] = [
        "target_tasks_method",
    ]
    parameters["enable_always_target"] = ["docker-image"]
    parameters["existing_tasks"] = {}
    parameters["do_not_optimize"] = []
    parameters["build_number"] = 1
    parameters["version"] = get_version(product_dir)
    parameters["app_version"] = get_app_version(product_dir)
    parameters["message"] = try_syntax_from_message(commit_message)
    parameters["hg_branch"] = get_hg_revision_branch(
        GECKO, revision=parameters["head_rev"]
    )
    parameters["files_changed"] = sorted(
        get_changed_files(parameters["head_repository"], parameters["head_rev"])
    )
    parameters["next_version"] = None
    parameters["optimize_strategies"] = None
    parameters["optimize_target_tasks"] = True
    parameters["phabricator_diff"] = None
    parameters["release_type"] = ""
    parameters["release_eta"] = ""
    parameters["release_enable_partner_repack"] = False
    parameters["release_enable_partner_attribution"] = False
    parameters["release_partners"] = []
    parameters["release_partner_config"] = {}
    parameters["release_partner_build_number"] = 1
    parameters["release_enable_emefree"] = False
    parameters["release_product"] = None
    parameters["required_signoffs"] = []
    parameters["signoff_urls"] = {}
    parameters["test_manifest_loader"] = "default"
    parameters["try_mode"] = None
    parameters["try_task_config"] = {}
    parameters["try_options"] = None

    # owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
    # case, fake it
    if "@" not in parameters["owner"]:
        parameters["owner"] += "@noreply.mozilla.org"

    # use the pushdate as build_date if given, else use current time
    parameters["build_date"] = parameters["pushdate"] or int(time.time())
    # moz_build_date is the build identifier based on build_date
    parameters["moz_build_date"] = time.strftime(
        "%Y%m%d%H%M%S", time.gmtime(parameters["build_date"])
    )
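    # e.g. a build_date of 1700000000 yields a moz_build_date of "20231114221320"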

    project = parameters["project"]
    try:
        parameters.update(PER_PROJECT_PARAMETERS[project])
    except KeyError:
        logger.warning(
            "using default project parameters; add {} to "
            "PER_PROJECT_PARAMETERS in {} to customize behavior "
            "for this project".format(project, __file__)
        )
        parameters.update(PER_PROJECT_PARAMETERS["default"])

    # `target_tasks_method` has higher precedence than `project` parameters
    if options.get("target_tasks_method"):
        parameters["target_tasks_method"] = options["target_tasks_method"]

    # ..but can be overridden by the commit message: if it contains the special
    # string "DONTBUILD" and this is an on-push decision task, then use the
    # special 'nothing' target task method.
    if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
        parameters["target_tasks_method"] = "nothing"

    if options.get("include_push_tasks"):
        get_existing_tasks(options.get("rebuild_kinds", []), parameters, graph_config)

    # If the target method is nightly, we should build partials. This means
    # knowing what has been released previously.
    # An empty release_history is fine, it just means no partials will be built
    parameters.setdefault("release_history", dict())
    if "nightly" in parameters.get("target_tasks_method", ""):
        parameters["release_history"] = populate_release_history("Firefox", project)

    if options.get("try_task_config_file"):
        task_config_file = os.path.abspath(options.get("try_task_config_file"))
    else:
        # if try_task_config.json is present, load it
        task_config_file = os.path.join(os.getcwd(), "try_task_config.json")

    # load try settings
    if "try" in project and options["tasks_for"] == "hg-push":
        set_try_config(parameters, task_config_file)

    if options.get("optimize_target_tasks") is not None:
        parameters["optimize_target_tasks"] = options["optimize_target_tasks"]

    # Determine if this should be a backstop push.
    parameters["backstop"] = is_backstop(parameters)

    if "decision-parameters" in graph_config["taskgraph"]:
        find_object(graph_config["taskgraph"]["decision-parameters"])(
            graph_config, parameters
        )

    result = Parameters(**parameters)
    result.check()
    return result


def get_existing_tasks(rebuild_kinds, parameters, graph_config):
    """
    Find the decision task corresponding to the on-push graph, and return
    a mapping of labels to task-ids from it. This will skip the kinds specified
    by `rebuild_kinds`.
    """
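    # The on-push decision task may not be findable right away; retry a few
    # times (5 minutes apart) before giving up.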
    try:
        decision_task = retry(
            find_decision_task,
            args=(parameters, graph_config),
            attempts=4,
            sleeptime=5 * 60,
        )
    except Exception:
        logger.exception("Didn't find existing push task.")
        sys.exit(1)
    _, task_graph = TaskGraph.from_json(
        get_artifact(decision_task, "public/full-task-graph.json")
    )
    parameters["existing_tasks"] = find_existing_tasks_from_previous_kinds(
        task_graph, [decision_task], rebuild_kinds
    )


def set_try_config(parameters, task_config_file):
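    """
    Load try settings into `parameters`. A version 1 config file is stored
    whole under `try_task_config`; a version 2 file supplies a `parameters`
    dict that is merged in. Legacy try option syntax in the commit message
    (a "try:" line) takes effect afterwards, if present.
    """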
    if os.path.isfile(task_config_file):
        logger.info(f"using try tasks from {task_config_file}")
        with open(task_config_file) as fh:
            task_config = json.load(fh)
        task_config_version = task_config.pop("version", 1)
        if task_config_version == 1:
            parameters["try_mode"] = "try_task_config"
            parameters["try_task_config"] = task_config
        elif task_config_version == 2:
            parameters.update(task_config["parameters"])
            parameters["try_mode"] = "try_task_config"
        else:
            raise Exception(
                f"Unknown `try_task_config.json` version: {task_config_version}"
            )

    if "try:" in parameters["message"]:
        parameters["try_mode"] = "try_option_syntax"
        parameters.update(parse_message(parameters["message"]))
    else:
        parameters["try_options"] = None


def set_decision_indexes(decision_task_id, params, graph_config):
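    """Insert extra Taskcluster index paths pointing at this decision task."""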
    index_paths = []
    if params["backstop"]:
        # When two Decision tasks run at nearly the same time, it's possible
        # they both end up being backstops if the second checks the backstop
        # index before the first inserts it. Insert this index first to reduce
        # the chances of that happening.
        index_paths.insert(0, BACKSTOP_INDEX)

    subs = params.copy()
    subs["trust-domain"] = graph_config["trust-domain"]

    for index_path in index_paths:
        insert_index(index_path.format(**subs), decision_task_id, use_proxy=True)


def write_artifact(filename, data):
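    """
    Write `data` to ARTIFACTS_DIR/`filename`, serializing according to the
    extension: YAML for .yml, JSON for .json, gzipped JSON for .json.gz.
    """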
    logger.info(f"writing artifact file `{filename}`")
    if not os.path.isdir(ARTIFACTS_DIR):
        os.mkdir(ARTIFACTS_DIR)
    path = os.path.join(ARTIFACTS_DIR, filename)
    if filename.endswith(".yml"):
        with open(path, "w") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
    elif filename.endswith(".json"):
        with open(path, "w") as f:
            json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
    elif filename.endswith(".json.gz"):
        import gzip

        with gzip.open(path, "wb") as f:
            f.write(json.dumps(data).encode("utf-8"))
    else:
        raise TypeError(f"Don't know how to write to {filename}")


def read_artifact(filename):
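    """Read an artifact back from ARTIFACTS_DIR; the inverse of write_artifact()."""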
    path = os.path.join(ARTIFACTS_DIR, filename)
    if filename.endswith(".yml"):
        return load_yaml(path, filename)
    if filename.endswith(".json"):
        with open(path) as f:
            return json.load(f)
    if filename.endswith(".json.gz"):
        import gzip

        # json.load accepts the binary file object returned by gzip.open directly
        with gzip.open(path, "rb") as f:
            return json.load(f)
    else:
        raise TypeError(f"Don't know how to read {filename}")


def rename_artifact(src, dest):
    os.rename(os.path.join(ARTIFACTS_DIR, src), os.path.join(ARTIFACTS_DIR, dest))