Backed out changeset 2fc34d798e24 (bug 1917771) for causing failures at baseline...
[gecko.git] / taskcluster / gecko_taskgraph / main.py
blob99840ebd31440afe617febaec7ecec88a1ff831f
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import argparse
6 import atexit
7 import json
8 import logging
9 import os
10 import shutil
11 import subprocess
12 import sys
13 import tempfile
14 import traceback
15 from pathlib import Path
16 from typing import Any, List
18 import appdirs
19 import yaml
20 from taskgraph.main import (
21 FORMAT_METHODS,
22 argument,
23 command,
24 commands,
25 dump_output,
26 generate_taskgraph,
29 from gecko_taskgraph import GECKO
30 from gecko_taskgraph.files_changed import get_locally_changed_files
def format_taskgraph_yaml(taskgraph):
    """Serialize ``taskgraph.to_json()`` as block-style YAML text.

    A dedicated dumper is used so that YAML anchors/aliases are never
    emitted and ``ReadOnlyDict`` values are written as plain mappings.
    """
    from taskgraph.util.readonlydict import ReadOnlyDict

    class TGDumper(yaml.SafeDumper):
        def ignore_aliases(self, data):
            # Always write repeated objects out in full.
            return True

    def _represent_ro_dict(dumper, data):
        # Convert to a regular dict so the stock mapping representer applies.
        return dumper.represent_dict(dict(data))

    TGDumper.add_representer(ReadOnlyDict, _represent_ro_dict)

    graph_json = taskgraph.to_json()
    return yaml.dump(graph_json, Dumper=TGDumper, default_flow_style=False)


# Make the YAML formatter selectable through the "yaml" format name.
FORMAT_METHODS["yaml"] = format_taskgraph_yaml
@command(
    "tasks",
    help="Show all tasks in the taskgraph.",
    defaults={"graph_attr": "full_task_set"},
)
@command(
    "full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
)
@command(
    "target",
    help="Show the set of target tasks.",
    defaults={"graph_attr": "target_task_set"},
)
@command(
    "target-graph",
    help="Show the target graph.",
    defaults={"graph_attr": "target_task_graph"},
)
@command(
    "optimized",
    help="Show the optimized graph.",
    defaults={"graph_attr": "optimized_task_graph"},
)
@command(
    "morphed",
    help="Show the morphed graph.",
    defaults={"graph_attr": "morphed_task_graph"},
)
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
@argument(
    "--verbose", "-v", action="store_true", help="include debug-level logging output"
)
@argument(
    "--json",
    "-J",
    action="store_const",
    dest="format",
    const="json",
    help="Output task graph as a JSON object",
)
@argument(
    "--yaml",
    "-Y",
    action="store_const",
    dest="format",
    const="yaml",
    help="Output task graph as a YAML object",
)
@argument(
    "--labels",
    "-L",
    action="store_const",
    dest="format",
    const="labels",
    help="Output the label for each task in the task graph (default)",
)
@argument(
    "--parameters",
    "-p",
    default=None,
    action="append",
    help="Parameters to use for the generation. Can be a path to file (.yml or "
    ".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
    "parameters files), a url, of the form `project=mozilla-central` to download "
    "latest parameters file for the specified project from CI, or of the form "
    "`task-id=<decision task id>` to download parameters from the specified "
    "decision task. Can be specified multiple times, in which case multiple "
    "generations will happen from the same invocation (one per parameters "
    "specified).",
)
@argument(
    "--force-local-files-changed",
    default=False,
    action="store_true",
    help="Compute the 'files-changed' parameter from local version control, "
    "even when explicitly using a parameter set that already has it defined. "
    "Note that this is already the default behaviour when no parameters are "
    "specified.",
)
@argument(
    "--no-optimize",
    dest="optimize",
    action="store_false",
    # A real boolean (previously the truthy string "true") so the option is
    # always either True or False.
    default=True,
    help="do not remove tasks from the graph that are found in the "
    "index (a.k.a. optimize the graph)",
)
@argument(
    "-o",
    "--output-file",
    default=None,
    help="file path to store generated output.",
)
@argument(
    "--tasks-regex",
    "--tasks",
    default=None,
    help="only return tasks with labels matching this regular expression.",
)
@argument(
    "--exclude-key",
    default=None,
    dest="exclude_keys",
    action="append",
    help="Exclude the specified key (using dot notation) from the final result. "
    "This is mainly useful with '--diff' to filter out expected differences.",
)
@argument(
    "-k",
    "--target-kind",
    dest="target_kinds",
    action="append",
    default=[],
    help="only return tasks that are of the given kind, or their dependencies.",
)
@argument(
    "-F",
    "--fast",
    default=False,
    action="store_true",
    help="enable fast task generation for local debugging.",
)
@argument(
    "--diff",
    const="default",
    nargs="?",
    default=None,
    help="Generate and diff the current taskgraph against another revision. "
    "Without args the base revision will be used. A revision specifier such as "
    "the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
@argument(
    "-j",
    "--max-workers",
    dest="max_workers",
    default=None,
    type=int,
    help="The maximum number of workers to use for parallel operations such as "
    "when multiple parameters files are passed.",
)
def show_taskgraph(options):
    """Generate the requested graph(s) and optionally diff against a revision.

    Args:
        options: dict of parsed command-line options. It is mutated: the
            ``verbose`` and ``parameters`` keys are popped, and
            ``output_file`` / ``force_local_files_changed`` may be rewritten.

    Returns:
        ``1`` when ``--diff`` was requested but the working directory is
        dirty; ``None`` otherwise.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits with a status other
            than 0 (identical) or 1 (differences found).
    """
    from typing import Union

    from mozversioncontrol import get_repository_object as get_repository
    from taskgraph.parameters import Parameters, parameters_loader

    if options.pop("verbose", False):
        logging.root.setLevel(logging.DEBUG)

    repo = None
    cur_ref = None
    diffdir = None
    output_file = options["output_file"]

    if options["diff"]:
        # --root argument is taskgraph's config at <repo>/taskcluster
        repo_root = os.getcwd()
        if options["root"]:
            repo_root = f"{options['root']}/.."
        repo = get_repository(repo_root)

        if not repo.working_directory_clean():
            print(
                "abort: can't diff taskgraph with dirty working directory",
                file=sys.stderr,
            )
            return 1

        # We want to return the working directory to the current state
        # as best we can after we're done. In all known cases, using
        # branch or bookmark (which are both available on the VCS object)
        # as `branch` is preferable to a specific revision.
        cur_ref = repo.branch or repo.head_ref[:12]

        diffdir = tempfile.mkdtemp()
        atexit.register(
            shutil.rmtree, diffdir
        )  # make sure the directory gets cleaned up
        options["output_file"] = os.path.join(
            diffdir, f"{options['graph_attr']}_{cur_ref}"
        )
        print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr)

    overrides = {
        "target-kinds": options.get("target_kinds"),
    }
    parameters: List[Union[str, Parameters]] = options.pop("parameters")
    if not parameters:
        parameters = [
            parameters_loader(None, strict=False, overrides=overrides)
        ]  # will use default values

        # This is the default behaviour anyway, so no need to re-compute.
        options["force_local_files_changed"] = False

    elif options["force_local_files_changed"]:
        overrides["files-changed"] = sorted(get_locally_changed_files(GECKO))

    # Expand directory specs into the parameter files they contain. Iterate
    # over a copy because the list is modified inside the loop.
    for param in parameters[:]:
        if isinstance(param, str) and os.path.isdir(param):
            parameters.remove(param)
            parameters.extend(
                [
                    p.as_posix()
                    for p in Path(param).iterdir()
                    if p.suffix in (".yml", ".json")
                ]
            )

    logdir = None
    if len(parameters) > 1:
        # Log to separate files for each process instead of stderr to
        # avoid interleaving.
        basename = os.path.basename(os.getcwd())
        logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
        os.makedirs(logdir, exist_ok=True)
    else:
        # Only setup logging if we have a single parameter spec. Otherwise
        # logging will go to files. This is also used as a hook for Gecko
        # to setup its `mach` based logging.
        setup_logging()

    generate_taskgraph(options, parameters, overrides, logdir)

    if options["diff"]:
        assert diffdir is not None
        assert repo is not None

        # Reload taskgraph modules to pick up changes and clear global state.
        for mod in sys.modules.copy():
            if (
                mod != __name__
                and mod != "taskgraph.main"
                and mod.split(".", 1)[0].endswith(("taskgraph", "mozbuild"))
            ):
                del sys.modules[mod]

        # Ensure gecko_taskgraph is ahead of taskcluster_taskgraph in sys.path.
        # Without this, we may end up validating some things against the wrong
        # schema.
        import gecko_taskgraph  # noqa

        if options["diff"] == "default":
            base_ref = repo.base_ref
        else:
            base_ref = options["diff"]

        try:
            repo.update(base_ref)
            base_ref = repo.head_ref[:12]
            options["output_file"] = os.path.join(
                diffdir, f"{options['graph_attr']}_{base_ref}"
            )
            print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr)
            generate_taskgraph(options, parameters, overrides, logdir)
        finally:
            # Always restore the checkout the user started from.
            repo.update(cur_ref)

        # Generate diff(s)
        diffcmd = [
            "diff",
            "-U20",
            "--report-identical-files",
            f"--label={options['graph_attr']}@{base_ref}",
            f"--label={options['graph_attr']}@{cur_ref}",
        ]

        non_fatal_failures = []
        for spec in parameters:
            base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}")
            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}")

            params_name = None
            if len(parameters) > 1:
                params_name = Parameters.format_spec(spec)
                base_path += f"_{params_name}"
                cur_path += f"_{params_name}"

            # If the base or cur files are missing it means that generation
            # failed. If one of them failed but not the other, the failure is
            # likely due to the patch making changes to taskgraph in modules
            # that don't get reloaded (safe to ignore). If both generations
            # failed, there's likely a real issue.
            base_missing = not os.path.isfile(base_path)
            cur_missing = not os.path.isfile(cur_path)
            if base_missing != cur_missing:  # != is equivalent to XOR for booleans
                non_fatal_failures.append(os.path.basename(base_path))
                continue

            try:
                # If the output file(s) are missing, this command will raise
                # CalledProcessError with a returncode > 1.
                proc = subprocess.run(
                    diffcmd + [base_path, cur_path],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                    check=True,
                )
                diff_output = proc.stdout
                returncode = 0
            except subprocess.CalledProcessError as e:
                # returncode 1 simply means diffs were found
                if e.returncode != 1:
                    print(e.stderr, file=sys.stderr)
                    raise
                diff_output = e.output
                returncode = e.returncode

            dump_output(
                diff_output,
                # Don't bother saving file if no diffs were found. Log to
                # console in this case instead.
                path=None if returncode == 0 else output_file,
                params_spec=spec if len(parameters) > 1 else None,
            )

        if non_fatal_failures:
            failstr = "\n ".join(sorted(non_fatal_failures))
            print(
                "WARNING: Diff skipped for the following generation{s} "
                "due to failures:\n {failstr}".format(
                    s="s" if len(non_fatal_failures) > 1 else "", failstr=failstr
                ),
                file=sys.stderr,
            )

        if options["format"] != "json":
            print(
                "If you were expecting differences in task bodies "
                'you should pass "-J"\n',
                file=sys.stderr,
            )

    if len(parameters) > 1:
        print(f"See '{logdir}' for logs", file=sys.stderr)
@command("build-image", help="Build a Docker image")
@argument("image_name", help="Name of the image to build")
@argument(
    "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
)
@argument(
    "--context-only",
    help="File name the context tarball should be written to. "
    "With this option it will only build the context.tar.",
    metavar="context.tar",
)
def build_image(args):
    """Build a Docker image, or only its context tarball.

    Args:
        args: dict of parsed options. When ``context_only`` is ``None`` the
            image ``image_name`` is built with ``tag``; otherwise only the
            context is written to the ``context_only`` path.
    """
    # Aliased so the helper does not shadow this command function's own name.
    from gecko_taskgraph.docker import build_context
    from gecko_taskgraph.docker import build_image as build_docker_image

    if args["context_only"] is None:
        build_docker_image(args["image_name"], args["tag"], os.environ)
    else:
        build_context(args["image_name"], args["context_only"], os.environ)
@command(
    "load-image",
    help="Load a pre-built Docker image. Note that you need to "
    "have docker installed and running for this to work.",
)
@argument(
    "--task-id",
    help="Load the image at public/image.tar.zst in this task, "
    "rather than searching the index",
)
@argument(
    "-t",
    "--tag",
    help="tag that the image should be loaded as. If not specified, the "
    "image will be loaded with the tag from the tarball",
    metavar="name:tag",
)
@argument(
    "image_name",
    nargs="?",
    help="Load the image of this name based on the current "
    "contents of the tree (as built for mozilla-central "
    "or mozilla-inbound)",
)
def load_image(args):
    """Load a pre-built Docker image by task id or by image name.

    Args:
        args: dict of parsed options; at least one of ``image_name`` or
            ``task_id`` must be set. ``task_id`` takes precedence.

    Exits the process with status 1 when neither selector is given, when the
    loader reports failure, or when loading raises an exception.
    """
    from gecko_taskgraph.docker import load_image_by_name, load_image_by_task_id

    if not args.get("image_name") and not args.get("task_id"):
        # Usage errors belong on stderr so they do not pollute piped output.
        print("Specify either IMAGE-NAME or TASK-ID", file=sys.stderr)
        sys.exit(1)
    try:
        if args["task_id"]:
            ok = load_image_by_task_id(args["task_id"], args.get("tag"))
        else:
            ok = load_image_by_name(args["image_name"], args.get("tag"))
        if not ok:
            # SystemExit is not an Exception subclass, so it is not swallowed
            # by the handler below.
            sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("image-digest", help="Print the digest of a docker image.")
@argument(
    "image_name",
    help=(
        "Print the digest of the image of this name based on the current "
        "contents of the tree."
    ),
)
def image_digest(args):
    """Print the digest of the image named by ``args["image_name"]``.

    Any exception is reported with a traceback and the process exits with
    status 1.
    """
    from gecko_taskgraph.docker import get_image_digest

    try:
        print(get_image_digest(args["image_name"]))
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("decision", help="Run the decision task")
@argument(
    "--root",
    "-r",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument("--message", required=False, help=argparse.SUPPRESS)
@argument(
    "--project",
    required=True,
    help="Project to use for creating task graph. Example: --project=try",
)
@argument(
    "--pushlog-id",
    dest="pushlog_id",
    required=True,
    default="0",
)
@argument(
    "--pushdate",
    dest="pushdate",
    required=True,
    type=int,
    default=0,
)
@argument(
    "--owner",
    required=True,
    help="email address of who owns this graph",
)
@argument(
    "--level",
    required=True,
    help="SCM level of this repository",
)
@argument(
    "--target-tasks-method",
    help="method for selecting the target tasks to generate",
)
@argument(
    "--repository-type",
    required=True,
    help='Type of repository, either "hg" or "git"',
)
@argument(
    "--base-repository",
    required=True,
    help='URL for "base" repository to clone',
)
@argument(
    "--base-ref",
    default="",
    help='Reference of the revision in the "base" repository',
)
@argument(
    "--base-rev",
    default="",
    help=(
        "Taskgraph decides what to do based on the revision range between "
        "`--base-rev` and `--head-rev`. Value is determined automatically if not provided"
    ),
)
@argument(
    "--head-repository",
    required=True,
    help='URL for "head" repository to fetch revision from',
)
@argument(
    "--head-ref",
    required=True,
    help="Reference (this is same as rev usually for hg)",
)
@argument(
    "--head-rev",
    required=True,
    help="Commit revision to use from head repository",
)
@argument(
    "--head-tag",
    help="Tag attached to the revision",
    default="",
)
@argument(
    "--tasks-for",
    required=True,
    help="the tasks_for value used to generate this task",
)
@argument(
    "--try-task-config-file",
    help="path to try task configuration file",
)
def decision(options):
    """Run the decision task by handing ``options`` to ``taskgraph_decision``."""
    # Imported lazily, like the other commands in this module.
    from gecko_taskgraph.decision import taskgraph_decision

    taskgraph_decision(options)
@command("action-callback", description="Run action callback used by action tasks")
@argument(
    "--root",
    "-r",
    default="taskcluster",
    help="root of the taskgraph definition relative to topsrcdir",
)
def action_callback(options):
    """Trigger the action callback described by the ``ACTION_*`` environment.

    Reads ``ACTION_TASK_ID``, ``ACTION_TASK_GROUP_ID``, ``ACTION_INPUT`` and
    ``ACTION_CALLBACK`` from the environment and returns whatever
    ``trigger_action_callback`` returns. Any exception is printed with a
    traceback and the process exits with status 1.
    """
    from gecko_taskgraph.actions import trigger_action_callback
    from gecko_taskgraph.actions.util import get_parameters

    env = os.environ
    try:
        # the target task for this action (or null if it's a group action)
        target_task_id = json.loads(env.get("ACTION_TASK_ID", "null"))
        # the target task group for this action
        group_id = env.get("ACTION_TASK_GROUP_ID", None)
        action_input = json.loads(env.get("ACTION_INPUT", "null"))
        callback_name = env.get("ACTION_CALLBACK", None)
        taskgraph_root = options["root"]

        group_parameters = get_parameters(group_id)

        return trigger_action_callback(
            task_group_id=group_id,
            task_id=target_task_id,
            input=action_input,
            callback=callback_name,
            parameters=group_parameters,
            root=taskgraph_root,
            test=False,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("test-action-callback", description="Run an action callback in a testing mode")
@argument(
    "--root",
    "-r",
    default="taskcluster",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument(
    "--parameters",
    "-p",
    default="",
    help="parameters file (.yml or .json; see `taskcluster/docs/parameters.rst`)",
)
@argument("--task-id", default=None, help="TaskId to which the action applies")
@argument(
    "--task-group-id", default=None, help="TaskGroupId to which the action applies"
)
@argument("--input", default=None, help="Action input (.yml or .json)")
@argument("callback", default=None, help="Action callback name (Python function name)")
def test_action_callback(options):
    """Run an action callback in test mode using locally supplied data.

    Args:
        options: dict of parsed options (``root``, ``parameters``,
            ``task_id``, ``task_group_id``, ``input``, ``callback``).

    Returns:
        Whatever ``trigger_action_callback`` returns. Any exception is
        printed with a traceback and the process exits with status 1.
    """
    import taskgraph.parameters
    from taskgraph.config import load_graph_config
    from taskgraph.util import yaml

    import gecko_taskgraph.actions

    def load_data(filename):
        """Load a ``.yml`` or ``.json`` file; reject any other extension."""
        with open(filename) as f:
            if filename.endswith(".yml"):
                return yaml.load_stream(f)
            if filename.endswith(".json"):
                return json.load(f)
            raise Exception(f"unknown filename {filename}")

    try:
        task_id = options["task_id"]

        # Named ``action_input`` to avoid shadowing the ``input`` builtin.
        if options["input"]:
            action_input = load_data(options["input"])
        else:
            action_input = None

        root = options["root"]
        graph_config = load_graph_config(root)
        trust_domain = graph_config["trust-domain"]
        graph_config.register()

        parameters = taskgraph.parameters.load_parameters_file(
            options["parameters"], strict=False, trust_domain=trust_domain
        )
        parameters.check()

        return gecko_taskgraph.actions.trigger_action_callback(
            task_group_id=options["task_group_id"],
            task_id=task_id,
            input=action_input,
            callback=options["callback"],
            parameters=parameters,
            root=root,
            test=True,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def create_parser():
    """Build the argument parser with one subparser per registered command.

    Each subparser receives the arguments recorded on the command function's
    ``args`` attribute, and defaults that include the function as ``command``.
    """
    top_level = argparse.ArgumentParser(description="Interact with taskgraph")
    registry = top_level.add_subparsers()
    for handler, names, parser_kwargs, extra_defaults in commands.values():
        command_parser = registry.add_parser(*names, **parser_kwargs)
        for spec in handler.args:
            flags, settings = spec[0], spec[1]
            command_parser.add_argument(*flags, **settings)
        command_parser.set_defaults(command=handler, **extra_defaults)
    return top_level
def setup_logging():
    """Configure root logging at INFO level with a timestamped format."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
def main(args=None):
    """Parse ``args`` and dispatch to the selected taskgraph command.

    Args:
        args: list of command-line arguments. Defaults to ``sys.argv[1:]``,
            read at call time rather than once at import time.

    Returns:
        The value returned by the command function, so that callers can
        forward it as an exit status.

    Exits the process with status 1 if no command was selected or if the
    command raises an exception.
    """
    if args is None:
        args = sys.argv[1:]

    setup_logging()
    parser = create_parser()
    args = parser.parse_args(args)

    # Subcommands are optional for argparse, so ``command`` may be unset;
    # report that as a usage problem instead of an AttributeError traceback.
    if getattr(args, "command", None) is None:
        parser.print_usage(sys.stderr)
        sys.exit(1)

    try:
        return args.command(vars(args))
    except Exception:
        traceback.print_exc()
        sys.exit(1)