no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE
[gecko.git] / taskcluster / gecko_taskgraph / main.py
blobe9a353f246b32441a689c3eb3a3b717a99de955b
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import argparse
6 import atexit
7 import json
8 import logging
9 import os
10 import re
11 import shutil
12 import subprocess
13 import sys
14 import tempfile
15 import traceback
16 from collections import namedtuple
17 from concurrent.futures import ProcessPoolExecutor, as_completed
18 from pathlib import Path
19 from typing import Any, List
21 import appdirs
22 import yaml
# One registered sub-command: the function implementing it, the positional and
# keyword arguments to hand to ``add_parser`` and the parser defaults to set.
Command = namedtuple("Command", "func args kwargs defaults")

# Registry of sub-commands, keyed by sub-command name.
commands = dict()
def command(*args, **kwargs):
    """Return a decorator that registers a function as a sub-command.

    The first positional argument is the sub-command name and is used as the
    key in ``commands``. A ``defaults`` keyword, if given, is removed from
    ``kwargs`` and stored separately; everything else is stored untouched in
    the ``Command`` entry. The decorated function is returned unchanged.
    """
    parser_defaults = kwargs.pop("defaults", {})

    def register(func):
        entry = Command(func, args, kwargs, parser_defaults)
        commands[args[0]] = entry
        return func

    return register
def argument(*args, **kwargs):
    """Return a decorator that records one argument spec on a function.

    The ``(args, kwargs)`` pair is appended to the function's ``args`` list
    attribute, which is created on first use. The decorated function is
    returned unchanged.
    """

    def record(func):
        try:
            specs = func.args
        except AttributeError:
            # First argument attached to this function: start its list.
            specs = func.args = []
        specs.append((args, kwargs))
        return func

    return record
def format_taskgraph_labels(taskgraph):
    """Return the label of every task in the graph, sorted, one per line."""
    labels = [
        taskgraph.tasks[key].label for key in taskgraph.graph.visit_postorder()
    ]
    labels.sort()
    return "\n".join(labels)
def format_taskgraph_json(taskgraph):
    """Return ``taskgraph.to_json()`` as key-sorted, 2-space indented JSON."""
    document = taskgraph.to_json()
    return json.dumps(
        document,
        indent=2,
        separators=(",", ": "),
        sort_keys=True,
    )
def format_taskgraph_yaml(taskgraph):
    """Return ``taskgraph.to_json()`` dumped as block-style YAML.

    ``ReadOnlyDict`` values are emitted as ordinary mappings.
    """
    from mozbuild.util import ReadOnlyDict

    def represent_ro_dict(dumper, data):
        # Convert to a plain dict so the safe dumper knows how to emit it.
        return dumper.represent_dict(dict(data))

    class TGDumper(yaml.SafeDumper):
        def ignore_aliases(self, data):
            # Always answer "yes, ignore"; presumably this keeps repeated
            # objects from being written as YAML anchors/aliases.
            return True

    TGDumper.add_representer(ReadOnlyDict, represent_ro_dict)

    document = taskgraph.to_json()
    return yaml.dump(document, Dumper=TGDumper, default_flow_style=False)
def get_filtered_taskgraph(taskgraph, tasksregex, exclude_keys):
    """
    Narrow a task graph by label and strip unwanted keys from its tasks.

    When ``tasksregex`` is given, a new TaskGraph is built holding only the
    tasks whose label matches it (``re.match``, i.e. anchored at the start),
    together with the edges whose dependency also matches. When
    ``exclude_keys`` is given, each dot-separated key path is deleted from the
    JSON form of every task and the task is rebuilt from the result; paths
    that are not present are skipped. Returns the resulting TaskGraph.
    """
    from taskgraph.graph import Graph
    from taskgraph.task import Task
    from taskgraph.taskgraph import TaskGraph

    if tasksregex:
        links_by_task = taskgraph.graph.named_links_dict()
        kept_tasks = {}
        kept_edges = set()
        pattern = re.compile(tasksregex)

        for key in taskgraph.graph.visit_postorder():
            task = taskgraph.tasks[key]
            if not pattern.match(task.label):
                continue
            kept_tasks[key] = task
            kept_edges.update(
                (key, dep, depname)
                for depname, dep in links_by_task[key].items()
                if pattern.match(dep)
            )

        taskgraph = TaskGraph(kept_tasks, Graph(set(kept_tasks), kept_edges))

    if exclude_keys:
        for label, task in taskgraph.tasks.items():
            task_dict = task.to_json()
            for dotted_key in exclude_keys:
                *parents, leaf = dotted_key.split(".")
                node = task_dict
                # Walk down to the container holding the final component,
                # giving up as soon as one component is absent.
                for part in parents:
                    if part not in node:
                        break
                    node = node[part]
                else:
                    if leaf in node:
                        del node[leaf]
            taskgraph.tasks[label] = Task.from_json(task_dict)

    return taskgraph
# Maps the value of the ``format`` option to the function that renders a
# task graph in that format.
FORMAT_METHODS = dict(
    labels=format_taskgraph_labels,
    json=format_taskgraph_json,
    yaml=format_taskgraph_yaml,
)
def get_taskgraph_generator(root, parameters):
    """Build a TaskGraphGenerator; kept separate to make testing easier."""
    from taskgraph.generator import TaskGraphGenerator

    generator = TaskGraphGenerator(root_dir=root, parameters=parameters)
    return generator
def format_taskgraph(options, parameters, logfile=None):
    """Generate one task graph and return it rendered as a string.

    ``parameters`` is either an already loaded parameters object or a string
    spec, which is resolved with ``parameters_loader``. The graph is read from
    the generator attribute named by ``options["graph_attr"]``, filtered with
    ``options["tasks_regex"]`` / ``options["exclude_keys"]`` and rendered with
    the ``FORMAT_METHODS`` entry for ``options["format"]`` ("labels" when
    unset). When ``logfile`` is given, a file handler for it is added to the
    root logger, replacing that logger's last handler if it has one.
    """
    import taskgraph
    from taskgraph.parameters import parameters_loader

    if logfile:
        file_handler = logging.FileHandler(logfile, mode="w")
        root_logger = logging.root
        if root_logger.handlers:
            # Swap out the most recently added handler but keep its format.
            previous = root_logger.handlers[-1]
            root_logger.removeHandler(previous)
            file_handler.setFormatter(previous.formatter)
        root_logger.addHandler(file_handler)

    if options["fast"]:
        taskgraph.fast = True

    if isinstance(parameters, str):
        overrides = {"target-kinds": options.get("target_kinds")}
        parameters = parameters_loader(parameters, overrides=overrides, strict=False)

    generator = get_taskgraph_generator(options.get("root"), parameters)

    graph = getattr(generator, options["graph_attr"])
    graph = get_filtered_taskgraph(
        graph, options["tasks_regex"], options["exclude_keys"]
    )
    render = FORMAT_METHODS[options["format"] or "labels"]
    return render(graph)
def dump_output(out, path=None, params_spec=None):
    """Write ``out`` followed by a blank line to ``path``, or to stdout.

    With a ``path``, any ``{params}`` placeholder in it is replaced by the
    formatted name of ``params_spec``; if ``params_spec`` is truthy and the
    placeholder is absent, ``_{params}`` is inserted before the file
    extension first. Without a ``path``, a header naming the parameters is
    printed to stderr and ``out`` goes to stdout.
    """
    from taskgraph.parameters import Parameters

    params_name = Parameters.format_spec(params_spec)

    if not path:
        print(
            "Dumping result with parameters from {}:".format(params_name),
            file=sys.stderr,
        )
        print(out + "\n")
        return

    # Substitute params name into file path if necessary
    if params_spec and "{params}" not in path:
        name, ext = os.path.splitext(path)
        name += "_{params}"
        path = name + ext

    path = path.format(params=params_name)
    # Close the file deterministically: it may be read right afterwards
    # (e.g. by an external `diff`), so the data must be flushed to disk and
    # the handle must not be left for the garbage collector.
    with open(path, "w") as fh:
        print(out + "\n", file=fh)
def generate_taskgraph(options, parameters, logdir):
    """Generate and dump one task graph per entry of ``parameters``.

    A single entry is processed in the current process. Several entries are
    submitted to a ``ProcessPoolExecutor`` (sized by
    ``options["max_workers"]``) and each result is dumped as it completes. A
    generation that raised has its traceback dumped in place of the graph; in
    diff mode that traceback goes to the console rather than the output file.
    """
    from taskgraph.parameters import Parameters

    def logfile(spec):
        """Determine logfile given a parameters specification."""
        if logdir is None:
            return None
        filename = "{}_{}.log".format(
            options["graph_attr"], Parameters.format_spec(spec)
        )
        return os.path.join(logdir, filename)

    # Don't bother using futures if there's only one parameter. This can make
    # tracebacks a little more readable and avoids additional process overhead.
    if len(parameters) == 1:
        only_spec = parameters[0]
        rendered = format_taskgraph(options, only_spec, logfile(only_spec))
        dump_output(rendered, options["output_file"])
        return

    with ProcessPoolExecutor(max_workers=options["max_workers"]) as executor:
        futures = {
            executor.submit(format_taskgraph, options, spec, logfile(spec)): spec
            for spec in parameters
        }

    for future in as_completed(futures):
        spec = futures[future]
        destination = options["output_file"]
        error = future.exception()
        if error:
            rendered = "".join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
            if options["diff"]:
                # Dump to console so we don't accidentally diff the tracebacks.
                destination = None
        else:
            rendered = future.result()

        dump_output(
            rendered,
            path=destination,
            params_spec=spec if len(parameters) > 1 else None,
        )
@command(
    "tasks",
    help="Show all tasks in the taskgraph.",
    defaults={"graph_attr": "full_task_set"},
)
@command(
    "full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
)
@command(
    "target",
    help="Show the set of target tasks.",
    defaults={"graph_attr": "target_task_set"},
)
@command(
    "target-graph",
    help="Show the target graph.",
    defaults={"graph_attr": "target_task_graph"},
)
@command(
    "optimized",
    help="Show the optimized graph.",
    defaults={"graph_attr": "optimized_task_graph"},
)
@command(
    "morphed",
    help="Show the morphed graph.",
    defaults={"graph_attr": "morphed_task_graph"},
)
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
@argument(
    "--verbose", "-v", action="store_true", help="include debug-level logging output"
)
@argument(
    "--json",
    "-J",
    action="store_const",
    dest="format",
    const="json",
    help="Output task graph as a JSON object",
)
@argument(
    "--yaml",
    "-Y",
    action="store_const",
    dest="format",
    const="yaml",
    help="Output task graph as a YAML object",
)
@argument(
    "--labels",
    "-L",
    action="store_const",
    dest="format",
    const="labels",
    help="Output the label for each task in the task graph (default)",
)
@argument(
    "--parameters",
    "-p",
    default=None,
    action="append",
    help="Parameters to use for the generation. Can be a path to file (.yml or "
    ".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
    "parameters files), a url, of the form `project=mozilla-central` to download "
    "latest parameters file for the specified project from CI, or of the form "
    "`task-id=<decision task id>` to download parameters from the specified "
    "decision task. Can be specified multiple times, in which case multiple "
    "generations will happen from the same invocation (one per parameters "
    "specified).",
)
@argument(
    "--no-optimize",
    dest="optimize",
    action="store_false",
    # A real boolean (not the truthy string "true") so the option has the
    # same type whether or not the flag is passed.
    default=True,
    help="do not remove tasks from the graph that are found in the "
    "index (a.k.a. optimize the graph)",
)
@argument(
    "-o",
    "--output-file",
    default=None,
    help="file path to store generated output.",
)
@argument(
    "--tasks-regex",
    "--tasks",
    default=None,
    help="only return tasks with labels matching this regular " "expression.",
)
@argument(
    "--exclude-key",
    default=None,
    dest="exclude_keys",
    action="append",
    help="Exclude the specified key (using dot notation) from the final result. "
    "This is mainly useful with '--diff' to filter out expected differences.",
)
@argument(
    "-k",
    "--target-kind",
    dest="target_kinds",
    action="append",
    default=[],
    help="only return tasks that are of the given kind, or their dependencies.",
)
@argument(
    "-F",
    "--fast",
    default=False,
    action="store_true",
    help="enable fast task generation for local debugging.",
)
@argument(
    "--diff",
    const="default",
    nargs="?",
    default=None,
    help="Generate and diff the current taskgraph against another revision. "
    "Without args the base revision will be used. A revision specifier such as "
    "the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
@argument(
    "-j",
    "--max-workers",
    dest="max_workers",
    default=None,
    type=int,
    # Note the trailing space: the two literals are concatenated.
    help="The maximum number of workers to use for parallel operations such as "
    "when multiple parameters files are passed.",
)
def show_taskgraph(options):
    """Generate the graph named by ``options["graph_attr"]`` and output it.

    One graph is generated per entry of ``options["parameters"]``; directory
    entries are replaced by the ``.yml``/``.json`` files they contain, and
    with no entries a single default set is loaded. When ``options["diff"]``
    is set, the graphs are generated a second time after updating the
    repository to the base revision, the repository is updated back, and a
    ``diff`` of the two results is emitted per parameter set.

    Returns 1 when a diff is requested but the working directory is not
    clean; otherwise returns ``None``.

    NOTE(review): ``--quiet`` is parsed but ``options["quiet"]`` is never read
    here -- presumably an embedding caller handles it; confirm.
    """
    from typing import Union

    from mozversioncontrol import get_repository_object as get_repository
    from taskgraph.parameters import Parameters, parameters_loader

    if options.pop("verbose", False):
        logging.root.setLevel(logging.DEBUG)

    repo = None
    cur_ref = None
    diffdir = None
    output_file = options["output_file"]

    if options["diff"]:
        # --root argument is taskgraph's config at <repo>/taskcluster/ci
        repo_root = os.getcwd()
        if options["root"]:
            repo_root = f"{options['root']}/../.."
        repo = get_repository(repo_root)

        if not repo.working_directory_clean():
            print(
                "abort: can't diff taskgraph with dirty working directory",
                file=sys.stderr,
            )
            return 1

        # We want to return the working directory to the current state
        # as best we can after we're done. In all known cases, using
        # branch or bookmark (which are both available on the VCS object)
        # as `branch` is preferable to a specific revision.
        cur_ref = repo.branch or repo.head_ref[:12]

        diffdir = tempfile.mkdtemp()
        atexit.register(
            shutil.rmtree, diffdir
        )  # make sure the directory gets cleaned up
        options["output_file"] = os.path.join(
            diffdir, f"{options['graph_attr']}_{cur_ref}"
        )
        print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr)

    # Each entry is either a string spec or an already loaded Parameters.
    parameters: List[Union[str, Parameters]] = options.pop("parameters")
    if not parameters:
        overrides = {
            "target-kinds": options.get("target_kinds"),
        }
        parameters = [
            parameters_loader(None, strict=False, overrides=overrides)
        ]  # will use default values

    # Iterate over a copy because directory entries are replaced in place.
    for param in parameters[:]:
        if isinstance(param, str) and os.path.isdir(param):
            parameters.remove(param)
            parameters.extend(
                [
                    p.as_posix()
                    for p in Path(param).iterdir()
                    if p.suffix in (".yml", ".json")
                ]
            )

    logdir = None
    if len(parameters) > 1:
        # Log to separate files for each process instead of stderr to
        # avoid interleaving.
        basename = os.path.basename(os.getcwd())
        logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
        # exist_ok avoids the check-then-create race between invocations.
        os.makedirs(logdir, exist_ok=True)
    else:
        # Only setup logging if we have a single parameter spec. Otherwise
        # logging will go to files. This is also used as a hook for Gecko
        # to setup its `mach` based logging.
        setup_logging()

    generate_taskgraph(options, parameters, logdir)

    if options["diff"]:
        assert diffdir is not None
        assert repo is not None

        # Reload taskgraph modules to pick up changes and clear global state.
        for mod in sys.modules.copy():
            if mod != __name__ and mod.split(".", 1)[0].endswith(
                ("taskgraph", "mozbuild")
            ):
                del sys.modules[mod]

        # Ensure gecko_taskgraph is ahead of taskcluster_taskgraph in sys.path.
        # Without this, we may end up validating some things against the wrong
        # schema.
        import gecko_taskgraph  # noqa

        if options["diff"] == "default":
            base_ref = repo.base_ref
        else:
            base_ref = options["diff"]

        try:
            repo.update(base_ref)
            base_ref = repo.head_ref[:12]
            options["output_file"] = os.path.join(
                diffdir, f"{options['graph_attr']}_{base_ref}"
            )
            print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr)
            generate_taskgraph(options, parameters, logdir)
        finally:
            # Always put the working directory back where it started.
            repo.update(cur_ref)

        # Generate diff(s)
        diffcmd = [
            "diff",
            "-U20",
            "--report-identical-files",
            f"--label={options['graph_attr']}@{base_ref}",
            f"--label={options['graph_attr']}@{cur_ref}",
        ]

        non_fatal_failures = []
        for spec in parameters:
            base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}")
            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}")

            params_name = None
            if len(parameters) > 1:
                params_name = Parameters.format_spec(spec)
                base_path += f"_{params_name}"
                cur_path += f"_{params_name}"

            # If the base or cur files are missing it means that generation
            # failed. If one of them failed but not the other, the failure is
            # likely due to the patch making changes to taskgraph in modules
            # that don't get reloaded (safe to ignore). If both generations
            # failed, there's likely a real issue.
            base_missing = not os.path.isfile(base_path)
            cur_missing = not os.path.isfile(cur_path)
            if base_missing != cur_missing:  # != is equivalent to XOR for booleans
                non_fatal_failures.append(os.path.basename(base_path))
                continue

            try:
                # If the output file(s) are missing, this command will raise
                # CalledProcessError with a returncode > 1.
                proc = subprocess.run(
                    diffcmd + [base_path, cur_path],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                    check=True,
                )
                diff_output = proc.stdout
                returncode = 0
            except subprocess.CalledProcessError as e:
                # returncode 1 simply means diffs were found
                if e.returncode != 1:
                    print(e.stderr, file=sys.stderr)
                    raise
                diff_output = e.output
                returncode = e.returncode

            dump_output(
                diff_output,
                # Don't bother saving file if no diffs were found. Log to
                # console in this case instead.
                path=None if returncode == 0 else output_file,
                params_spec=spec if len(parameters) > 1 else None,
            )

        if non_fatal_failures:
            failstr = "\n ".join(sorted(non_fatal_failures))
            print(
                "WARNING: Diff skipped for the following generation{s} "
                "due to failures:\n {failstr}".format(
                    s="s" if len(non_fatal_failures) > 1 else "", failstr=failstr
                ),
                file=sys.stderr,
            )

        if options["format"] != "json":
            print(
                "If you were expecting differences in task bodies "
                'you should pass "-J"\n',
                file=sys.stderr,
            )

    if len(parameters) > 1:
        print("See '{}' for logs".format(logdir), file=sys.stderr)
@command("build-image", help="Build a Docker image")
@argument("image_name", help="Name of the image to build")
@argument(
    "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
)
@argument(
    "--context-only",
    # The two literals are concatenated, so the separating space must be
    # part of the first one.
    help="File name the context tarball should be written to. "
    "With this option it will only build the context.tar.",
    metavar="context.tar",
)
def build_image(args):
    """Build the named Docker image, or only write its context tarball.

    When ``args["context_only"]`` is ``None`` the image is built with the tag
    ``args["tag"]``; otherwise only the build context is written to the file
    named by ``args["context_only"]``. ``os.environ`` is passed to both.
    """
    # Aliased so the import does not shadow this function's own name.
    from gecko_taskgraph.docker import build_context
    from gecko_taskgraph.docker import build_image as docker_build_image

    if args["context_only"] is None:
        docker_build_image(args["image_name"], args["tag"], os.environ)
    else:
        build_context(args["image_name"], args["context_only"], os.environ)
@command(
    "load-image",
    help="Load a pre-built Docker image. Note that you need to "
    "have docker installed and running for this to work.",
)
@argument(
    "--task-id",
    help="Load the image at public/image.tar.zst in this task, "
    "rather than searching the index",
)
@argument(
    "-t",
    "--tag",
    metavar="name:tag",
    help="tag that the image should be loaded as. If not "
    "image will be loaded with tag from the tarball",
)
@argument(
    "image_name",
    nargs="?",
    help="Load the image of this name based on the current "
    "contents of the tree (as built for mozilla-central "
    "or mozilla-inbound)",
)
def load_image(args):
    """Load a Docker image, selected by task id or by image name.

    A task id takes precedence over an image name. The process exits with
    status 1 when neither is given, when the loader returns a falsy result,
    or when loading raises (after printing the traceback).
    """
    from gecko_taskgraph.docker import load_image_by_name, load_image_by_task_id

    if not (args.get("image_name") or args.get("task_id")):
        print("Specify either IMAGE-NAME or TASK-ID")
        sys.exit(1)

    try:
        task_id = args["task_id"]
        if task_id:
            loaded = load_image_by_task_id(task_id, args.get("tag"))
        else:
            loaded = load_image_by_name(args["image_name"], args.get("tag"))
        if not loaded:
            sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("image-digest", help="Print the digest of a docker image.")
@argument(
    "image_name",
    help="Print the digest of the image of this name based on the current "
    "contents of the tree.",
)
def image_digest(args):
    """Print the digest of the image named by ``args["image_name"]``.

    On any exception the traceback is printed and the process exits with
    status 1.
    """
    from gecko_taskgraph.docker import get_image_digest

    try:
        print(get_image_digest(args["image_name"]))
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("decision", help="Run the decision task")
@argument(
    "--root",
    "-r",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument("--message", required=False, help=argparse.SUPPRESS)
@argument(
    "--project",
    required=True,
    help="Project to use for creating task graph. Example: --project=try",
)
@argument("--pushlog-id", dest="pushlog_id", required=True, default="0")
@argument("--pushdate", dest="pushdate", required=True, type=int, default=0)
@argument("--owner", required=True, help="email address of who owns this graph")
@argument("--level", required=True, help="SCM level of this repository")
@argument(
    "--target-tasks-method",
    help="method for selecting the target tasks to generate",
)
@argument(
    "--repository-type",
    required=True,
    help='Type of repository, either "hg" or "git"',
)
@argument(
    "--base-repository",
    required=True,
    help='URL for "base" repository to clone',
)
@argument(
    "--base-ref",
    default="",
    help='Reference of the revision in the "base" repository',
)
@argument(
    "--base-rev",
    default="",
    help="Taskgraph decides what to do based on the revision range between "
    "`--base-rev` and `--head-rev`. Value is determined automatically if not provided",
)
@argument(
    "--head-repository",
    required=True,
    help='URL for "head" repository to fetch revision from',
)
@argument(
    "--head-ref",
    required=True,
    help="Reference (this is same as rev usually for hg)",
)
@argument(
    "--head-rev",
    required=True,
    help="Commit revision to use from head repository",
)
@argument("--head-tag", default="", help="Tag attached to the revision")
@argument(
    "--tasks-for",
    required=True,
    help="the tasks_for value used to generate this task",
)
@argument("--try-task-config-file", help="path to try task configuration file")
def decision(options):
    """Run the decision task, passing the parsed ``options`` straight through."""
    from gecko_taskgraph.decision import taskgraph_decision as run_decision

    run_decision(options)
@command("action-callback", description="Run action callback used by action tasks")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
def action_callback(options):
    """Trigger the action callback described by the ``ACTION_*`` environment.

    The task id and input are JSON-decoded from ``ACTION_TASK_ID`` and
    ``ACTION_INPUT`` (both default to ``null``); the task group id and
    callback name are read as plain strings. Returns whatever
    ``trigger_action_callback`` returns. On any exception the traceback is
    printed and the process exits with status 1.
    """
    from gecko_taskgraph.actions import trigger_action_callback
    from gecko_taskgraph.actions.util import get_parameters

    try:
        env = os.environ
        # the target task for this action (or null if it's a group action)
        task_id = json.loads(env.get("ACTION_TASK_ID", "null"))
        # the target task group for this action
        task_group_id = env.get("ACTION_TASK_GROUP_ID")
        action_input = json.loads(env.get("ACTION_INPUT", "null"))
        callback = env.get("ACTION_CALLBACK")
        root = options["root"]

        parameters = get_parameters(task_group_id)

        call_kwargs = dict(
            task_group_id=task_group_id,
            task_id=task_id,
            input=action_input,
            callback=callback,
            parameters=parameters,
            root=root,
            test=False,
        )
        return trigger_action_callback(**call_kwargs)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("test-action-callback", description="Run an action callback in a testing mode")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument(
    "--parameters",
    "-p",
    default="",
    help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)`",
)
@argument("--task-id", default=None, help="TaskId to which the action applies")
@argument(
    "--task-group-id",
    default=None,
    help="TaskGroupId to which the action applies",
)
@argument("--input", default=None, help="Action input (.yml or .json)")
@argument("callback", default=None, help="Action callback name (Python function name)")
def test_action_callback(options):
    """Trigger an action callback with ``test=True`` using local inputs.

    The action input is read from the ``.yml``/``.json`` file named by
    ``options["input"]`` (``None`` when not given), and the parameters are
    loaded from ``options["parameters"]`` and checked. Returns whatever
    ``trigger_action_callback`` returns. On any exception the traceback is
    printed and the process exits with status 1.
    """
    import taskgraph.parameters
    from taskgraph.config import load_graph_config
    from taskgraph.util import yaml

    import gecko_taskgraph.actions

    def load_data(filename):
        # The file is opened before its extension is inspected, so a missing
        # file is reported ahead of an unsupported extension.
        with open(filename) as stream:
            if filename.endswith(".yml"):
                data = yaml.load_stream(stream)
            elif filename.endswith(".json"):
                data = json.load(stream)
            else:
                raise Exception(f"unknown filename {filename}")
            return data

    try:
        task_id = options["task_id"]
        action_input = load_data(options["input"]) if options["input"] else None

        root = options["root"]
        graph_config = load_graph_config(root)
        trust_domain = graph_config["trust-domain"]
        graph_config.register()

        parameters = taskgraph.parameters.load_parameters_file(
            options["parameters"], strict=False, trust_domain=trust_domain
        )
        parameters.check()

        call_kwargs = dict(
            task_group_id=options["task_group_id"],
            task_id=task_id,
            input=action_input,
            callback=options["callback"],
            parameters=parameters,
            root=root,
            test=True,
        )
        return gecko_taskgraph.actions.trigger_action_callback(**call_kwargs)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def create_parser():
    """Build the argument parser with one sub-parser per registered command.

    Each sub-parser receives the arguments recorded on the command's function
    and has its ``command`` default set to that function, alongside the
    defaults registered for the command.
    """
    parser = argparse.ArgumentParser(description="Interact with taskgraph")
    subparsers = parser.add_subparsers()
    for entry in commands.values():
        subparser = subparsers.add_parser(*entry.args, **entry.kwargs)
        for flags, settings in entry.func.args:
            subparser.add_argument(*flags, **settings)
        subparser.set_defaults(command=entry.func, **entry.defaults)
    return parser
def setup_logging():
    """Configure basic logging at INFO level with a timestamped format."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
def main(args=None):
    """Parse ``args`` and run the selected sub-command.

    ``args`` is the list of command-line arguments without the program name.
    When it is ``None``, ``parse_args`` reads ``sys.argv[1:]`` at call time
    (a ``sys.argv[1:]`` default would be frozen when the module is imported).
    The command receives the parsed namespace as a dict. If it raises, the
    traceback is printed and the process exits with status 1.
    """
    setup_logging()
    parser = create_parser()
    parsed = parser.parse_args(args)
    try:
        parsed.command(vars(parsed))
    except Exception:
        traceback.print_exc()
        sys.exit(1)