Bug 1755481: correct documentation of `nsIClipboard::getData`. r=mccr8
[gecko.git] / taskcluster / gecko_taskgraph / main.py
blob b51941c8267d32711faff1a2cd30d5b855b4a18b
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import atexit
6 import os
7 import re
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import traceback
13 import argparse
14 import logging
15 import json
16 from collections import namedtuple
17 from concurrent.futures import ProcessPoolExecutor, as_completed
18 from pathlib import Path
19 from typing import Any, List
21 import appdirs
22 import yaml
# A registered subcommand: the handler function plus the positional and
# keyword arguments destined for ``add_parser`` and the parser defaults.
Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"])

# Name -> Command registry, populated by the @command decorator below.
commands = {}


def command(*args, **kwargs):
    """Register the decorated function as a subcommand.

    ``*args``/``**kwargs`` are forwarded verbatim to ``add_parser``; the
    optional ``defaults`` keyword is split off and later applied through
    ``parser.set_defaults``.
    """
    parser_defaults = kwargs.pop("defaults", {})

    def register(func):
        commands[args[0]] = Command(func, args, kwargs, parser_defaults)
        return func

    return register
def argument(*args, **kwargs):
    """Attach an argparse argument spec to the decorated command function.

    Specs accumulate on ``func.args`` (created on first use) and are later
    replayed into ``subparser.add_argument``.
    """

    def attach(func):
        specs = getattr(func, "args", None)
        if specs is None:
            specs = func.args = []
        specs.append((args, kwargs))
        return func

    return attach
def format_taskgraph_labels(taskgraph):
    """Render the graph as one task label per line, in post-order."""
    labels = (
        taskgraph.tasks[key].label for key in taskgraph.graph.visit_postorder()
    )
    return "\n".join(labels)
def format_taskgraph_json(taskgraph):
    """Render the graph as pretty-printed, key-sorted JSON."""
    graph_json = taskgraph.to_json()
    return json.dumps(graph_json, sort_keys=True, indent=2, separators=(",", ": "))
def format_taskgraph_yaml(taskgraph):
    """Render the graph as block-style YAML."""
    graph_json = taskgraph.to_json()
    return yaml.safe_dump(graph_json, default_flow_style=False)
def get_filtered_taskgraph(taskgraph, tasksregex):
    """Return a new TaskGraph containing only tasks whose label matches
    *tasksregex*; edges are kept only when both endpoints survive the filter.
    """
    from gecko_taskgraph.graph import Graph
    from gecko_taskgraph.taskgraph import TaskGraph

    # No filter supplied: hand the original graph back untouched.
    if not tasksregex:
        return taskgraph

    pattern = re.compile(tasksregex)
    links = taskgraph.graph.named_links_dict()
    kept_tasks = {}
    kept_edges = set()

    for key in taskgraph.graph.visit_postorder():
        task = taskgraph.tasks[key]
        if not pattern.match(task.label):
            continue
        kept_tasks[key] = task
        for depname, dep in links[key].items():
            if pattern.match(dep):
                kept_edges.add((key, dep, depname))

    return TaskGraph(kept_tasks, Graph(set(kept_tasks), kept_edges))
# Maps the --labels/--json/--yaml output-format names to their renderers;
# format_taskgraph() falls back to "labels" when no format is selected.
FORMAT_METHODS = {
    "labels": format_taskgraph_labels,
    "json": format_taskgraph_json,
    "yaml": format_taskgraph_yaml,
}
def get_taskgraph_generator(root, parameters):
    """Construct a TaskGraphGenerator.

    This thin indirection exists so tests can stub out graph generation.
    """
    from gecko_taskgraph.generator import TaskGraphGenerator

    generator = TaskGraphGenerator(root_dir=root, parameters=parameters)
    return generator
def format_taskgraph(options, parameters, logfile=None):
    """Generate the requested graph attribute and render it as a string.

    options: dict of CLI options (graph_attr, fast, root, tasks_regex,
        format, target_kind, ...)
    parameters: a parameters spec string, or an already-loaded parameters
        object; strings are resolved via parameters_loader
    logfile: optional path; when given, root logging is redirected there
        (used by generate_taskgraph when running in worker processes)
    """
    import gecko_taskgraph
    from taskgraph.parameters import parameters_loader

    if logfile:
        # Swap the most recent root handler for a file handler so output
        # from this (possibly child) process doesn't interleave on stderr.
        # NOTE(review): the original handler is not restored afterwards —
        # confirm callers only do this in throwaway worker processes.
        oldhandler = logging.root.handlers[-1]
        logging.root.removeHandler(oldhandler)

        handler = logging.FileHandler(logfile, mode="w")
        handler.setFormatter(oldhandler.formatter)
        logging.root.addHandler(handler)

    if options["fast"]:
        # Global switch consumed elsewhere in gecko_taskgraph to skip
        # expensive steps during local debugging.
        gecko_taskgraph.fast = True

    if isinstance(parameters, str):
        parameters = parameters_loader(
            parameters,
            overrides={"target-kind": options.get("target_kind")},
            strict=False,
        )

    tgg = get_taskgraph_generator(options.get("root"), parameters)

    # Pull the requested graph (full_task_set, optimized_task_graph, ...),
    # filter by label regex, then render in the chosen format.
    tg = getattr(tgg, options["graph_attr"])
    tg = get_filtered_taskgraph(tg, options["tasks_regex"])
    format_method = FORMAT_METHODS[options["format"] or "labels"]
    return format_method(tg)
def dump_output(out, path=None, params_spec=None):
    """Write *out* to *path*, or to stdout when no path is given.

    When generating for multiple parameter specs, a ``{params}``
    placeholder in *path* (appended automatically when absent) is
    substituted with the formatted spec so each generation gets its
    own output file.
    """
    from taskgraph.parameters import Parameters

    params_name = Parameters.format_spec(params_spec)
    fh = None
    if path:
        # Substitute params name into file path if necessary
        if params_spec and "{params}" not in path:
            name, ext = os.path.splitext(path)
            name += "_{params}"
            path = name + ext

        path = path.format(params=params_name)
        fh = open(path, "w")
    else:
        print(
            "Dumping result with parameters from {}:".format(params_name),
            file=sys.stderr,
        )
    try:
        # file=None makes print() fall back to stdout.
        print(out + "\n", file=fh)
    finally:
        # Bug fix: the file handle was previously never closed (leak).
        if fh is not None:
            fh.close()
def generate_taskgraph(options, parameters, logdir):
    """Generate and dump the graph once per parameters spec.

    With a single spec the generation runs in-process; with several, each
    spec is generated in a subprocess (ProcessPoolExecutor), logging to
    its own file under *logdir* to avoid interleaved output.
    """
    from taskgraph.parameters import Parameters

    def logfile(spec):
        """Determine logfile given a parameters specification."""
        if logdir is None:
            return None
        return os.path.join(
            logdir,
            "{}_{}.log".format(options["graph_attr"], Parameters.format_spec(spec)),
        )

    # Don't bother using futures if there's only one parameter. This can make
    # tracebacks a little more readable and avoids additional process overhead.
    if len(parameters) == 1:
        spec = parameters[0]
        out = format_taskgraph(options, spec, logfile(spec))
        dump_output(out, options["output_file"])
        return

    futures = {}
    with ProcessPoolExecutor() as executor:
        for spec in parameters:
            f = executor.submit(format_taskgraph, options, spec, logfile(spec))
            futures[f] = spec

    for future in as_completed(futures):
        output_file = options["output_file"]
        spec = futures[future]
        e = future.exception()
        if e:
            # A failed generation dumps its traceback instead of a graph.
            out = "".join(traceback.format_exception(type(e), e, e.__traceback__))
            if options["diff"]:
                # Dump to console so we don't accidentally diff the tracebacks.
                output_file = None
        else:
            out = future.result()

        dump_output(
            out,
            path=output_file,
            # Only disambiguate file names when there are multiple specs.
            params_spec=spec if len(parameters) > 1 else None,
        )
@command(
    "tasks",
    help="Show all tasks in the taskgraph.",
    defaults={"graph_attr": "full_task_set"},
)
@command(
    "full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
)
@command(
    "target",
    help="Show the set of target tasks.",
    defaults={"graph_attr": "target_task_set"},
)
@command(
    "target-graph",
    help="Show the target graph.",
    defaults={"graph_attr": "target_task_graph"},
)
@command(
    "optimized",
    help="Show the optimized graph.",
    defaults={"graph_attr": "optimized_task_graph"},
)
@command(
    "morphed",
    help="Show the morphed graph.",
    defaults={"graph_attr": "morphed_task_graph"},
)
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
@argument(
    "--verbose", "-v", action="store_true", help="include debug-level logging output"
)
@argument(
    "--json",
    "-J",
    action="store_const",
    dest="format",
    const="json",
    help="Output task graph as a JSON object",
)
@argument(
    "--yaml",
    "-Y",
    action="store_const",
    dest="format",
    const="yaml",
    help="Output task graph as a YAML object",
)
@argument(
    "--labels",
    "-L",
    action="store_const",
    dest="format",
    const="labels",
    help="Output the label for each task in the task graph (default)",
)
@argument(
    "--parameters",
    "-p",
    default=None,
    action="append",
    help="Parameters to use for the generation. Can be a path to file (.yml or "
    ".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
    "parameters files), a url, of the form `project=mozilla-central` to download "
    "latest parameters file for the specified project from CI, or of the form "
    "`task-id=<decision task id>` to download parameters from the specified "
    "decision task. Can be specified multiple times, in which case multiple "
    "generations will happen from the same invocation (one per parameters "
    "specified).",
)
@argument(
    "--no-optimize",
    dest="optimize",
    action="store_false",
    # NOTE(review): default is the string "true", not the boolean True —
    # truthy either way, but confirm this is intentional.
    default="true",
    help="do not remove tasks from the graph that are found in the "
    "index (a.k.a. optimize the graph)",
)
@argument(
    "-o",
    "--output-file",
    default=None,
    help="file path to store generated output.",
)
@argument(
    "--tasks-regex",
    "--tasks",
    default=None,
    help="only return tasks with labels matching this regular " "expression.",
)
@argument(
    "--target-kind",
    default=None,
    help="only return tasks that are of the given kind, or their dependencies.",
)
@argument(
    "-F",
    "--fast",
    default=False,
    action="store_true",
    help="enable fast task generation for local debugging.",
)
@argument(
    "--diff",
    const="default",
    nargs="?",
    default=None,
    help="Generate and diff the current taskgraph against another revision. "
    "Without args the base revision will be used. A revision specifier such as "
    "the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
def show_taskgraph(options):
    """Generate and display the requested graph attribute.

    Shared implementation behind the tasks/full/target/target-graph/
    optimized/morphed subcommands (the attribute to show comes in via the
    ``graph_attr`` default). With --diff, the graph is generated twice —
    once at the current ref and once at a base ref — and the two outputs
    are compared with the external ``diff`` tool.
    """
    from mozversioncontrol import get_repository_object as get_repository
    from taskgraph.parameters import Parameters, parameters_loader

    if options.pop("verbose", False):
        logging.root.setLevel(logging.DEBUG)

    repo = None
    cur_ref = None
    diffdir = None
    output_file = options["output_file"]

    if options["diff"]:
        repo = get_repository(os.getcwd())

        # Diffing checks out another revision; refuse to clobber local edits.
        if not repo.working_directory_clean():
            print(
                "abort: can't diff taskgraph with dirty working directory",
                file=sys.stderr,
            )
            return 1

        # We want to return the working directory to the current state
        # as best we can after we're done. In all known cases, using
        # branch or bookmark (which are both available on the VCS object)
        # as `branch` is preferable to a specific revision.
        cur_ref = repo.branch or repo.head_ref[:12]

        diffdir = tempfile.mkdtemp()
        atexit.register(
            shutil.rmtree, diffdir
        )  # make sure the directory gets cleaned up
        options["output_file"] = os.path.join(
            diffdir, f"{options['graph_attr']}_{cur_ref}"
        )
        print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr)

    # Each item is either a spec string or a Parameters-like object.
    parameters: List[Any] = options.pop("parameters")
    if not parameters:
        overrides = {
            "target-kind": options.get("target_kind"),
        }
        parameters = [
            parameters_loader(None, strict=False, overrides=overrides)
        ]  # will use default values

    # Expand any directory spec into the parameter files it contains.
    # Iterate over a copy since the list is mutated in the loop.
    for param in parameters[:]:
        if isinstance(param, str) and os.path.isdir(param):
            parameters.remove(param)
            parameters.extend(
                [
                    p.as_posix()
                    for p in Path(param).iterdir()
                    if p.suffix in (".yml", ".json")
                ]
            )

    logdir = None
    if len(parameters) > 1:
        # Log to separate files for each process instead of stderr to
        # avoid interleaving.
        basename = os.path.basename(os.getcwd())
        logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
        if not os.path.isdir(logdir):
            os.makedirs(logdir)
    else:
        # Only setup logging if we have a single parameter spec. Otherwise
        # logging will go to files. This is also used as a hook for Gecko
        # to setup its `mach` based logging.
        setup_logging()

    generate_taskgraph(options, parameters, logdir)

    if options["diff"]:
        assert diffdir is not None
        assert repo is not None

        # Reload taskgraph modules to pick up changes and clear global state.
        for mod in sys.modules.copy():
            if mod != __name__ and mod.split(".", 1)[0].endswith("taskgraph"):
                del sys.modules[mod]

        if options["diff"] == "default":
            base_ref = repo.base_ref
        else:
            base_ref = options["diff"]

        try:
            repo.update(base_ref)
            base_ref = repo.head_ref[:12]
            options["output_file"] = os.path.join(
                diffdir, f"{options['graph_attr']}_{base_ref}"
            )
            print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr)
            generate_taskgraph(options, parameters, logdir)
        finally:
            # Always restore the working directory to where the user was.
            repo.update(cur_ref)

        # Generate diff(s)
        diffcmd = [
            "diff",
            "-U20",
            "--report-identical-files",
            f"--label={options['graph_attr']}@{base_ref}",
            f"--label={options['graph_attr']}@{cur_ref}",
        ]

        non_fatal_failures = []
        for spec in parameters:
            base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}")
            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}")

            params_name = None
            if len(parameters) > 1:
                params_name = Parameters.format_spec(spec)
                base_path += f"_{params_name}"
                cur_path += f"_{params_name}"

            # If the base or cur files are missing it means that generation
            # failed. If one of them failed but not the other, the failure is
            # likely due to the patch making changes to taskgraph in modules
            # that don't get reloaded (safe to ignore). If both generations
            # failed, there's likely a real issue.
            base_missing = not os.path.isfile(base_path)
            cur_missing = not os.path.isfile(cur_path)
            if base_missing != cur_missing:  # != is equivalent to XOR for booleans
                non_fatal_failures.append(os.path.basename(base_path))
                continue

            try:
                # If the output file(s) are missing, this command will raise
                # CalledProcessError with a returncode > 1.
                proc = subprocess.run(
                    diffcmd + [base_path, cur_path],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                    check=True,
                )
                diff_output = proc.stdout
                returncode = 0
            except subprocess.CalledProcessError as e:
                # returncode 1 simply means diffs were found
                if e.returncode != 1:
                    print(e.stderr, file=sys.stderr)
                    raise
                diff_output = e.output
                returncode = e.returncode

            dump_output(
                diff_output,
                # Don't bother saving file if no diffs were found. Log to
                # console in this case instead.
                path=None if returncode == 0 else output_file,
                params_spec=spec if len(parameters) > 1 else None,
            )

        if non_fatal_failures:
            failstr = "\n  ".join(sorted(non_fatal_failures))
            print(
                "WARNING: Diff skipped for the following generation{s} "
                "due to failures:\n  {failstr}".format(
                    s="s" if len(non_fatal_failures) > 1 else "", failstr=failstr
                ),
                file=sys.stderr,
            )

        if options["format"] != "json":
            print(
                "If you were expecting differences in task bodies "
                'you should pass "-J"\n',
                file=sys.stderr,
            )

    if len(parameters) > 1:
        print("See '{}' for logs".format(logdir), file=sys.stderr)
@command("build-image", help="Build a Docker image")
@argument("image_name", help="Name of the image to build")
@argument(
    "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
)
@argument(
    "--context-only",
    help="File name the context tarball should be written to."
    "with this option it will only build the context.tar.",
    metavar="context.tar",
)
def build_image(args):
    """Build the named Docker image, or only its context tarball when
    --context-only was supplied.
    """
    from gecko_taskgraph.docker import build_context, build_image

    context_only = args["context_only"]
    if context_only is not None:
        build_context(args["image_name"], context_only, os.environ)
    else:
        build_image(args["image_name"], args["tag"], os.environ)
@command(
    "load-image",
    help="Load a pre-built Docker image. Note that you need to "
    "have docker installed and running for this to work.",
)
@argument(
    "--task-id",
    help="Load the image at public/image.tar.zst in this task, "
    "rather than searching the index",
)
@argument(
    "-t",
    "--tag",
    help="tag that the image should be loaded as. If not "
    "image will be loaded with tag from the tarball",
    metavar="name:tag",
)
@argument(
    "image_name",
    nargs="?",
    help="Load the image of this name based on the current "
    "contents of the tree (as built for mozilla-central "
    "or mozilla-inbound)",
)
def load_image(args):
    """Load a docker image into the local daemon, by task id or by name."""
    from gecko_taskgraph.docker import load_image_by_name, load_image_by_task_id

    # One of the two selectors is required.
    if not args.get("image_name") and not args.get("task_id"):
        print("Specify either IMAGE-NAME or TASK-ID")
        sys.exit(1)
    try:
        task_id = args["task_id"]
        loaded = (
            load_image_by_task_id(task_id, args.get("tag"))
            if task_id
            else load_image_by_name(args["image_name"], args.get("tag"))
        )
        if not loaded:
            sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("image-digest", help="Print the digest of a docker image.")
@argument(
    "image_name",
    help="Print the digest of the image of this name based on the current "
    "contents of the tree.",
)
def image_digest(args):
    """Resolve and print the content digest of the named in-tree image."""
    from gecko_taskgraph.docker import get_image_digest

    try:
        print(get_image_digest(args["image_name"]))
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("decision", help="Run the decision task")
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument(
    "--message",
    required=False,
    help=argparse.SUPPRESS,
)
@argument(
    "--project",
    required=True,
    help="Project to use for creating task graph. Example: --project=try",
)
@argument("--pushlog-id", dest="pushlog_id", required=True, default="0")
@argument("--pushdate", dest="pushdate", required=True, type=int, default=0)
@argument("--owner", required=True, help="email address of who owns this graph")
@argument("--level", required=True, help="SCM level of this repository")
@argument(
    "--target-tasks-method", help="method for selecting the target tasks to generate"
)
@argument(
    "--repository-type",
    required=True,
    help='Type of repository, either "hg" or "git"',
)
@argument("--base-repository", required=True, help='URL for "base" repository to clone')
@argument(
    "--head-repository",
    required=True,
    help='URL for "head" repository to fetch revision from',
)
@argument(
    "--head-ref", required=True, help="Reference (this is same as rev usually for hg)"
)
@argument(
    "--head-rev", required=True, help="Commit revision to use from head repository"
)
@argument("--head-tag", help="Tag attached to the revision", default="")
@argument(
    "--tasks-for", required=True, help="the tasks_for value used to generate this task"
)
@argument("--try-task-config-file", help="path to try task configuration file")
def decision(options):
    """Run the in-automation decision task.

    All the work happens in gecko_taskgraph.decision.taskgraph_decision;
    this wrapper only exists to declare the CLI flags above.
    """
    from gecko_taskgraph.decision import taskgraph_decision

    taskgraph_decision(options)
@command("action-callback", description="Run action callback used by action tasks")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
def action_callback(options):
    """Run the action callback described by the ACTION_* environment
    variables, as done inside action tasks in automation.
    """
    from gecko_taskgraph.actions import trigger_action_callback
    from gecko_taskgraph.actions.util import get_parameters

    try:
        # the target task for this action (or null if it's a group action)
        task_id = json.loads(os.environ.get("ACTION_TASK_ID", "null"))
        # the target task group for this action
        task_group_id = os.environ.get("ACTION_TASK_GROUP_ID", None)
        action_input = json.loads(os.environ.get("ACTION_INPUT", "null"))
        callback = os.environ.get("ACTION_CALLBACK", None)

        parameters = get_parameters(task_group_id)

        return trigger_action_callback(
            task_group_id=task_group_id,
            task_id=task_id,
            input=action_input,
            callback=callback,
            parameters=parameters,
            root=options["root"],
            test=False,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
@command("test-action-callback", description="Run an action callback in a testing mode")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument(
    "--parameters",
    "-p",
    default="",
    help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)`",
)
@argument("--task-id", default=None, help="TaskId to which the action applies")
@argument(
    "--task-group-id", default=None, help="TaskGroupId to which the action applies"
)
@argument("--input", default=None, help="Action input (.yml or .json)")
@argument("callback", default=None, help="Action callback name (Python function name)")
def test_action_callback(options):
    """Run an action callback locally in test mode.

    Loads parameters (and optional action input) from files, registers the
    graph config, then invokes the named callback with ``test=True``.
    Exits with status 1, printing a traceback, on any failure.
    """
    import taskgraph.parameters
    import gecko_taskgraph.actions
    from taskgraph.util import yaml
    from gecko_taskgraph.config import load_graph_config

    def load_data(filename):
        # Parse YAML or JSON payloads, keyed off the file extension.
        with open(filename) as f:
            if filename.endswith(".yml"):
                return yaml.load_stream(f)
            elif filename.endswith(".json"):
                return json.load(f)
            else:
                # Bug fix: the f-string previously had no placeholder, so
                # the offending filename never made it into the message.
                raise Exception(f"unknown filename {filename}")

    try:
        task_id = options["task_id"]

        if options["input"]:
            input = load_data(options["input"])
        else:
            input = None

        root = options["root"]
        graph_config = load_graph_config(root)
        trust_domain = graph_config["trust-domain"]
        graph_config.register()

        parameters = taskgraph.parameters.load_parameters_file(
            options["parameters"], strict=False, trust_domain=trust_domain
        )
        parameters.check()

        return gecko_taskgraph.actions.trigger_action_callback(
            task_group_id=options["task_group_id"],
            task_id=task_id,
            input=input,
            callback=options["callback"],
            parameters=parameters,
            root=root,
            test=True,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def create_parser():
    """Build the top-level argparse parser from the registered commands.

    Each entry in the ``commands`` registry becomes a subparser, with the
    argument specs collected by @argument and the defaults from @command.
    """
    parser = argparse.ArgumentParser(description="Interact with taskgraph")
    subparsers = parser.add_subparsers()
    # Iterate values directly — the registry key duplicates args[0]
    # and was previously fetched only to be discarded.
    for func, args, kwargs, defaults in commands.values():
        subparser = subparsers.add_parser(*args, **kwargs)
        for arg in func.args:
            subparser.add_argument(*arg[0], **arg[1])
        subparser.set_defaults(command=func, **defaults)
    return parser
def setup_logging():
    """Configure root logging: INFO level, timestamped message format."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )
def main(args=None):
    """CLI entry point: parse *args* and dispatch to the chosen command.

    args: argument list; defaults to ``sys.argv[1:]`` resolved at call
        time. Exits with status 1, printing a traceback, if the command
        raises.
    """
    # Bug fix: the default used to be `sys.argv[1:]` evaluated once at
    # import time, freezing argv for any later caller; resolve per call.
    if args is None:
        args = sys.argv[1:]
    setup_logging()
    parser = create_parser()
    args = parser.parse_args(args)
    try:
        args.command(vars(args))
    except Exception:
        traceback.print_exc()
        sys.exit(1)