2 # This file is part of the GROMACS molecular simulation package.
4 # Copyright (c) 2019,2020, by the GROMACS development team, led by
5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 # and including many others, as listed in the AUTHORS file in the
7 # top-level source directory and at http://www.gromacs.org.
9 # GROMACS is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1
12 # of the License, or (at your option) any later version.
14 # GROMACS is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with GROMACS; if not, see
21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 # If you want to redistribute modifications to GROMACS, please
25 # consider that scientific software is very special. Version
26 # control is crucial - bugs must be traceable. We will be happy to
27 # consider code for inclusion in the official distribution, but
28 # derived work must not be called official GROMACS. Details are found
29 # in the README & COPYING files - if they are missing, get the
30 # official version at http://www.gromacs.org.
32 # To help us fund GROMACS development, we humbly ask that you cite
33 # the research papers on the package. Check out http://www.gromacs.org.
"""Provide command line operation."""
39 __all__
= ['commandline_operation']
46 from gmxapi
import exceptions
47 from gmxapi
import logger
as root_logger
48 from gmxapi
.datamodel
import NDArray
49 from gmxapi
.operation
import OutputCollectionDescription
# Module-level logger: a child of the package-wide gmxapi logger, so that
# messages from this module are attributed to the 'commandline' component.
logger = root_logger.getChild('commandline')
logger.info('Importing {}'.format(__name__))
# Create an Operation that consumes a list and a boolean to produce a string and an integer.
#
# Wrap the defined function using a decorator that
#    * strips the `output` parameter from the signature
#    * provides `output` publishing proxy to the inner function and
#    * produces a result with attributes for
#       * file: mapping of output flags to output filenames
#       * stdout: process STDOUT
#       * stderr: process STDERR
#       * returncode: integer return code of wrapped command
#
# Note that the existence of the 'file' output map is expressed here, but
# the keys of the map are not implicit or set by the wrapped function.
# For the map to be non-empty, it must be defined before the resulting helper
# function is called.
#
# TODO: Operation returns the output object when called with the shorter signature.
def _log_stream(label, text):
    """Write one captured output stream of the subprocess to the debug log.

    Arguments:
        label: name of the stream used in the log messages (e.g. ``'STDOUT'``)
        text: captured text of the stream, or None if nothing was captured
    """
    logger.debug('{}:'.format(label))
    if text is not None:
        for line in text.split('\n'):
            logger.debug(line)
    else:
        logger.debug('{} is empty'.format(label))


@gmx.function_wrapper(output={'stdout': str,
                              'stderr': str,
                              'returncode': int})
def cli(command: NDArray, shell: bool, output: OutputCollectionDescription, stdin: str = ''):
    """Execute a command line program in a subprocess.

    Configure an executable in a subprocess. Executes when run in an execution
    Context, as part of a work graph or via gmx.run(). Runs in the current
    working directory.

    Shell processing is not enabled, but can be considered for a future version.
    This means that shell expansions such as environment variables, globbing (`*`),
    and other special symbols (like `~` for home directory) are not available.
    This allows a simpler and more robust implementation, as well as a better
    ability to uniquely identify the effects of a command line operation. If you
    think this disallows important use cases, please let us know.

    Arguments:
        command: a tuple (or list) to be the subprocess arguments, including `executable`
        output: publishing proxy that receives `stdout`, `stderr` and `returncode`
        shell: unused (provides forward-compatibility); must evaluate to False
        stdin (str): String input to send to STDIN (terminal input) of the executable.

    Multi-line text sent to *stdin* should be joined into a single string
    (e.g. ``'\\n'.join(list_of_strings) + '\\n'``).
    If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
    and will run one operation for each provided string.

    Only string input (:py:func:`str`) to *stdin* is currently supported.
    If you have a use case that requires streaming input or binary input,
    please open an issue or contact the author(s).

    Warning:
        Arguments are iteratively added to the command line with standard Python
        iteration, so you should use a tuple or list even if you have only one parameter.
        I.e. If you provide a string with `arguments="asdf"` then it will be passed as
        `... "a" "s" "d" "f"`. To pass a single string argument, `arguments=("asdf",)`
        or `arguments=["asdf"]`.

    `input` and `output` should be a dictionary with string keys, where the keys
    name command line "flags" or options.

    Example:
        Execute a command named `exe` that takes a flagged option for file name
        (stored in a local Python variable `my_filename`) and an `origin` flag
        that uses the next three arguments to define a vector.

            >>> my_filename = "somefilename"
            >>> result = cli(('exe', '--origin', 1.0, 2.0, 3.0, '-f', my_filename), shell=False)
            >>> assert hasattr(result, 'stdout')
            >>> assert hasattr(result, 'stderr')
            >>> assert hasattr(result, 'returncode')

    Returns:
        A data structure with attributes for each of the results `stdout`, `stderr`,
        and `returncode`.

    Result object attributes:
        * `stdout`: A string mapping from process STDOUT.
        * `stderr`: A string mapping from process STDERR; it will be the
          error output (if any) if the process failed.
        * `returncode`: return code of the subprocess.

    The `file` mapping of CLI flags to output filenames is not published here;
    it is added by :py:func:`commandline_operation`.

    Raises:
        exceptions.UsageError: if `shell` evaluates to True.
        exceptions.ValueError: if the first element of `command` cannot be
            resolved to an executable with :py:func:`shutil.which`.
    """
    # In the operation implementation, we expect the `shell` parameter to be intercepted by the
    # wrapper and set to False.
    if shell:
        raise exceptions.UsageError("Operation does not support shell processing.")

    # A bare string is a single argument, not a sequence of one-character arguments.
    if isinstance(command, (str, bytes)):
        command = [command]
    command = list(command)

    executable = shutil.which(command[0])
    if executable is None:
        raise exceptions.ValueError('"{}" is not found or not executable.'.format(command[0]))
    command[0] = executable

    # TODO: (FR9) Can OS input/output filehandles be a responsibility of
    #  the code providing 'resources'?

    logger.debug('executing subprocess')
    try:
        # NOTE(review): the `shell`, `input`, `check` and `encoding` keywords sit on
        # lines absent from the reviewed listing and are restored from context
        # (the `stdin` parameter and the CalledProcessError handler below) -- confirm.
        completed_process = subprocess.run(command,
                                           shell=shell,
                                           input=stdin,
                                           check=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           encoding='utf-8',
                                           universal_newlines=True)
    except subprocess.CalledProcessError as e:
        # A non-zero exit status is reported through the outputs, not re-raised.
        logger.info("commandline operation had non-zero return status "
                    "when calling {}".format(e.cmd))
        stdout = e.stdout
        stderr = e.stderr
        returncode = e.returncode
    else:
        returncode = completed_process.returncode
        # TODO: Resource management code should manage a safe data object for `output`.
        _log_stream('STDOUT', completed_process.stdout)
        _log_stream('STDERR', completed_process.stderr)
        stdout = completed_process.stdout
        stderr = completed_process.stderr

    # Publish outputs.
    output.stdout = stdout
    output.stderr = stderr
    output.returncode = returncode
202 # TODO: (FR4) Make this a formal operation to properly handle gmxapi data dependencies.
203 # The consumer of this operation has an NDArray input. filemap may contain gmxapi data flow
204 # aspects that we want the framework to handle for us.
def filemap_to_flag_list(filemap: dict = None):
    """Flatten a flag-to-filename mapping into a sequence of command line arguments.

    Each entry of *filemap* contributes its key (the command line flag) followed
    by its value(s). gmxapi uses this to build an executable command line from
    the input and output file mappings supplied by the user.

    This is mostly used implicitly: commandline_operation() applies it to the
    user input and passes the result on to cli().

    Arguments:
        filemap: key-value map of command line flags and filename arguments

    Returns:
        list of strings and/or gmxapi data references
    """
    flags = []
    if filemap is None:
        return flags

    for flag, value in filemap.items():
        # Note that the value may be a string, a list, an ndarray, or a future
        if isinstance(value, (list, tuple, NDArray)):
            # Already a sequence of arguments.
            arguments = value
        elif hasattr(value, 'result') and value.dtype == NDArray:
            # A future that already resolves to an array; use it unchanged.
            arguments = value
        elif hasattr(value, 'result') and value.dtype != NDArray:
            # TODO: Fix this ugly hack when we have proper Future slicing and can make NDArray futures.
            # Wrap the scalar future so that resolving it yields a one-element list.
            result_function = value.result
            value.result = lambda function=result_function: [function()]
            arguments = value
        else:
            # A plain scalar becomes a single-element argument list.
            arguments = [value]
        flag_with_arguments = gmx.join_arrays(front=[flag], back=arguments)
        flags = gmx.join_arrays(front=flags, back=flag_with_arguments)
    return flags
238 # TODO: (FR4) Use generating function or decorator that can validate kwargs?
239 # TODO: (FR4) Outputs need to be fully formed and typed in the object returned
240 # from the helper (decorated function).
241 def commandline_operation(executable
=None,
243 input_files
: dict = None,
244 output_files
: dict = None,
247 """Helper function to define a new operation that executes a subprocess in gmxapi data flow.
249 Define a new Operation for a particular executable and input/output parameter set.
250 Generate a chain of operations to process the named key word arguments and handle
251 input/output data dependencies.
254 executable: name of an executable on the path
255 arguments: list of positional arguments to insert at ``argv[1]``
256 input_files: mapping of command-line flags to input file names
257 output_files: mapping of command-line flags to output file names
258 stdin (str): String input to send to STDIN (terminal input) of the executable (optional).
260 Multi-line text sent to *stdin* should be joined into a single string.
263 commandline_operation(..., stdin='\\n'.join(list_of_strings) + '\\n')
265 If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
266 and will run one operation for each provided string.
268 Only string input (:py:func:`str`) to *stdin* is currently supported.
269 If you have a use case that requires streaming input or binary input,
270 please open an issue or contact the author(s).
273 The output node of the resulting operation handle contains
275 * ``file``: the mapping of CLI flags to filename strings resulting from the ``output_files`` kwarg
276 * ``stdout``: A string mapping from process STDOUT.
277 * ``stderr``: A string mapping from process STDERR; it will be the
278 error output (if any) if the process failed.
279 * ``returncode``: return code of the subprocess.
283 # Implementation details: When used in a script, this function returns an
284 # instance of an operation. However, because of the dynamic specification of
285 # inputs and outputs, each invocation may have the overhead of defining new
286 # types to express the data flow topology, regardless of the executable.
287 # If this overhead is problematic, consider exposing the intermediate step
288 # at which the Operation is fully specified to facilitate reuse.
291 # 1. Define a new operation with outputs from `cli()` plus `file` from `output_files`
293 # output_files is essentially passed through, but we need assurance that results
294 # will not be published until the rest of the operation has run (i.e. the cli() executable.)
296 # Warning: decorating a local function like this is counter to the notion of Operations
297 # as portable (importable, serializable/deserializable). The big picture here needs
298 # some more consideration.
299 # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
300 # There is nothing antithetical about objects implementing gmxapi data interfaces
301 # that are only resolvable by a certain Context as long as that Context can convey
302 # the results to another Context upon request. Re-instantiating Operations is
303 # only one way of relocating Futures. In this case, though, the dynamic creation of
304 # merged_ops doesn't seem right, and commandline_operation should probably be
305 # a proper Operation.
307 # TODO: (FR4+) Characterize the `file` dictionary key type:
308 # explicitly sequences rather than maybe-string/maybe-sequence-of-strings
309 @gmx.function_wrapper(output
={'stdout': str,
313 def merged_ops(stdout
: str = None,
315 returncode
: int = None,
317 output
: OutputCollectionDescription
= None):
318 assert stdout
is not None
319 assert stderr
is not None
320 assert returncode
is not None
321 assert file is not None
322 assert output
is not None
323 output
.returncode
= returncode
324 output
.stdout
= stdout
325 output
.stderr
= stderr
332 # 2. Prepare data flow.
334 if input_files
is None:
336 if output_files
is None:
338 if isinstance(arguments
, (str, bytes
)):
339 arguments
= [arguments
]
340 command
= gmx
.concatenate_lists([[executable
],
342 filemap_to_flag_list(input_files
),
343 filemap_to_flag_list(output_files
)])
344 shell
= gmx
.make_constant(False)
345 cli_args
= {'command': command
,
347 cli_args
.update(**kwargs
)
348 if stdin
is not None:
349 cli_args
['stdin'] = str(stdin
)
352 # 3. Merge operations
354 # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
355 # produce references to the same unique resource. Creating this handle
356 # separately should not be necessary, but we've got a way to go until we have the
357 # fingerprinting and Context resource management we need for that.
358 # TODO: ``label`` kwarg
359 # TODO: input fingerprinting
360 cli_result
= cli(**cli_args
)
361 merged_result
= merged_ops(stdout
=cli_result
.output
.stdout
,
362 stderr
=cli_result
.output
.stderr
,
363 returncode
=cli_result
.output
.returncode
,
367 # Return an object with an OutputCollection granting access to outputs of
368 # cli() and of output_files (as "file")