Move non-analysis tools from gmxana to gmxpreprocess
[gromacs.git] / src / gromacs / gmxpreprocess / insert-molecules.cpp
blob268b56e125bfea63d613b9513a5c24e3640f862e
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "insert-molecules.h"
41 #include <algorithm>
42 #include <memory>
43 #include <set>
44 #include <string>
45 #include <vector>
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/makeexclusiondistances.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/mtop_util.h"
71 #include "gromacs/topology/symtab.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/trajectory/trajectoryframe.h"
74 #include "gromacs/trajectoryanalysis/topologyinformation.h"
75 #include "gromacs/utility/cstringutil.h"
76 #include "gromacs/utility/exceptions.h"
77 #include "gromacs/utility/fatalerror.h"
78 #include "gromacs/utility/smalloc.h"
79 #include "gromacs/utility/unique_cptr.h"
81 using gmx::RVec;
83 /* enum for random rotations of inserted solutes */
84 enum RotationType {
85 en_rotXYZ, en_rotZ, en_rotNone
87 const char *const cRotationEnum[] = {"xyz", "z", "none"};
89 static void center_molecule(gmx::ArrayRef<RVec> x)
91 rvec center = {0};
92 for (auto &xi : x)
94 rvec_inc(center, xi);
96 svmul(1.0/x.size(), center, center);
97 for (auto &xi : x)
99 rvec_dec(xi, center);
103 static void generate_trial_conf(gmx::ArrayRef<RVec> xin,
104 const rvec offset, RotationType enum_rot,
105 gmx::DefaultRandomEngine * rng,
106 std::vector<RVec> *xout)
108 gmx::UniformRealDistribution<real> dist(0, 2.0*M_PI);
109 xout->assign(xin.begin(), xin.end());
111 real alfa = 0.0, beta = 0.0, gamma = 0.0;
112 switch (enum_rot)
114 case en_rotXYZ:
115 alfa = dist(*rng);
116 beta = dist(*rng);
117 gamma = dist(*rng);
118 break;
119 case en_rotZ:
120 alfa = beta = 0.;
121 gamma = dist(*rng);
122 break;
123 case en_rotNone:
124 alfa = beta = gamma = 0.;
125 break;
127 if (enum_rot == en_rotXYZ || enum_rot == en_rotZ)
129 rotate_conf(xout->size(), as_rvec_array(xout->data()), nullptr, alfa, beta, gamma);
131 for (size_t i = 0; i < xout->size(); ++i)
133 rvec_inc((*xout)[i], offset);
137 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch *search,
138 const std::vector<real> &exclusionDistances,
139 const std::vector<RVec> &x,
140 const std::vector<real> &exclusionDistances_insrt,
141 const t_atoms &atoms,
142 const std::set<int> &removableAtoms,
143 gmx::AtomsRemover *remover)
145 gmx::AnalysisNeighborhoodPositions pos(x);
146 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
147 gmx::AnalysisNeighborhoodPair pair;
148 while (pairSearch.findNextPair(&pair))
150 const real r1 = exclusionDistances[pair.refIndex()];
151 const real r2 = exclusionDistances_insrt[pair.testIndex()];
152 if (pair.distance2() < gmx::square(r1 + r2))
154 if (removableAtoms.count(pair.refIndex()) == 0)
156 return false;
158 // TODO: If molecule information is available, this should ideally
159 // use it to remove whole molecules.
160 remover->markResidue(atoms, pair.refIndex(), true);
163 return true;
166 static void insert_mols(int nmol_insrt, int ntry, int seed,
167 real defaultDistance, real scaleFactor,
168 t_atoms *atoms, t_symtab *symtab, std::vector<RVec> *x,
169 const std::set<int> &removableAtoms,
170 const t_atoms &atoms_insrt, gmx::ArrayRef<RVec> x_insrt,
171 int ePBC, matrix box,
172 const std::string &posfn, const rvec deltaR,
173 RotationType enum_rot)
175 fprintf(stderr, "Initialising inter-atomic distances...\n");
176 AtomProperties aps;
177 std::vector<real> exclusionDistances(
178 makeExclusionDistances(atoms, &aps, defaultDistance, scaleFactor));
179 const std::vector<real> exclusionDistances_insrt(
180 makeExclusionDistances(&atoms_insrt, &aps, defaultDistance, scaleFactor));
182 const real maxInsertRadius
183 = *std::max_element(exclusionDistances_insrt.begin(),
184 exclusionDistances_insrt.end());
185 real maxRadius = maxInsertRadius;
186 if (!exclusionDistances.empty())
188 const real maxExistingRadius
189 = *std::max_element(exclusionDistances.begin(),
190 exclusionDistances.end());
191 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
194 // TODO: Make all of this exception-safe.
195 gmx::AnalysisNeighborhood nb;
196 nb.setCutoff(maxInsertRadius + maxRadius);
199 if (seed == 0)
201 seed = static_cast<int>(gmx::makeRandomSeed());
203 fprintf(stderr, "Using random seed %d\n", seed);
205 gmx::DefaultRandomEngine rng(seed);
207 t_pbc pbc;
208 set_pbc(&pbc, ePBC, box);
210 /* With -ip, take nmol_insrt from file posfn */
211 double **rpos = nullptr;
212 const bool insertAtPositions = !posfn.empty();
213 if (insertAtPositions)
215 int ncol;
216 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
217 if (ncol != 3)
219 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n",
220 posfn.c_str());
222 fprintf(stderr, "Read %d positions from file %s\n\n",
223 nmol_insrt, posfn.c_str());
226 gmx::AtomsBuilder builder(atoms, symtab);
227 gmx::AtomsRemover remover(*atoms);
229 const int finalAtomCount = atoms->nr + nmol_insrt * atoms_insrt.nr;
230 const int finalResidueCount = atoms->nres + nmol_insrt * atoms_insrt.nres;
231 builder.reserve(finalAtomCount, finalResidueCount);
232 x->reserve(finalAtomCount);
233 exclusionDistances.reserve(finalAtomCount);
236 std::vector<RVec> x_n(x_insrt.size());
238 int mol = 0;
239 int trial = 0;
240 int firstTrial = 0;
241 int failed = 0;
242 gmx::UniformRealDistribution<real> dist;
244 while (mol < nmol_insrt && trial < ntry*nmol_insrt)
246 rvec offset_x;
247 if (!insertAtPositions)
249 // Insert at random positions.
250 offset_x[XX] = box[XX][XX] * dist(rng);
251 offset_x[YY] = box[YY][YY] * dist(rng);
252 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
254 else
256 // Skip a position if ntry trials were not successful.
257 if (trial >= firstTrial + ntry)
259 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n",
260 rpos[XX][mol], rpos[YY][mol], rpos[ZZ][mol]);
261 ++mol;
262 ++failed;
263 firstTrial = trial;
264 continue;
266 // Insert at positions taken from option -ip file.
267 offset_x[XX] = rpos[XX][mol] + deltaR[XX]*(2 * dist(rng)-1);
268 offset_x[YY] = rpos[YY][mol] + deltaR[YY]*(2 * dist(rng)-1);
269 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ]*(2 * dist(rng)-1);
271 fprintf(stderr, "\rTry %d", ++trial);
272 fflush(stderr);
274 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
275 gmx::AnalysisNeighborhoodPositions pos(*x);
276 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
277 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt,
278 *atoms, removableAtoms, &remover))
280 x->insert(x->end(), x_n.begin(), x_n.end());
281 exclusionDistances.insert(exclusionDistances.end(),
282 exclusionDistances_insrt.begin(),
283 exclusionDistances_insrt.end());
284 builder.mergeAtoms(atoms_insrt);
285 ++mol;
286 firstTrial = trial;
287 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
291 fprintf(stderr, "\n");
292 /* print number of molecules added */
293 fprintf(stderr, "Added %d molecules (out of %d requested)\n",
294 mol - failed, nmol_insrt);
296 const int originalAtomCount = atoms->nr;
297 const int originalResidueCount = atoms->nres;
298 remover.refreshAtomCount(*atoms);
299 remover.removeMarkedElements(x);
300 remover.removeMarkedAtoms(atoms);
301 if (atoms->nr < originalAtomCount)
303 fprintf(stderr, "Replaced %d residues (%d atoms)\n",
304 originalResidueCount - atoms->nres,
305 originalAtomCount - atoms->nr);
308 if (rpos != nullptr)
310 for (int i = 0; i < DIM; ++i)
312 sfree(rpos[i]);
314 sfree(rpos);
318 namespace gmx
321 namespace
324 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
326 public:
327 InsertMolecules()
328 : bBox_(false), nmolIns_(0), nmolTry_(10), seed_(0),
329 defaultDistance_(0.105), scaleFactor_(0.57), enumRot_(en_rotXYZ)
331 clear_rvec(newBox_);
332 clear_rvec(deltaR_);
335 // From ITopologyProvider
336 gmx_mtop_t *getTopology(bool /*required*/) override { return topInfo_.mtop(); }
337 int getAtomCount() override { return 0; }
339 // From ICommandLineOptionsModule
340 void init(CommandLineModuleSettings * /*settings*/) override
343 void initOptions(IOptionsContainer *options,
344 ICommandLineOptionsModuleSettings *settings) override;
345 void optionsFinished() override;
346 int run() override;
348 private:
349 SelectionCollection selections_;
351 std::string inputConfFile_;
352 std::string insertConfFile_;
353 std::string positionFile_;
354 std::string outputConfFile_;
355 rvec newBox_;
356 bool bBox_;
357 int nmolIns_;
358 int nmolTry_;
359 int seed_;
360 real defaultDistance_;
361 real scaleFactor_;
362 rvec deltaR_;
363 RotationType enumRot_;
364 Selection replaceSel_;
366 TopologyInformation topInfo_;
369 void InsertMolecules::initOptions(IOptionsContainer *options,
370 ICommandLineOptionsModuleSettings *settings)
372 const char *const desc[] = {
373 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
374 "the [TT]-ci[tt] input file. The insertions take place either into",
375 "vacant space in the solute conformation given with [TT]-f[tt], or",
376 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
377 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
378 "around the solute before insertions. Any velocities present are",
379 "discarded.",
381 "It is possible to also insert into a solvated configuration and",
382 "replace solvent atoms with the inserted atoms. To do this, use",
383 "[TT]-replace[tt] to specify a selection that identifies the atoms",
384 "that can be replaced. The tool assumes that all molecules in this",
385 "selection consist of single residues: each residue from this",
386 "selection that overlaps with the inserted molecules will be removed",
387 "instead of preventing insertion.",
389 "By default, the insertion positions are random (with initial seed",
390 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
391 "molecules have been inserted in the box. Molecules are not inserted",
392 "where the distance between any existing atom and any atom of the",
393 "inserted molecule is less than the sum based on the van der Waals",
394 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
395 "Waals radii is read by the program, and the resulting radii scaled",
396 "by [TT]-scale[tt]. If radii are not found in the database, those",
397 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
398 "Note that the usefulness of those radii depends on the atom names,",
399 "and thus varies widely with force field.",
401 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
402 "before giving up. Increase [TT]-try[tt] if you have several small",
403 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
404 "molecules are randomly oriented before insertion attempts.",
406 "Alternatively, the molecules can be inserted only at positions defined in",
407 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
408 "that give the displacements compared to the input molecule position",
409 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
410 "positions, the molecule must be centered on (0,0,0) before using",
411 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
412 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
413 "defines the maximally allowed displacements during insertial trials.",
414 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
417 settings->setHelpText(desc);
419 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
420 new SelectionOptionBehavior(&selections_, this));
421 settings->addOptionsBehavior(selectionOptionBehavior);
423 // TODO: Replace use of legacyType.
424 options->addOption(FileNameOption("f")
425 .legacyType(efSTX).inputFile()
426 .store(&inputConfFile_)
427 .defaultBasename("protein")
428 .description("Existing configuration to insert into"));
429 options->addOption(FileNameOption("ci")
430 .legacyType(efSTX).inputFile().required()
431 .store(&insertConfFile_)
432 .defaultBasename("insert")
433 .description("Configuration to insert"));
434 options->addOption(FileNameOption("ip")
435 .filetype(eftGenericData).inputFile()
436 .store(&positionFile_)
437 .defaultBasename("positions")
438 .description("Predefined insertion trial positions"));
439 options->addOption(FileNameOption("o")
440 .legacyType(efSTO).outputFile().required()
441 .store(&outputConfFile_)
442 .defaultBasename("out")
443 .description("Output configuration after insertion"));
445 options->addOption(SelectionOption("replace").onlyAtoms()
446 .store(&replaceSel_)
447 .description("Atoms that can be removed if overlapping"));
448 selectionOptionBehavior->initOptions(options);
450 options->addOption(RealOption("box").vector()
451 .store(newBox_).storeIsSet(&bBox_)
452 .description("Box size (in nm)"));
453 options->addOption(IntegerOption("nmol")
454 .store(&nmolIns_)
455 .description("Number of extra molecules to insert"));
456 options->addOption(IntegerOption("try")
457 .store(&nmolTry_)
458 .description("Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
459 options->addOption(IntegerOption("seed")
460 .store(&seed_)
461 .description("Random generator seed (0 means generate)"));
462 options->addOption(RealOption("radius")
463 .store(&defaultDistance_)
464 .description("Default van der Waals distance"));
465 options->addOption(RealOption("scale")
466 .store(&scaleFactor_)
467 .description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
468 options->addOption(RealOption("dr").vector()
469 .store(deltaR_)
470 .description("Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
471 options->addOption(EnumOption<RotationType>("rot").enumValue(cRotationEnum)
472 .store(&enumRot_)
473 .description("Rotate inserted molecules randomly"));
476 void InsertMolecules::optionsFinished()
478 if (nmolIns_ <= 0 && positionFile_.empty())
480 GMX_THROW(InconsistentInputError("Either -nmol must be larger than 0, "
481 "or positions must be given with -ip."));
483 if (inputConfFile_.empty() && !bBox_)
485 GMX_THROW(InconsistentInputError("When no solute (-f) is specified, "
486 "a box size (-box) must be specified."));
488 if (replaceSel_.isValid() && inputConfFile_.empty())
490 GMX_THROW(InconsistentInputError("Replacement (-replace) only makes sense "
491 "together with an existing configuration (-f)."));
494 if (!inputConfFile_.empty())
496 fprintf(stderr, "Reading solute configuration\n");
497 topInfo_.fillFromInputFile(inputConfFile_);
498 if (topInfo_.mtop()->natoms == 0)
500 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
505 int InsertMolecules::run()
507 const char *outputTitle = topInfo_.name();
508 std::vector<RVec> xOutput;
509 matrix box = {{ 0 }};
510 if (topInfo_.hasTopology())
512 xOutput = copyOf(topInfo_.x());
513 topInfo_.getBox(box);
515 else
517 xOutput.resize(topInfo_.mtop()->natoms);
519 auto atomsSolute = topInfo_.copyAtoms();
520 std::set<int> removableAtoms;
521 if (replaceSel_.isValid())
523 t_pbc pbc;
524 set_pbc(&pbc, topInfo_.ePBC(), box);
525 t_trxframe *fr;
526 snew(fr, 1);
527 fr->natoms = topInfo_.mtop()->natoms;
528 fr->bX = TRUE;
529 fr->x = as_rvec_array(xOutput.data());
530 selections_.evaluate(fr, &pbc);
531 sfree(fr);
532 removableAtoms.insert(replaceSel_.atomIndices().begin(),
533 replaceSel_.atomIndices().end());
534 // TODO: It could be nice to check that removableAtoms contains full
535 // residues, since we anyways remove whole residues instead of
536 // individual atoms.
539 int ePBCForOutput = topInfo_.ePBC();
540 if (bBox_)
542 ePBCForOutput = epbcXYZ;
543 clear_mat(box);
544 box[XX][XX] = newBox_[XX];
545 box[YY][YY] = newBox_[YY];
546 box[ZZ][ZZ] = newBox_[ZZ];
548 if (det(box) == 0)
550 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
551 "or give explicit -box command line option");
554 fprintf(stderr, "Reading molecule configuration\n");
555 TopologyInformation topInfoForInsertedMolecule;
556 topInfoForInsertedMolecule.fillFromInputFile(insertConfFile_);
557 auto atomsInserted = topInfoForInsertedMolecule.atoms();
558 std::vector<RVec> xInserted = copyOf(topInfoForInsertedMolecule.x());
560 if (topInfoForInsertedMolecule.mtop()->natoms == 0)
562 gmx_fatal(FARGS, "No molecule in %s, please check your input",
563 insertConfFile_.c_str());
565 if (outputTitle == nullptr)
567 outputTitle = topInfoForInsertedMolecule.name();
569 if (positionFile_.empty())
571 center_molecule(xInserted);
574 auto symtabInserted = duplicateSymtab(&topInfo_.mtop()->symtab);
575 const sfree_guard symtabInsertedGuard(symtabInserted);
576 /* add nmol_ins molecules of atoms_ins
577 in random orientation at random place */
578 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_,
579 atomsSolute.get(), symtabInserted, &xOutput, removableAtoms, *atomsInserted, xInserted,
580 ePBCForOutput, box, positionFile_, deltaR_, enumRot_);
582 /* write new configuration to file confout */
583 fprintf(stderr, "Writing generated configuration to %s\n",
584 outputConfFile_.c_str());
585 write_sto_conf(outputConfFile_.c_str(), outputTitle, atomsSolute.get(),
586 as_rvec_array(xOutput.data()), nullptr, ePBCForOutput, box);
588 /* print size of generated configuration */
589 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
590 atomsSolute->nr, atomsSolute->nres);
591 done_symtab(symtabInserted);
592 return 0;
595 } // namespace
598 const char* InsertMoleculesInfo::name()
600 static const char* name = "insert-molecules";
601 return name;
604 const char* InsertMoleculesInfo::shortDescription()
606 static const char* shortDescription =
607 "Insert molecules into existing vacancies";
608 return shortDescription;
611 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
613 return ICommandLineOptionsModulePointer(new InsertMolecules());
616 } // namespace gmx