Introduced system preparation section to user guide
[gromacs.git] / src / gromacs / gmxpreprocess / insert-molecules.cpp
blob019327750f78017892159246d42ca6004eca5a41
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "insert-molecules.h"
41 #include <algorithm>
42 #include <memory>
43 #include <set>
44 #include <string>
45 #include <vector>
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/read-conformation.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/trajectory/trajectoryframe.h"
72 #include "gromacs/utility/cstringutil.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
77 using gmx::RVec;
79 /* enum for random rotations of inserted solutes */
80 enum RotationType {
81 en_rotXYZ, en_rotZ, en_rotNone
83 const char *const cRotationEnum[] = {"xyz", "z", "none"};
85 static void center_molecule(std::vector<RVec> *x)
87 const size_t atomCount = x->size();
88 rvec center;
89 clear_rvec(center);
90 for (size_t i = 0; i < atomCount; ++i)
92 rvec_inc(center, (*x)[i]);
94 svmul(1.0/atomCount, center, center);
95 for (size_t i = 0; i < atomCount; ++i)
97 rvec_dec((*x)[i], center);
101 static void generate_trial_conf(const std::vector<RVec> &xin,
102 const rvec offset, RotationType enum_rot,
103 gmx::DefaultRandomEngine * rng,
104 std::vector<RVec> *xout)
106 gmx::UniformRealDistribution<real> dist(0, 2.0*M_PI);
107 *xout = xin;
109 real alfa = 0.0, beta = 0.0, gamma = 0.0;
110 switch (enum_rot)
112 case en_rotXYZ:
113 alfa = dist(*rng);
114 beta = dist(*rng);
115 gamma = dist(*rng);
116 break;
117 case en_rotZ:
118 alfa = beta = 0.;
119 gamma = dist(*rng);
120 break;
121 case en_rotNone:
122 alfa = beta = gamma = 0.;
123 break;
125 if (enum_rot == en_rotXYZ || enum_rot == en_rotZ)
127 rotate_conf(xout->size(), as_rvec_array(xout->data()), NULL, alfa, beta, gamma);
129 for (size_t i = 0; i < xout->size(); ++i)
131 rvec_inc((*xout)[i], offset);
135 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch *search,
136 const std::vector<real> &exclusionDistances,
137 const std::vector<RVec> &x,
138 const std::vector<real> &exclusionDistances_insrt,
139 const t_atoms &atoms,
140 const std::set<int> &removableAtoms,
141 gmx::AtomsRemover *remover)
143 gmx::AnalysisNeighborhoodPositions pos(x);
144 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
145 gmx::AnalysisNeighborhoodPair pair;
146 while (pairSearch.findNextPair(&pair))
148 const real r1 = exclusionDistances[pair.refIndex()];
149 const real r2 = exclusionDistances_insrt[pair.testIndex()];
150 if (pair.distance2() < gmx::square(r1 + r2))
152 if (removableAtoms.count(pair.refIndex()) == 0)
154 return false;
156 // TODO: If molecule information is available, this should ideally
157 // use it to remove whole molecules.
158 remover->markResidue(atoms, pair.refIndex(), true);
161 return true;
164 static void insert_mols(int nmol_insrt, int ntry, int seed,
165 real defaultDistance, real scaleFactor,
166 t_topology *top, std::vector<RVec> *x,
167 const std::set<int> &removableAtoms,
168 const t_atoms &atoms_insrt, const std::vector<RVec> &x_insrt,
169 int ePBC, matrix box,
170 const std::string &posfn, const rvec deltaR,
171 RotationType enum_rot)
173 fprintf(stderr, "Initialising inter-atomic distances...\n");
174 gmx_atomprop_t aps = gmx_atomprop_init();
175 std::vector<real> exclusionDistances(
176 makeExclusionDistances(&top->atoms, aps, defaultDistance, scaleFactor));
177 const std::vector<real> exclusionDistances_insrt(
178 makeExclusionDistances(&atoms_insrt, aps, defaultDistance, scaleFactor));
179 gmx_atomprop_destroy(aps);
181 const real maxInsertRadius
182 = *std::max_element(exclusionDistances_insrt.begin(),
183 exclusionDistances_insrt.end());
184 real maxRadius = maxInsertRadius;
185 if (!exclusionDistances.empty())
187 const real maxExistingRadius
188 = *std::max_element(exclusionDistances.begin(),
189 exclusionDistances.end());
190 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
193 // TODO: Make all of this exception-safe.
194 gmx::AnalysisNeighborhood nb;
195 nb.setCutoff(maxInsertRadius + maxRadius);
198 if (seed == 0)
200 seed = static_cast<int>(gmx::makeRandomSeed());
202 fprintf(stderr, "Using random seed %d\n", seed);
204 gmx::DefaultRandomEngine rng(seed);
206 t_pbc pbc;
207 set_pbc(&pbc, ePBC, box);
209 /* With -ip, take nmol_insrt from file posfn */
210 double **rpos = NULL;
211 const bool insertAtPositions = !posfn.empty();
212 if (insertAtPositions)
214 int ncol;
215 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
216 if (ncol != 3)
218 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n",
219 posfn.c_str());
221 fprintf(stderr, "Read %d positions from file %s\n\n",
222 nmol_insrt, posfn.c_str());
225 gmx::AtomsBuilder builder(&top->atoms, &top->symtab);
226 gmx::AtomsRemover remover(top->atoms);
228 const int finalAtomCount = top->atoms.nr + nmol_insrt * atoms_insrt.nr;
229 const int finalResidueCount = top->atoms.nres + nmol_insrt * atoms_insrt.nres;
230 builder.reserve(finalAtomCount, finalResidueCount);
231 x->reserve(finalAtomCount);
232 exclusionDistances.reserve(finalAtomCount);
235 std::vector<RVec> x_n(x_insrt.size());
237 int mol = 0;
238 int trial = 0;
239 int firstTrial = 0;
240 int failed = 0;
241 gmx::UniformRealDistribution<real> dist;
243 while (mol < nmol_insrt && trial < ntry*nmol_insrt)
245 // cppcheck 1.72 complains about uninitialized variables in the
246 // assignments below otherwise...
247 rvec offset_x = {0};
248 if (!insertAtPositions)
250 // Insert at random positions.
251 offset_x[XX] = box[XX][XX] * dist(rng);
252 offset_x[YY] = box[YY][YY] * dist(rng);
253 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
255 else
257 // Skip a position if ntry trials were not successful.
258 if (trial >= firstTrial + ntry)
260 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n",
261 rpos[XX][mol], rpos[YY][mol], rpos[ZZ][mol]);
262 ++mol;
263 ++failed;
264 firstTrial = trial;
265 continue;
267 // Insert at positions taken from option -ip file.
268 offset_x[XX] = rpos[XX][mol] + deltaR[XX]*(2 * dist(rng)-1);
269 offset_x[YY] = rpos[YY][mol] + deltaR[YY]*(2 * dist(rng)-1);
270 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ]*(2 * dist(rng)-1);
272 fprintf(stderr, "\rTry %d", ++trial);
273 fflush(stderr);
275 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
276 gmx::AnalysisNeighborhoodPositions pos(*x);
277 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
278 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt,
279 top->atoms, removableAtoms, &remover))
281 x->insert(x->end(), x_n.begin(), x_n.end());
282 exclusionDistances.insert(exclusionDistances.end(),
283 exclusionDistances_insrt.begin(),
284 exclusionDistances_insrt.end());
285 builder.mergeAtoms(atoms_insrt);
286 ++mol;
287 firstTrial = trial;
288 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
292 fprintf(stderr, "\n");
293 /* print number of molecules added */
294 fprintf(stderr, "Added %d molecules (out of %d requested)\n",
295 mol - failed, nmol_insrt);
297 const int originalAtomCount = top->atoms.nr;
298 const int originalResidueCount = top->atoms.nres;
299 remover.refreshAtomCount(top->atoms);
300 remover.removeMarkedElements(x);
301 remover.removeMarkedAtoms(&top->atoms);
302 if (top->atoms.nr < originalAtomCount)
304 fprintf(stderr, "Replaced %d residues (%d atoms)\n",
305 originalResidueCount - top->atoms.nres,
306 originalAtomCount - top->atoms.nr);
309 if (rpos != NULL)
311 for (int i = 0; i < DIM; ++i)
313 sfree(rpos[i]);
315 sfree(rpos);
319 namespace gmx
322 namespace
325 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
327 public:
328 InsertMolecules()
329 : bBox_(false), nmolIns_(0), nmolTry_(10), seed_(0),
330 defaultDistance_(0.105), scaleFactor_(0.57), enumRot_(en_rotXYZ),
331 top_(NULL), ePBC_(-1)
333 clear_rvec(newBox_);
334 clear_rvec(deltaR_);
335 clear_mat(box_);
337 virtual ~InsertMolecules()
339 if (top_ != NULL)
341 done_top(top_);
342 sfree(top_);
346 // From ITopologyProvider
347 virtual t_topology *getTopology(bool /*required*/) { return top_; }
348 virtual int getAtomCount() { return 0; }
350 // From ICommandLineOptionsModule
351 virtual void init(CommandLineModuleSettings * /*settings*/)
354 virtual void initOptions(IOptionsContainer *options,
355 ICommandLineOptionsModuleSettings *settings);
356 virtual void optionsFinished();
357 virtual int run();
359 private:
360 void loadSolute();
362 SelectionCollection selections_;
364 std::string inputConfFile_;
365 std::string insertConfFile_;
366 std::string positionFile_;
367 std::string outputConfFile_;
368 rvec newBox_;
369 bool bBox_;
370 int nmolIns_;
371 int nmolTry_;
372 int seed_;
373 real defaultDistance_;
374 real scaleFactor_;
375 rvec deltaR_;
376 RotationType enumRot_;
377 Selection replaceSel_;
379 t_topology *top_;
380 std::vector<RVec> x_;
381 matrix box_;
382 int ePBC_;
385 void InsertMolecules::initOptions(IOptionsContainer *options,
386 ICommandLineOptionsModuleSettings *settings)
388 const char *const desc[] = {
389 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
390 "the [TT]-ci[tt] input file. The insertions take place either into",
391 "vacant space in the solute conformation given with [TT]-f[tt], or",
392 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
393 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
394 "around the solute before insertions. Any velocities present are",
395 "discarded.",
397 "It is possible to also insert into a solvated configuration and",
398 "replace solvent atoms with the inserted atoms. To do this, use",
399 "[TT]-replace[tt] to specify a selection that identifies the atoms",
400 "that can be replaced. The tool assumes that all molecules in this",
401 "selection consist of single residues: each residue from this",
402 "selection that overlaps with the inserted molecules will be removed",
403 "instead of preventing insertion.",
405 "By default, the insertion positions are random (with initial seed",
406 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
407 "molecules have been inserted in the box. Molecules are not inserted",
408 "where the distance between any existing atom and any atom of the",
409 "inserted molecule is less than the sum based on the van der Waals",
410 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
411 "Waals radii is read by the program, and the resulting radii scaled",
412 "by [TT]-scale[tt]. If radii are not found in the database, those",
413 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
414 "Note that the usefulness of those radii depends on the atom names,",
415 "and thus varies widely with force field.",
417 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
418 "before giving up. Increase [TT]-try[tt] if you have several small",
419 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
420 "molecules are randomly oriented before insertion attempts.",
422 "Alternatively, the molecules can be inserted only at positions defined in",
423 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
424 "that give the displacements compared to the input molecule position",
425 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
426 "positions, the molecule must be centered on (0,0,0) before using",
427 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
428 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
429 "defines the maximally allowed displacements during insertial trials.",
430 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
433 settings->setHelpText(desc);
435 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
436 new SelectionOptionBehavior(&selections_, this));
437 settings->addOptionsBehavior(selectionOptionBehavior);
439 // TODO: Replace use of legacyType.
440 options->addOption(FileNameOption("f")
441 .legacyType(efSTX).inputFile()
442 .store(&inputConfFile_)
443 .defaultBasename("protein")
444 .description("Existing configuration to insert into"));
445 options->addOption(FileNameOption("ci")
446 .legacyType(efSTX).inputFile().required()
447 .store(&insertConfFile_)
448 .defaultBasename("insert")
449 .description("Configuration to insert"));
450 options->addOption(FileNameOption("ip")
451 .filetype(eftGenericData).inputFile()
452 .store(&positionFile_)
453 .defaultBasename("positions")
454 .description("Predefined insertion trial positions"));
455 options->addOption(FileNameOption("o")
456 .legacyType(efSTO).outputFile().required()
457 .store(&outputConfFile_)
458 .defaultBasename("out")
459 .description("Output configuration after insertion"));
461 options->addOption(SelectionOption("replace").onlyAtoms()
462 .store(&replaceSel_)
463 .description("Atoms that can be removed if overlapping"));
464 selectionOptionBehavior->initOptions(options);
466 options->addOption(RealOption("box").vector()
467 .store(newBox_).storeIsSet(&bBox_)
468 .description("Box size (in nm)"));
469 options->addOption(IntegerOption("nmol")
470 .store(&nmolIns_)
471 .description("Number of extra molecules to insert"));
472 options->addOption(IntegerOption("try")
473 .store(&nmolTry_)
474 .description("Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
475 options->addOption(IntegerOption("seed")
476 .store(&seed_)
477 .description("Random generator seed (0 means generate)"));
478 options->addOption(RealOption("radius")
479 .store(&defaultDistance_)
480 .description("Default van der Waals distance"));
481 options->addOption(RealOption("scale")
482 .store(&scaleFactor_)
483 .description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
484 options->addOption(RealOption("dr").vector()
485 .store(deltaR_)
486 .description("Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
487 options->addOption(EnumOption<RotationType>("rot").enumValue(cRotationEnum)
488 .store(&enumRot_)
489 .description("Rotate inserted molecules randomly"));
492 void InsertMolecules::optionsFinished()
494 if (nmolIns_ <= 0 && positionFile_.empty())
496 GMX_THROW(InconsistentInputError("Either -nmol must be larger than 0, "
497 "or positions must be given with -ip."));
499 if (inputConfFile_.empty() && !bBox_)
501 GMX_THROW(InconsistentInputError("When no solute (-f) is specified, "
502 "a box size (-box) must be specified."));
504 if (replaceSel_.isValid() && inputConfFile_.empty())
506 GMX_THROW(InconsistentInputError("Replacement (-replace) only makes sense "
507 "together with an existing configuration (-f)."));
510 snew(top_, 1);
511 if (!inputConfFile_.empty())
513 readConformation(inputConfFile_.c_str(), top_, &x_, NULL,
514 &ePBC_, box_, "solute");
515 if (top_->atoms.nr == 0)
517 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
522 int InsertMolecules::run()
524 std::set<int> removableAtoms;
525 if (replaceSel_.isValid())
527 t_pbc pbc;
528 set_pbc(&pbc, ePBC_, box_);
529 t_trxframe *fr;
530 snew(fr, 1);
531 fr->natoms = x_.size();
532 fr->bX = TRUE;
533 fr->x = as_rvec_array(x_.data());
534 selections_.evaluate(fr, &pbc);
535 sfree(fr);
536 removableAtoms.insert(replaceSel_.atomIndices().begin(),
537 replaceSel_.atomIndices().end());
538 // TODO: It could be nice to check that removableAtoms contains full
539 // residues, since we anyways remove whole residues instead of
540 // individual atoms.
543 if (bBox_)
545 ePBC_ = epbcXYZ;
546 clear_mat(box_);
547 box_[XX][XX] = newBox_[XX];
548 box_[YY][YY] = newBox_[YY];
549 box_[ZZ][ZZ] = newBox_[ZZ];
551 if (det(box_) == 0)
553 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
554 "or give explicit -box command line option");
557 t_topology *top_insrt;
558 std::vector<RVec> x_insrt;
559 snew(top_insrt, 1);
561 int ePBC_dummy;
562 matrix box_dummy;
563 readConformation(insertConfFile_.c_str(), top_insrt, &x_insrt,
564 NULL, &ePBC_dummy, box_dummy, "molecule");
565 if (top_insrt->atoms.nr == 0)
567 gmx_fatal(FARGS, "No molecule in %s, please check your input",
568 insertConfFile_.c_str());
570 if (top_->name == NULL)
572 top_->name = top_insrt->name;
574 if (positionFile_.empty())
576 center_molecule(&x_insrt);
580 /* add nmol_ins molecules of atoms_ins
581 in random orientation at random place */
582 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_,
583 top_, &x_, removableAtoms, top_insrt->atoms, x_insrt,
584 ePBC_, box_, positionFile_, deltaR_, enumRot_);
586 /* write new configuration to file confout */
587 fprintf(stderr, "Writing generated configuration to %s\n",
588 outputConfFile_.c_str());
589 write_sto_conf(outputConfFile_.c_str(), *top_->name, &top_->atoms,
590 as_rvec_array(x_.data()), NULL, ePBC_, box_);
592 /* print size of generated configuration */
593 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
594 top_->atoms.nr, top_->atoms.nres);
596 done_top(top_insrt);
597 sfree(top_insrt);
599 return 0;
602 } // namespace
604 const char InsertMoleculesInfo::name[] = "insert-molecules";
605 const char InsertMoleculesInfo::shortDescription[] =
606 "Insert molecules into existing vacancies";
607 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
609 return ICommandLineOptionsModulePointer(new InsertMolecules());
612 } // namespace gmx