Add AWH biasing module + tests
[gromacs.git] / src / gromacs / gmxpreprocess / insert-molecules.cpp
blob90de8972be21c5cd5dab93f8532e2682ec5b8ac8
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "insert-molecules.h"
41 #include <algorithm>
42 #include <memory>
43 #include <set>
44 #include <string>
45 #include <vector>
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/read-conformation.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/mtop_util.h"
71 #include "gromacs/topology/topology.h"
72 #include "gromacs/trajectory/trajectoryframe.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/smalloc.h"
78 using gmx::RVec;
80 /* enum for random rotations of inserted solutes */
81 enum RotationType {
82 en_rotXYZ, en_rotZ, en_rotNone
84 const char *const cRotationEnum[] = {"xyz", "z", "none"};
86 static void center_molecule(std::vector<RVec> *x)
88 const size_t atomCount = x->size();
89 rvec center;
90 clear_rvec(center);
91 for (size_t i = 0; i < atomCount; ++i)
93 rvec_inc(center, (*x)[i]);
95 svmul(1.0/atomCount, center, center);
96 for (size_t i = 0; i < atomCount; ++i)
98 rvec_dec((*x)[i], center);
102 static void generate_trial_conf(const std::vector<RVec> &xin,
103 const rvec offset, RotationType enum_rot,
104 gmx::DefaultRandomEngine * rng,
105 std::vector<RVec> *xout)
107 gmx::UniformRealDistribution<real> dist(0, 2.0*M_PI);
108 *xout = xin;
110 real alfa = 0.0, beta = 0.0, gamma = 0.0;
111 switch (enum_rot)
113 case en_rotXYZ:
114 alfa = dist(*rng);
115 beta = dist(*rng);
116 gamma = dist(*rng);
117 break;
118 case en_rotZ:
119 alfa = beta = 0.;
120 gamma = dist(*rng);
121 break;
122 case en_rotNone:
123 alfa = beta = gamma = 0.;
124 break;
126 if (enum_rot == en_rotXYZ || enum_rot == en_rotZ)
128 rotate_conf(xout->size(), as_rvec_array(xout->data()), nullptr, alfa, beta, gamma);
130 for (size_t i = 0; i < xout->size(); ++i)
132 rvec_inc((*xout)[i], offset);
136 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch *search,
137 const std::vector<real> &exclusionDistances,
138 const std::vector<RVec> &x,
139 const std::vector<real> &exclusionDistances_insrt,
140 const t_atoms &atoms,
141 const std::set<int> &removableAtoms,
142 gmx::AtomsRemover *remover)
144 gmx::AnalysisNeighborhoodPositions pos(x);
145 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
146 gmx::AnalysisNeighborhoodPair pair;
147 while (pairSearch.findNextPair(&pair))
149 const real r1 = exclusionDistances[pair.refIndex()];
150 const real r2 = exclusionDistances_insrt[pair.testIndex()];
151 if (pair.distance2() < gmx::square(r1 + r2))
153 if (removableAtoms.count(pair.refIndex()) == 0)
155 return false;
157 // TODO: If molecule information is available, this should ideally
158 // use it to remove whole molecules.
159 remover->markResidue(atoms, pair.refIndex(), true);
162 return true;
165 static void insert_mols(int nmol_insrt, int ntry, int seed,
166 real defaultDistance, real scaleFactor,
167 t_atoms *atoms, t_symtab *symtab, std::vector<RVec> *x,
168 const std::set<int> &removableAtoms,
169 const t_atoms &atoms_insrt, const std::vector<RVec> &x_insrt,
170 int ePBC, matrix box,
171 const std::string &posfn, const rvec deltaR,
172 RotationType enum_rot)
174 fprintf(stderr, "Initialising inter-atomic distances...\n");
175 gmx_atomprop_t aps = gmx_atomprop_init();
176 std::vector<real> exclusionDistances(
177 makeExclusionDistances(atoms, aps, defaultDistance, scaleFactor));
178 const std::vector<real> exclusionDistances_insrt(
179 makeExclusionDistances(&atoms_insrt, aps, defaultDistance, scaleFactor));
180 gmx_atomprop_destroy(aps);
182 const real maxInsertRadius
183 = *std::max_element(exclusionDistances_insrt.begin(),
184 exclusionDistances_insrt.end());
185 real maxRadius = maxInsertRadius;
186 if (!exclusionDistances.empty())
188 const real maxExistingRadius
189 = *std::max_element(exclusionDistances.begin(),
190 exclusionDistances.end());
191 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
194 // TODO: Make all of this exception-safe.
195 gmx::AnalysisNeighborhood nb;
196 nb.setCutoff(maxInsertRadius + maxRadius);
199 if (seed == 0)
201 seed = static_cast<int>(gmx::makeRandomSeed());
203 fprintf(stderr, "Using random seed %d\n", seed);
205 gmx::DefaultRandomEngine rng(seed);
207 t_pbc pbc;
208 set_pbc(&pbc, ePBC, box);
210 /* With -ip, take nmol_insrt from file posfn */
211 double **rpos = nullptr;
212 const bool insertAtPositions = !posfn.empty();
213 if (insertAtPositions)
215 int ncol;
216 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
217 if (ncol != 3)
219 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n",
220 posfn.c_str());
222 fprintf(stderr, "Read %d positions from file %s\n\n",
223 nmol_insrt, posfn.c_str());
226 gmx::AtomsBuilder builder(atoms, symtab);
227 gmx::AtomsRemover remover(*atoms);
229 const int finalAtomCount = atoms->nr + nmol_insrt * atoms_insrt.nr;
230 const int finalResidueCount = atoms->nres + nmol_insrt * atoms_insrt.nres;
231 builder.reserve(finalAtomCount, finalResidueCount);
232 x->reserve(finalAtomCount);
233 exclusionDistances.reserve(finalAtomCount);
236 std::vector<RVec> x_n(x_insrt.size());
238 int mol = 0;
239 int trial = 0;
240 int firstTrial = 0;
241 int failed = 0;
242 gmx::UniformRealDistribution<real> dist;
244 while (mol < nmol_insrt && trial < ntry*nmol_insrt)
246 // cppcheck 1.72 complains about uninitialized variables in the
247 // assignments below otherwise...
248 rvec offset_x = {0};
249 if (!insertAtPositions)
251 // Insert at random positions.
252 offset_x[XX] = box[XX][XX] * dist(rng);
253 offset_x[YY] = box[YY][YY] * dist(rng);
254 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
256 else
258 // Skip a position if ntry trials were not successful.
259 if (trial >= firstTrial + ntry)
261 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n",
262 rpos[XX][mol], rpos[YY][mol], rpos[ZZ][mol]);
263 ++mol;
264 ++failed;
265 firstTrial = trial;
266 continue;
268 // Insert at positions taken from option -ip file.
269 offset_x[XX] = rpos[XX][mol] + deltaR[XX]*(2 * dist(rng)-1);
270 offset_x[YY] = rpos[YY][mol] + deltaR[YY]*(2 * dist(rng)-1);
271 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ]*(2 * dist(rng)-1);
273 fprintf(stderr, "\rTry %d", ++trial);
274 fflush(stderr);
276 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
277 gmx::AnalysisNeighborhoodPositions pos(*x);
278 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
279 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt,
280 *atoms, removableAtoms, &remover))
282 x->insert(x->end(), x_n.begin(), x_n.end());
283 exclusionDistances.insert(exclusionDistances.end(),
284 exclusionDistances_insrt.begin(),
285 exclusionDistances_insrt.end());
286 builder.mergeAtoms(atoms_insrt);
287 ++mol;
288 firstTrial = trial;
289 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
293 fprintf(stderr, "\n");
294 /* print number of molecules added */
295 fprintf(stderr, "Added %d molecules (out of %d requested)\n",
296 mol - failed, nmol_insrt);
298 const int originalAtomCount = atoms->nr;
299 const int originalResidueCount = atoms->nres;
300 remover.refreshAtomCount(*atoms);
301 remover.removeMarkedElements(x);
302 remover.removeMarkedAtoms(atoms);
303 if (atoms->nr < originalAtomCount)
305 fprintf(stderr, "Replaced %d residues (%d atoms)\n",
306 originalResidueCount - atoms->nres,
307 originalAtomCount - atoms->nr);
310 if (rpos != nullptr)
312 for (int i = 0; i < DIM; ++i)
314 sfree(rpos[i]);
316 sfree(rpos);
320 namespace gmx
323 namespace
326 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
328 public:
329 InsertMolecules()
330 : bBox_(false), nmolIns_(0), nmolTry_(10), seed_(0),
331 defaultDistance_(0.105), scaleFactor_(0.57), enumRot_(en_rotXYZ),
332 top_(nullptr), ePBC_(-1)
334 clear_rvec(newBox_);
335 clear_rvec(deltaR_);
336 clear_mat(box_);
338 virtual ~InsertMolecules()
340 if (top_ != nullptr)
342 done_mtop(top_);
343 sfree(top_);
347 // From ITopologyProvider
348 virtual gmx_mtop_t *getTopology(bool /*required*/) { return top_; }
349 virtual int getAtomCount() { return 0; }
351 // From ICommandLineOptionsModule
352 virtual void init(CommandLineModuleSettings * /*settings*/)
355 virtual void initOptions(IOptionsContainer *options,
356 ICommandLineOptionsModuleSettings *settings);
357 virtual void optionsFinished();
358 virtual int run();
360 private:
361 void loadSolute();
363 SelectionCollection selections_;
365 std::string inputConfFile_;
366 std::string insertConfFile_;
367 std::string positionFile_;
368 std::string outputConfFile_;
369 rvec newBox_;
370 bool bBox_;
371 int nmolIns_;
372 int nmolTry_;
373 int seed_;
374 real defaultDistance_;
375 real scaleFactor_;
376 rvec deltaR_;
377 RotationType enumRot_;
378 Selection replaceSel_;
380 gmx_mtop_t *top_;
381 std::vector<RVec> x_;
382 matrix box_;
383 int ePBC_;
386 void InsertMolecules::initOptions(IOptionsContainer *options,
387 ICommandLineOptionsModuleSettings *settings)
389 const char *const desc[] = {
390 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
391 "the [TT]-ci[tt] input file. The insertions take place either into",
392 "vacant space in the solute conformation given with [TT]-f[tt], or",
393 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
394 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
395 "around the solute before insertions. Any velocities present are",
396 "discarded.",
398 "It is possible to also insert into a solvated configuration and",
399 "replace solvent atoms with the inserted atoms. To do this, use",
400 "[TT]-replace[tt] to specify a selection that identifies the atoms",
401 "that can be replaced. The tool assumes that all molecules in this",
402 "selection consist of single residues: each residue from this",
403 "selection that overlaps with the inserted molecules will be removed",
404 "instead of preventing insertion.",
406 "By default, the insertion positions are random (with initial seed",
407 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
408 "molecules have been inserted in the box. Molecules are not inserted",
409 "where the distance between any existing atom and any atom of the",
410 "inserted molecule is less than the sum based on the van der Waals",
411 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
412 "Waals radii is read by the program, and the resulting radii scaled",
413 "by [TT]-scale[tt]. If radii are not found in the database, those",
414 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
415 "Note that the usefulness of those radii depends on the atom names,",
416 "and thus varies widely with force field.",
418 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
419 "before giving up. Increase [TT]-try[tt] if you have several small",
420 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
421 "molecules are randomly oriented before insertion attempts.",
423 "Alternatively, the molecules can be inserted only at positions defined in",
424 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
425 "that give the displacements compared to the input molecule position",
426 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
427 "positions, the molecule must be centered on (0,0,0) before using",
428 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
429 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
430 "defines the maximally allowed displacements during insertial trials.",
431 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
434 settings->setHelpText(desc);
436 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
437 new SelectionOptionBehavior(&selections_, this));
438 settings->addOptionsBehavior(selectionOptionBehavior);
440 // TODO: Replace use of legacyType.
441 options->addOption(FileNameOption("f")
442 .legacyType(efSTX).inputFile()
443 .store(&inputConfFile_)
444 .defaultBasename("protein")
445 .description("Existing configuration to insert into"));
446 options->addOption(FileNameOption("ci")
447 .legacyType(efSTX).inputFile().required()
448 .store(&insertConfFile_)
449 .defaultBasename("insert")
450 .description("Configuration to insert"));
451 options->addOption(FileNameOption("ip")
452 .filetype(eftGenericData).inputFile()
453 .store(&positionFile_)
454 .defaultBasename("positions")
455 .description("Predefined insertion trial positions"));
456 options->addOption(FileNameOption("o")
457 .legacyType(efSTO).outputFile().required()
458 .store(&outputConfFile_)
459 .defaultBasename("out")
460 .description("Output configuration after insertion"));
462 options->addOption(SelectionOption("replace").onlyAtoms()
463 .store(&replaceSel_)
464 .description("Atoms that can be removed if overlapping"));
465 selectionOptionBehavior->initOptions(options);
467 options->addOption(RealOption("box").vector()
468 .store(newBox_).storeIsSet(&bBox_)
469 .description("Box size (in nm)"));
470 options->addOption(IntegerOption("nmol")
471 .store(&nmolIns_)
472 .description("Number of extra molecules to insert"));
473 options->addOption(IntegerOption("try")
474 .store(&nmolTry_)
475 .description("Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
476 options->addOption(IntegerOption("seed")
477 .store(&seed_)
478 .description("Random generator seed (0 means generate)"));
479 options->addOption(RealOption("radius")
480 .store(&defaultDistance_)
481 .description("Default van der Waals distance"));
482 options->addOption(RealOption("scale")
483 .store(&scaleFactor_)
484 .description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
485 options->addOption(RealOption("dr").vector()
486 .store(deltaR_)
487 .description("Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
488 options->addOption(EnumOption<RotationType>("rot").enumValue(cRotationEnum)
489 .store(&enumRot_)
490 .description("Rotate inserted molecules randomly"));
493 void InsertMolecules::optionsFinished()
495 if (nmolIns_ <= 0 && positionFile_.empty())
497 GMX_THROW(InconsistentInputError("Either -nmol must be larger than 0, "
498 "or positions must be given with -ip."));
500 if (inputConfFile_.empty() && !bBox_)
502 GMX_THROW(InconsistentInputError("When no solute (-f) is specified, "
503 "a box size (-box) must be specified."));
505 if (replaceSel_.isValid() && inputConfFile_.empty())
507 GMX_THROW(InconsistentInputError("Replacement (-replace) only makes sense "
508 "together with an existing configuration (-f)."));
511 snew(top_, 1);
512 if (!inputConfFile_.empty())
514 readConformation(inputConfFile_.c_str(), top_, &x_, nullptr,
515 &ePBC_, box_, "solute");
516 if (top_->natoms == 0)
518 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
523 int InsertMolecules::run()
525 std::set<int> removableAtoms;
526 if (replaceSel_.isValid())
528 t_pbc pbc;
529 set_pbc(&pbc, ePBC_, box_);
530 t_trxframe *fr;
531 snew(fr, 1);
532 fr->natoms = x_.size();
533 fr->bX = TRUE;
534 fr->x = as_rvec_array(x_.data());
535 selections_.evaluate(fr, &pbc);
536 sfree(fr);
537 removableAtoms.insert(replaceSel_.atomIndices().begin(),
538 replaceSel_.atomIndices().end());
539 // TODO: It could be nice to check that removableAtoms contains full
540 // residues, since we anyways remove whole residues instead of
541 // individual atoms.
544 if (bBox_)
546 ePBC_ = epbcXYZ;
547 clear_mat(box_);
548 box_[XX][XX] = newBox_[XX];
549 box_[YY][YY] = newBox_[YY];
550 box_[ZZ][ZZ] = newBox_[ZZ];
552 if (det(box_) == 0)
554 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
555 "or give explicit -box command line option");
558 t_topology *top_insrt;
559 std::vector<RVec> x_insrt;
560 snew(top_insrt, 1);
562 int ePBC_dummy;
563 matrix box_dummy;
564 readConformation(insertConfFile_.c_str(), top_insrt, &x_insrt,
565 nullptr, &ePBC_dummy, box_dummy, "molecule");
566 if (top_insrt->atoms.nr == 0)
568 gmx_fatal(FARGS, "No molecule in %s, please check your input",
569 insertConfFile_.c_str());
571 if (top_->name == nullptr)
573 top_->name = top_insrt->name;
575 if (positionFile_.empty())
577 center_molecule(&x_insrt);
581 // TODO: Adapt to use mtop throughout.
582 t_atoms atoms = gmx_mtop_global_atoms(top_);
584 /* add nmol_ins molecules of atoms_ins
585 in random orientation at random place */
586 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_,
587 &atoms, &top_->symtab, &x_, removableAtoms, top_insrt->atoms, x_insrt,
588 ePBC_, box_, positionFile_, deltaR_, enumRot_);
590 /* write new configuration to file confout */
591 fprintf(stderr, "Writing generated configuration to %s\n",
592 outputConfFile_.c_str());
593 write_sto_conf(outputConfFile_.c_str(), *top_->name, &atoms,
594 as_rvec_array(x_.data()), nullptr, ePBC_, box_);
596 /* print size of generated configuration */
597 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
598 atoms.nr, atoms.nres);
600 done_atom(&atoms);
601 done_top(top_insrt);
602 sfree(top_insrt);
604 return 0;
607 } // namespace
609 const char InsertMoleculesInfo::name[] = "insert-molecules";
610 const char InsertMoleculesInfo::shortDescription[] =
611 "Insert molecules into existing vacancies";
612 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
614 return ICommandLineOptionsModulePointer(new InsertMolecules());
617 } // namespace gmx