Fix segfault in gmx genrestr.
[gromacs.git] / src / gromacs / gmxpreprocess / ter_db.cpp
blobf9434ba01be5a2ce094c68de724070ccd9c77ffa
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "ter_db.h"
41 #include <cctype>
42 #include <cstring>
44 #include <algorithm>
45 #include <string>
46 #include <vector>
48 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/gmxpreprocess/fflibutil.h"
50 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
51 #include "gromacs/gmxpreprocess/grompp_impl.h"
52 #include "gromacs/gmxpreprocess/h_db.h"
53 #include "gromacs/gmxpreprocess/notset.h"
54 #include "gromacs/gmxpreprocess/toputil.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/smalloc.h"
59 #include "gromacs/utility/strdb.h"
61 #include "hackblock.h"
62 #include "resall.h"
64 /* use bonded types definitions in hackblock.h */
65 #define ekwRepl (ebtsNR + 1)
66 #define ekwAdd (ebtsNR + 2)
67 #define ekwDel (ebtsNR + 3)
68 #define ekwNR 3
69 static const char* kw_names[ekwNR] = { "replace", "add", "delete" };
71 static int find_kw(char* keyw)
73 int i;
75 for (i = 0; i < ebtsNR; i++)
77 if (gmx_strcasecmp(btsNames[i], keyw) == 0)
79 return i;
82 for (i = 0; i < ekwNR; i++)
84 if (gmx_strcasecmp(kw_names[i], keyw) == 0)
86 return ebtsNR + 1 + i;
90 return NOTSET;
93 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
95 static void read_atom(char* line, bool bAdd, std::string* nname, t_atom* a, PreprocessingAtomTypes* atype, int* cgnr)
97 int nr, i;
98 char buf[5][30];
99 double m, q;
101 /* This code is messy, because of support for different formats:
102 * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
103 * for add: <atom type> <m> <q> [cgnr]
105 nr = sscanf(line, "%s %s %s %s %s", buf[0], buf[1], buf[2], buf[3], buf[4]);
107 /* Here there an ambiguity due to the old replace format with cgnr,
108 * which was read for years, but ignored in the rest of the code.
109 * We have to assume that the atom type does not start with a digit
110 * to make a line with 4 entries uniquely interpretable.
112 if (!bAdd && nr == 4 && isdigit(buf[1][0]))
114 nr = 3;
117 if (nr < 3 || nr > 4)
119 gmx_fatal(FARGS,
120 "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
121 "on line\n%s",
122 3, 4, nr, line);
124 i = 0;
125 if (!bAdd)
127 if (nr == 4)
129 *nname = buf[i++];
131 else
133 *nname = "";
136 a->type = atype->atomTypeFromName(buf[i++]);
137 sscanf(buf[i++], "%lf", &m);
138 a->m = m;
139 sscanf(buf[i++], "%lf", &q);
140 a->q = q;
141 if (bAdd && nr == 4)
143 sscanf(buf[i++], "%d", cgnr);
145 else
147 *cgnr = NOTSET;
151 static void print_atom(FILE* out, const t_atom& a, PreprocessingAtomTypes* atype)
153 fprintf(out, "\t%s\t%g\t%g\n", atype->atomNameFromAtomType(a.type), a.m, a.q);
156 static void print_ter_db(const char* ff,
157 char C,
158 gmx::ArrayRef<const MoleculePatchDatabase> tb,
159 PreprocessingAtomTypes* atype)
161 std::string buf = gmx::formatString("%s-%c.tdb", ff, C);
162 FILE* out = gmx_fio_fopen(buf.c_str(), "w");
164 for (const auto& modification : tb)
166 fprintf(out, "[ %s ]\n", modification.name.c_str());
168 if (std::any_of(modification.hack.begin(), modification.hack.end(),
169 [](const auto& mod) { return mod.type() == MoleculePatchType::Replace; }))
171 fprintf(out, "[ %s ]\n", kw_names[ekwRepl - ebtsNR - 1]);
172 for (const auto& hack : modification.hack)
174 if (hack.type() == MoleculePatchType::Replace)
176 fprintf(out, "%s\t", hack.oname.c_str());
177 print_atom(out, hack.atom.back(), atype);
181 if (std::any_of(modification.hack.begin(), modification.hack.end(),
182 [](const auto& mod) { return mod.type() == MoleculePatchType::Add; }))
184 fprintf(out, "[ %s ]\n", kw_names[ekwAdd - ebtsNR - 1]);
185 for (const auto& hack : modification.hack)
187 if (hack.type() == MoleculePatchType::Add)
189 print_ab(out, hack, hack.nname.c_str());
190 print_atom(out, hack.atom.back(), atype);
194 if (std::any_of(modification.hack.begin(), modification.hack.end(),
195 [](const auto& mod) { return mod.type() == MoleculePatchType::Delete; }))
197 fprintf(out, "[ %s ]\n", kw_names[ekwDel - ebtsNR - 1]);
198 for (const auto& hack : modification.hack)
200 if (hack.type() == MoleculePatchType::Delete)
202 fprintf(out, "%s\n", hack.oname.c_str());
206 for (int bt = 0; bt < ebtsNR; bt++)
208 if (!modification.rb[bt].b.empty())
210 fprintf(out, "[ %s ]\n", btsNames[bt]);
211 for (const auto& b : modification.rb[bt].b)
213 for (int k = 0; k < btsNiatoms[bt]; k++)
215 fprintf(out, "%s%s", k ? "\t" : "", b.a[k].c_str());
217 if (!b.s.empty())
219 fprintf(out, "\t%s", b.s.c_str());
221 fprintf(out, "\n");
225 fprintf(out, "\n");
227 gmx_fio_fclose(out);
230 static void read_ter_db_file(const char* fn,
231 std::vector<MoleculePatchDatabase>* tbptr,
232 PreprocessingAtomTypes* atype)
234 char filebase[STRLEN];
235 char header[STRLEN], buf[STRLEN], line[STRLEN];
237 fflib_filename_base(fn, filebase, STRLEN);
238 /* Remove the C/N termini extension */
239 char* ptr = strrchr(filebase, '.');
240 if (ptr != nullptr)
242 ptr[0] = '\0';
245 FILE* in = fflib_open(fn);
247 int kwnr = NOTSET;
248 get_a_line(in, line, STRLEN);
249 MoleculePatchDatabase* block = nullptr;
250 while (!feof(in))
252 if (get_header(line, header))
254 /* this is a new block, or a new keyword */
255 kwnr = find_kw(header);
257 if (kwnr == NOTSET)
259 tbptr->emplace_back(MoleculePatchDatabase());
260 block = &tbptr->back();
261 clearModificationBlock(block);
262 block->name = header;
263 block->filebase = filebase;
266 else
268 if (block == nullptr)
270 gmx_fatal(FARGS,
271 "reading termini database: "
272 "directive expected before line:\n%s\n"
273 "This might be a file in an old format.",
274 line);
276 /* this is not a header, so it must be data */
277 if (kwnr >= ebtsNR)
279 /* this is a hack: add/rename/delete atoms */
280 /* make space for hacks */
281 block->hack.emplace_back(MoleculePatch());
282 MoleculePatch* hack = &block->hack.back();
284 /* get data */
285 int n = 0;
286 if (kwnr == ekwRepl || kwnr == ekwDel)
288 if (sscanf(line, "%s%n", buf, &n) != 1)
290 gmx_fatal(FARGS,
291 "Reading Termini Database '%s': "
292 "expected atom name on line\n%s",
293 fn, line);
295 hack->oname = buf;
296 /* we only replace or delete one atom at a time */
297 hack->nr = 1;
299 else if (kwnr == ekwAdd)
301 read_ab(line, fn, hack);
302 get_a_line(in, line, STRLEN);
304 else
306 gmx_fatal(FARGS, "unimplemented keyword number %d (%s:%d)", kwnr, __FILE__, __LINE__);
308 if (kwnr == ekwRepl || kwnr == ekwAdd)
310 hack->atom.emplace_back();
311 read_atom(line + n, kwnr == ekwAdd, &hack->nname, &hack->atom.back(), atype,
312 &hack->cgnr);
313 if (hack->nname.empty())
315 if (!hack->oname.empty())
317 hack->nname = hack->oname;
319 else
321 gmx_fatal(FARGS,
322 "Reading Termini Database '%s': don't know which name the "
323 "new atom should have on line\n%s",
324 fn, line);
329 else if (kwnr >= 0 && kwnr < ebtsNR)
331 /* this is bonded data: bonds, angles, dihedrals or impropers */
332 int n = 0;
333 block->rb[kwnr].b.emplace_back();
334 BondedInteraction* newBond = &block->rb[kwnr].b.back();
335 for (int j = 0; j < btsNiatoms[kwnr]; j++)
337 int ni;
338 if (sscanf(line + n, "%s%n", buf, &ni) == 1)
340 newBond->a[j] = buf;
342 else
344 gmx_fatal(FARGS,
345 "Reading Termini Database '%s': expected %d atom names (found "
346 "%d) on line\n%s",
347 fn, btsNiatoms[kwnr], j - 1, line);
349 n += ni;
351 strcpy(buf, "");
352 sscanf(line + n, "%s", buf);
353 newBond->s = buf;
355 else
357 gmx_fatal(FARGS,
358 "Reading Termini Database: Expecting a header at line\n"
359 "%s",
360 line);
363 get_a_line(in, line, STRLEN);
366 gmx_ffclose(in);
369 int read_ter_db(const char* ffdir, char ter, std::vector<MoleculePatchDatabase>* tbptr, PreprocessingAtomTypes* atype)
371 std::string ext = gmx::formatString(".%c.tdb", ter);
373 /* Search for termini database files.
374 * Do not generate an error when none are found.
376 std::vector<std::string> tdbf = fflib_search_file_end(ffdir, ext.c_str(), FALSE);
377 tbptr->clear();
378 for (const auto& filename : tdbf)
380 read_ter_db_file(filename.c_str(), tbptr, atype);
383 if (debug)
385 print_ter_db("new", ter, *tbptr, atype);
388 return tbptr->size();
391 std::vector<MoleculePatchDatabase*> filter_ter(gmx::ArrayRef<MoleculePatchDatabase> tb, const char* resname)
393 // TODO Four years later, no force fields have ever used this, so decide status of this feature
394 /* Since some force fields (e.g. OPLS) needs different
395 * atomtypes for different residues there could be a lot
396 * of entries in the databases for specific residues
397 * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
399 * To reduce the database size, we assume that a terminus specifier liker
401 * [ GLY|SER|ALA-NH3+ ]
403 * would cover all of the three residue types above.
404 * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
405 * pdb2gmx only uses the first 3 letters when calling this routine.
407 * To automate this, this routines scans a list of termini
408 * for the residue name "resname" and returns an allocated list of
409 * pointers to the termini that could be applied to the
410 * residue in question. The variable pointed to by nret will
411 * contain the number of valid pointers in the list.
412 * Remember to free the list when you are done with it...
415 auto none_idx = tb.end();
416 std::vector<MoleculePatchDatabase*> list;
418 for (auto it = tb.begin(); it != tb.end(); it++)
420 const char* s = it->name.c_str();
421 bool found = false;
424 if (gmx::equalCaseInsensitive(resname, s, 3))
426 found = true;
427 list.push_back(it);
429 else
431 /* advance to next |-separated field */
432 s = strchr(s, '|');
433 if (s != nullptr)
435 s++;
438 } while (!found && s != nullptr);
441 /* All residue-specific termini have been added. We might have to fall
442 * back on generic termini, which are characterized by not having
443 * '-' in the name prior to the last position (which indicates charge).
444 * The [ None ] alternative is special since we don't want that
445 * to be the default, so we put it last in the list we return.
447 for (auto it = tb.begin(); it != tb.end(); it++)
449 const char* s = it->name.c_str();
450 if (gmx::equalCaseInsensitive("None", it->name))
452 none_idx = it;
454 else
456 /* Time to see if there's a generic terminus that matches.
457 Is there a hyphen? */
458 const char* c = strchr(s, '-');
460 /* A conjunction hyphen normally indicates a residue-specific
461 terminus, which is named like "GLY-COOH". A generic terminus
462 won't have a hyphen. */
463 bool bFoundAnyHyphen = (c != nullptr);
464 /* '-' as the last character indicates charge, so if that's
465 the only one found e.g. "COO-", then it was not a conjunction
466 hyphen, so this is a generic terminus */
467 bool bOnlyFoundChargeHyphen = (bFoundAnyHyphen && *(c + 1) == '\0');
468 /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
469 bool bFoundGenericTerminus = !bFoundAnyHyphen || bOnlyFoundChargeHyphen;
470 if (bFoundGenericTerminus)
472 /* Check that we haven't already added a residue-specific version
473 * of this terminus.
475 auto found = std::find_if(list.begin(), list.end(), [&s](const MoleculePatchDatabase* b) {
476 return strstr(b->name.c_str(), s) != nullptr;
478 if (found == list.end())
480 list.push_back(it);
485 if (none_idx != tb.end())
487 list.push_back(none_idx);
490 return list;
494 MoleculePatchDatabase* choose_ter(gmx::ArrayRef<MoleculePatchDatabase*> tb, const char* title)
496 int sel, ret;
498 printf("%s\n", title);
499 int i = 0;
500 for (const auto& modification : tb)
502 bool bIsZwitterion = (0 == gmx_wcmatch("*ZWITTERION*", modification->name.c_str()));
503 printf("%2d: %s%s\n", i, modification->name.c_str(),
504 bIsZwitterion ? " (only use with zwitterions containing exactly one residue)" : "");
505 i++;
509 ret = fscanf(stdin, "%d", &sel);
510 } while ((ret != 1) || (sel < 0) || (sel >= tb.ssize()));
512 return tb[sel];