2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/fileio/gmxfio.h"
50 #include "gromacs/gmxpreprocess/fflibutil.h"
51 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
52 #include "gromacs/gmxpreprocess/grompp_impl.h"
53 #include "gromacs/gmxpreprocess/h_db.h"
54 #include "gromacs/gmxpreprocess/notset.h"
55 #include "gromacs/gmxpreprocess/toputil.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/smalloc.h"
60 #include "gromacs/utility/strdb.h"
62 #include "hackblock.h"
65 /* use bonded types definitions in hackblock.h */
66 #define ekwRepl (ebtsNR + 1)
67 #define ekwAdd (ebtsNR + 2)
68 #define ekwDel (ebtsNR + 3)
70 static const char* kw_names
[ekwNR
] = { "replace", "add", "delete" };
72 static int find_kw(char* keyw
)
76 for (i
= 0; i
< ebtsNR
; i
++)
78 if (gmx_strcasecmp(btsNames
[i
], keyw
) == 0)
83 for (i
= 0; i
< ekwNR
; i
++)
85 if (gmx_strcasecmp(kw_names
[i
], keyw
) == 0)
87 return ebtsNR
+ 1 + i
;
94 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
96 static void read_atom(char* line
, bool bAdd
, std::string
* nname
, t_atom
* a
, PreprocessingAtomTypes
* atype
, int* cgnr
)
102 /* This code is messy, because of support for different formats:
103 * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
104 * for add: <atom type> <m> <q> [cgnr]
106 nr
= sscanf(line
, "%s %s %s %s %s", buf
[0], buf
[1], buf
[2], buf
[3], buf
[4]);
108 /* Here there an ambiguity due to the old replace format with cgnr,
109 * which was read for years, but ignored in the rest of the code.
110 * We have to assume that the atom type does not start with a digit
111 * to make a line with 4 entries uniquely interpretable.
113 if (!bAdd
&& nr
== 4 && isdigit(buf
[1][0]))
118 if (nr
< 3 || nr
> 4)
121 "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
137 a
->type
= atype
->atomTypeFromName(buf
[i
++]);
138 sscanf(buf
[i
++], "%lf", &m
);
140 sscanf(buf
[i
++], "%lf", &q
);
144 sscanf(buf
[i
++], "%d", cgnr
);
152 static void print_atom(FILE* out
, const t_atom
& a
, PreprocessingAtomTypes
* atype
)
154 fprintf(out
, "\t%s\t%g\t%g\n", atype
->atomNameFromAtomType(a
.type
), a
.m
, a
.q
);
157 static void print_ter_db(const char* ff
,
159 gmx::ArrayRef
<const MoleculePatchDatabase
> tb
,
160 PreprocessingAtomTypes
* atype
)
162 std::string buf
= gmx::formatString("%s-%c.tdb", ff
, C
);
163 FILE* out
= gmx_fio_fopen(buf
.c_str(), "w");
165 for (const auto& modification
: tb
)
167 fprintf(out
, "[ %s ]\n", modification
.name
.c_str());
169 if (std::any_of(modification
.hack
.begin(), modification
.hack
.end(),
170 [](const auto& mod
) { return mod
.type() == MoleculePatchType::Replace
; }))
172 fprintf(out
, "[ %s ]\n", kw_names
[ekwRepl
- ebtsNR
- 1]);
173 for (const auto& hack
: modification
.hack
)
175 if (hack
.type() == MoleculePatchType::Replace
)
177 fprintf(out
, "%s\t", hack
.oname
.c_str());
178 print_atom(out
, hack
.atom
.back(), atype
);
182 if (std::any_of(modification
.hack
.begin(), modification
.hack
.end(),
183 [](const auto& mod
) { return mod
.type() == MoleculePatchType::Add
; }))
185 fprintf(out
, "[ %s ]\n", kw_names
[ekwAdd
- ebtsNR
- 1]);
186 for (const auto& hack
: modification
.hack
)
188 if (hack
.type() == MoleculePatchType::Add
)
190 print_ab(out
, hack
, hack
.nname
.c_str());
191 print_atom(out
, hack
.atom
.back(), atype
);
195 if (std::any_of(modification
.hack
.begin(), modification
.hack
.end(),
196 [](const auto& mod
) { return mod
.type() == MoleculePatchType::Delete
; }))
198 fprintf(out
, "[ %s ]\n", kw_names
[ekwDel
- ebtsNR
- 1]);
199 for (const auto& hack
: modification
.hack
)
201 if (hack
.type() == MoleculePatchType::Delete
)
203 fprintf(out
, "%s\n", hack
.oname
.c_str());
207 for (int bt
= 0; bt
< ebtsNR
; bt
++)
209 if (!modification
.rb
[bt
].b
.empty())
211 fprintf(out
, "[ %s ]\n", btsNames
[bt
]);
212 for (const auto& b
: modification
.rb
[bt
].b
)
214 for (int k
= 0; k
< btsNiatoms
[bt
]; k
++)
216 fprintf(out
, "%s%s", k
? "\t" : "", b
.a
[k
].c_str());
220 fprintf(out
, "\t%s", b
.s
.c_str());
231 static void read_ter_db_file(const char* fn
,
232 std::vector
<MoleculePatchDatabase
>* tbptr
,
233 PreprocessingAtomTypes
* atype
)
235 char filebase
[STRLEN
];
236 char header
[STRLEN
], buf
[STRLEN
], line
[STRLEN
];
238 fflib_filename_base(fn
, filebase
, STRLEN
);
239 /* Remove the C/N termini extension */
240 char* ptr
= strrchr(filebase
, '.');
246 FILE* in
= fflib_open(fn
);
249 get_a_line(in
, line
, STRLEN
);
250 MoleculePatchDatabase
* block
= nullptr;
253 if (get_header(line
, header
))
255 /* this is a new block, or a new keyword */
256 kwnr
= find_kw(header
);
260 tbptr
->emplace_back(MoleculePatchDatabase());
261 block
= &tbptr
->back();
262 clearModificationBlock(block
);
263 block
->name
= header
;
264 block
->filebase
= filebase
;
269 if (block
== nullptr)
272 "reading termini database: "
273 "directive expected before line:\n%s\n"
274 "This might be a file in an old format.",
277 /* this is not a header, so it must be data */
280 /* this is a hack: add/rename/delete atoms */
281 /* make space for hacks */
282 block
->hack
.emplace_back(MoleculePatch());
283 MoleculePatch
* hack
= &block
->hack
.back();
287 if (kwnr
== ekwRepl
|| kwnr
== ekwDel
)
289 if (sscanf(line
, "%s%n", buf
, &n
) != 1)
292 "Reading Termini Database '%s': "
293 "expected atom name on line\n%s",
297 /* we only replace or delete one atom at a time */
300 else if (kwnr
== ekwAdd
)
302 read_ab(line
, fn
, hack
);
303 get_a_line(in
, line
, STRLEN
);
307 gmx_fatal(FARGS
, "unimplemented keyword number %d (%s:%d)", kwnr
, __FILE__
, __LINE__
);
309 if (kwnr
== ekwRepl
|| kwnr
== ekwAdd
)
311 hack
->atom
.emplace_back();
312 read_atom(line
+ n
, kwnr
== ekwAdd
, &hack
->nname
, &hack
->atom
.back(), atype
,
314 if (hack
->nname
.empty())
316 if (!hack
->oname
.empty())
318 hack
->nname
= hack
->oname
;
323 "Reading Termini Database '%s': don't know which name the "
324 "new atom should have on line\n%s",
330 else if (kwnr
>= 0 && kwnr
< ebtsNR
)
332 /* this is bonded data: bonds, angles, dihedrals or impropers */
334 block
->rb
[kwnr
].b
.emplace_back();
335 BondedInteraction
* newBond
= &block
->rb
[kwnr
].b
.back();
336 for (int j
= 0; j
< btsNiatoms
[kwnr
]; j
++)
339 if (sscanf(line
+ n
, "%s%n", buf
, &ni
) == 1)
346 "Reading Termini Database '%s': expected %d atom names (found "
348 fn
, btsNiatoms
[kwnr
], j
- 1, line
);
353 sscanf(line
+ n
, "%s", buf
);
359 "Reading Termini Database: Expecting a header at line\n"
364 get_a_line(in
, line
, STRLEN
);
370 int read_ter_db(const char* ffdir
, char ter
, std::vector
<MoleculePatchDatabase
>* tbptr
, PreprocessingAtomTypes
* atype
)
372 std::string ext
= gmx::formatString(".%c.tdb", ter
);
374 /* Search for termini database files.
375 * Do not generate an error when none are found.
377 std::vector
<std::string
> tdbf
= fflib_search_file_end(ffdir
, ext
.c_str(), FALSE
);
379 for (const auto& filename
: tdbf
)
381 read_ter_db_file(filename
.c_str(), tbptr
, atype
);
386 print_ter_db("new", ter
, *tbptr
, atype
);
389 return tbptr
->size();
392 std::vector
<MoleculePatchDatabase
*> filter_ter(gmx::ArrayRef
<MoleculePatchDatabase
> tb
, const char* resname
)
394 // TODO Four years later, no force fields have ever used this, so decide status of this feature
395 /* Since some force fields (e.g. OPLS) needs different
396 * atomtypes for different residues there could be a lot
397 * of entries in the databases for specific residues
398 * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
400 * To reduce the database size, we assume that a terminus specifier liker
402 * [ GLY|SER|ALA-NH3+ ]
404 * would cover all of the three residue types above.
405 * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
406 * pdb2gmx only uses the first 3 letters when calling this routine.
408 * To automate this, this routines scans a list of termini
409 * for the residue name "resname" and returns an allocated list of
410 * pointers to the termini that could be applied to the
411 * residue in question. The variable pointed to by nret will
412 * contain the number of valid pointers in the list.
413 * Remember to free the list when you are done with it...
416 auto none_idx
= tb
.end();
417 std::vector
<MoleculePatchDatabase
*> list
;
419 for (auto it
= tb
.begin(); it
!= tb
.end(); it
++)
421 const char* s
= it
->name
.c_str();
425 if (gmx::equalCaseInsensitive(resname
, s
, 3))
432 /* advance to next |-separated field */
439 } while (!found
&& s
!= nullptr);
442 /* All residue-specific termini have been added. We might have to fall
443 * back on generic termini, which are characterized by not having
444 * '-' in the name prior to the last position (which indicates charge).
445 * The [ None ] alternative is special since we don't want that
446 * to be the default, so we put it last in the list we return.
448 for (auto it
= tb
.begin(); it
!= tb
.end(); it
++)
450 const char* s
= it
->name
.c_str();
451 if (gmx::equalCaseInsensitive("None", it
->name
))
457 /* Time to see if there's a generic terminus that matches.
458 Is there a hyphen? */
459 const char* c
= strchr(s
, '-');
461 /* A conjunction hyphen normally indicates a residue-specific
462 terminus, which is named like "GLY-COOH". A generic terminus
463 won't have a hyphen. */
464 bool bFoundAnyHyphen
= (c
!= nullptr);
465 /* '-' as the last character indicates charge, so if that's
466 the only one found e.g. "COO-", then it was not a conjunction
467 hyphen, so this is a generic terminus */
468 bool bOnlyFoundChargeHyphen
= (bFoundAnyHyphen
&& *(c
+ 1) == '\0');
469 /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
470 bool bFoundGenericTerminus
= !bFoundAnyHyphen
|| bOnlyFoundChargeHyphen
;
471 if (bFoundGenericTerminus
)
473 /* Check that we haven't already added a residue-specific version
476 auto found
= std::find_if(list
.begin(), list
.end(), [&s
](const MoleculePatchDatabase
* b
) {
477 return strstr(b
->name
.c_str(), s
) != nullptr;
479 if (found
== list
.end())
486 if (none_idx
!= tb
.end())
488 list
.push_back(none_idx
);
495 MoleculePatchDatabase
* choose_ter(gmx::ArrayRef
<MoleculePatchDatabase
*> tb
, const char* title
)
499 printf("%s\n", title
);
501 for (const auto& modification
: tb
)
503 bool bIsZwitterion
= (0 == gmx_wcmatch("*ZWITTERION*", modification
->name
.c_str()));
504 printf("%2d: %s%s\n", i
, modification
->name
.c_str(),
505 bIsZwitterion
? " (only use with zwitterions containing exactly one residue)" : "");
510 ret
= fscanf(stdin
, "%d", &sel
);
511 } while ((ret
!= 1) || (sel
< 0) || (sel
>= tb
.ssize()));