Clean up cmake build host detection
[gromacs.git] / src / gromacs / gmxpreprocess / pdb2gmx.cpp
blobbc2b891a7f7bb7e62fcdc7afd2818eb8ec106b77
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "pdb2gmx.h"
41 #include <ctype.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <time.h>
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/gmxlib/conformation-utilities.h"
51 #include "gromacs/gmxpreprocess/fflibutil.h"
52 #include "gromacs/gmxpreprocess/genhydro.h"
53 #include "gromacs/gmxpreprocess/h_db.h"
54 #include "gromacs/gmxpreprocess/hizzie.h"
55 #include "gromacs/gmxpreprocess/pdb2top.h"
56 #include "gromacs/gmxpreprocess/pgutil.h"
57 #include "gromacs/gmxpreprocess/resall.h"
58 #include "gromacs/gmxpreprocess/specbond.h"
59 #include "gromacs/gmxpreprocess/ter_db.h"
60 #include "gromacs/gmxpreprocess/toputil.h"
61 #include "gromacs/gmxpreprocess/xlate.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/topology/atomprop.h"
64 #include "gromacs/topology/block.h"
65 #include "gromacs/topology/index.h"
66 #include "gromacs/topology/residuetypes.h"
67 #include "gromacs/topology/symtab.h"
68 #include "gromacs/topology/topology.h"
69 #include "gromacs/utility/arraysize.h"
70 #include "gromacs/utility/cstringutil.h"
71 #include "gromacs/utility/dir_separator.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/smalloc.h"
75 #include "gromacs/utility/strdb.h"
77 #define RTP_MAXCHAR 5
78 typedef struct {
79 char gmx[RTP_MAXCHAR+2];
80 char main[RTP_MAXCHAR+2];
81 char nter[RTP_MAXCHAR+2];
82 char cter[RTP_MAXCHAR+2];
83 char bter[RTP_MAXCHAR+2];
84 } rtprename_t;
87 static const char *res2bb_notermini(const char *name,
88 int nrr, const rtprename_t *rr)
90 /* NOTE: This function returns the main building block name,
91 * it does not take terminal renaming into account.
93 int i;
95 i = 0;
96 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
98 i++;
101 return (i < nrr ? rr[i].main : name);
104 static const char *select_res(int nr, int resnr,
105 const char *name[], const char *expl[],
106 const char *title,
107 int nrr, const rtprename_t *rr)
109 int sel = 0;
111 printf("Which %s type do you want for residue %d\n", title, resnr+1);
112 for (sel = 0; (sel < nr); sel++)
114 printf("%d. %s (%s)\n",
115 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
117 printf("\nType a number:"); fflush(stdout);
119 if (scanf("%d", &sel) != 1)
121 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
124 return name[sel];
127 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
129 enum {
130 easp, easpH, easpNR
132 const char *lh[easpNR] = { "ASP", "ASPH" };
133 const char *expl[easpNR] = {
134 "Not protonated (charge -1)",
135 "Protonated (charge 0)"
138 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
141 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
143 enum {
144 eglu, egluH, egluNR
146 const char *lh[egluNR] = { "GLU", "GLUH" };
147 const char *expl[egluNR] = {
148 "Not protonated (charge -1)",
149 "Protonated (charge 0)"
152 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
155 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
157 enum {
158 egln, eglnH, eglnNR
160 const char *lh[eglnNR] = { "GLN", "QLN" };
161 const char *expl[eglnNR] = {
162 "Not protonated (charge 0)",
163 "Protonated (charge +1)"
166 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
169 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
171 enum {
172 elys, elysH, elysNR
174 const char *lh[elysNR] = { "LYSN", "LYS" };
175 const char *expl[elysNR] = {
176 "Not protonated (charge 0)",
177 "Protonated (charge +1)"
180 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
183 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
185 enum {
186 earg, eargH, eargNR
188 const char *lh[eargNR] = { "ARGN", "ARG" };
189 const char *expl[eargNR] = {
190 "Not protonated (charge 0)",
191 "Protonated (charge +1)"
194 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
197 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
199 const char *expl[ehisNR] = {
200 "H on ND1 only",
201 "H on NE2 only",
202 "H on ND1 and NE2",
203 "Coupled to Heme"
206 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
209 static void read_rtprename(const char *fname, FILE *fp,
210 int *nrtprename, rtprename_t **rtprename)
212 char line[STRLEN], buf[STRLEN];
213 int n;
214 rtprename_t *rr;
215 int ncol, nc;
217 n = *nrtprename;
218 rr = *rtprename;
220 ncol = 0;
221 while (get_a_line(fp, line, STRLEN))
223 srenew(rr, n+1);
224 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
225 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
226 * Note that the buffer length has been increased to 7 to allow this,
227 * so we just need to make sure the strings have zero-length initially.
229 rr[n].gmx[0] = '\0';
230 rr[n].main[0] = '\0';
231 rr[n].nter[0] = '\0';
232 rr[n].cter[0] = '\0';
233 rr[n].bter[0] = '\0';
234 nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
235 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
236 if (strlen(rr[n].gmx) > RTP_MAXCHAR || strlen(rr[n].main) > RTP_MAXCHAR ||
237 strlen(rr[n].nter) > RTP_MAXCHAR || strlen(rr[n].cter) > RTP_MAXCHAR || strlen(rr[n].bter) > RTP_MAXCHAR)
239 gmx_fatal(FARGS, "Residue renaming database '%s' has strings longer than %d chars in first 5 columns:\n%s",
240 fname, RTP_MAXCHAR, line);
243 if (ncol == 0)
245 if (nc != 2 && nc != 5)
247 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
249 ncol = nc;
251 else if (nc != ncol)
253 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
256 if (nc == 2)
258 /* This file does not have special termini names, copy them from main */
259 strcpy(rr[n].nter, rr[n].main);
260 strcpy(rr[n].cter, rr[n].main);
261 strcpy(rr[n].bter, rr[n].main);
264 n++;
267 *nrtprename = n;
268 *rtprename = rr;
271 static char *search_resrename(int nrr, rtprename_t *rr,
272 const char *name,
273 gmx_bool bStart, gmx_bool bEnd,
274 gmx_bool bCompareFFRTPname)
276 char *nn;
277 int i;
279 nn = nullptr;
281 i = 0;
282 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
283 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
285 i++;
288 /* If found in the database, rename this residue's rtp building block,
289 * otherwise keep the old name.
291 if (i < nrr)
293 if (bStart && bEnd)
295 nn = rr[i].bter;
297 else if (bStart)
299 nn = rr[i].nter;
301 else if (bEnd)
303 nn = rr[i].cter;
305 else
307 nn = rr[i].main;
310 if (nn[0] == '-')
312 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
316 return nn;
320 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
321 int nrr, rtprename_t *rr, t_symtab *symtab,
322 gmx_bool bVerbose)
324 int r, j;
325 gmx_bool bStart, bEnd;
326 char *nn;
327 gmx_bool bFFRTPTERRNM;
329 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
331 for (r = 0; r < pdba->nres; r++)
333 bStart = FALSE;
334 bEnd = FALSE;
335 for (j = 0; j < nterpairs; j++)
337 if (r == r_start[j])
339 bStart = TRUE;
342 for (j = 0; j < nterpairs; j++)
344 if (r == r_end[j])
346 bEnd = TRUE;
350 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
352 if (bFFRTPTERRNM && nn == nullptr && (bStart || bEnd))
354 /* This is a terminal residue, but the residue name,
355 * currently stored in .rtp, is not a standard residue name,
356 * but probably a force field specific rtp name.
357 * Check if we need to rename it because it is terminal.
359 nn = search_resrename(nrr, rr,
360 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
363 if (nn != nullptr && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
365 if (bVerbose)
367 printf("Changing rtp entry of residue %d %s to '%s'\n",
368 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
370 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
375 static void pdbres_to_gmxrtp(t_atoms *pdba)
377 int i;
379 for (i = 0; (i < pdba->nres); i++)
381 if (pdba->resinfo[i].rtp == nullptr)
383 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
388 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
389 gmx_bool bFullCompare, t_symtab *symtab)
391 char *resnm;
392 int i;
394 for (i = 0; (i < pdba->nres); i++)
396 resnm = *pdba->resinfo[i].name;
397 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
398 (!bFullCompare && strstr(resnm, oldnm) != nullptr))
400 /* Rename the residue name (not the rtp name) */
401 pdba->resinfo[i].name = put_symtab(symtab, newnm);
406 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
407 gmx_bool bFullCompare, t_symtab *symtab)
409 char *bbnm;
410 int i;
412 for (i = 0; (i < pdba->nres); i++)
414 /* We have not set the rtp name yes, use the residue name */
415 bbnm = *pdba->resinfo[i].name;
416 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
417 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
419 /* Change the rtp builing block name */
420 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
425 static void rename_bbint(t_atoms *pdba, const char *oldnm,
426 const char *gettp(int, int, const rtprename_t *),
427 gmx_bool bFullCompare,
428 t_symtab *symtab,
429 int nrr, const rtprename_t *rr)
431 int i;
432 const char *ptr;
433 char *bbnm;
435 for (i = 0; i < pdba->nres; i++)
437 /* We have not set the rtp name yes, use the residue name */
438 bbnm = *pdba->resinfo[i].name;
439 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
440 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
442 ptr = gettp(i, nrr, rr);
443 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
448 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
450 int i, ftp;
451 int nzero = 0;
452 int nnotone = 0;
454 ftp = fn2ftp(filename);
455 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
457 fprintf(stderr, "No occupancies in %s\n", filename);
459 else
461 for (i = 0; (i < atoms->nr); i++)
463 if (atoms->pdbinfo[i].occup != 1)
465 if (bVerbose)
467 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
468 *atoms->resinfo[atoms->atom[i].resind].name,
469 atoms->resinfo[ atoms->atom[i].resind].nr,
470 *atoms->atomname[i],
471 atoms->pdbinfo[i].occup);
473 if (atoms->pdbinfo[i].occup == 0)
475 nzero++;
477 else
479 nnotone++;
483 if (nzero == atoms->nr)
485 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
487 else if ((nzero > 0) || (nnotone > 0))
489 fprintf(stderr,
490 "\n"
491 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
492 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
493 "\n",
494 nzero, nnotone, atoms->nr);
496 else
498 fprintf(stderr, "All occupancies are one\n");
503 void write_posres(char *fn, t_atoms *pdba, real fc)
505 FILE *fp;
506 int i;
508 fp = gmx_fio_fopen(fn, "w");
509 fprintf(fp,
510 "; In this topology include file, you will find position restraint\n"
511 "; entries for all the heavy atoms in your original pdb file.\n"
512 "; This means that all the protons which were added by pdb2gmx are\n"
513 "; not restrained.\n"
514 "\n"
515 "[ position_restraints ]\n"
516 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
518 for (i = 0; (i < pdba->nr); i++)
520 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
522 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
525 gmx_fio_fclose(fp);
528 static int read_pdball(const char *inf, const char *outf, char *title,
529 t_atoms *atoms, rvec **x,
530 int *ePBC, matrix box, gmx_bool bRemoveH,
531 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
532 gmx_atomprop_t aps, gmx_bool bVerbose)
533 /* Read a pdb file. (containing proteins) */
535 int natom, new_natom, i;
537 /* READ IT */
538 printf("Reading %s...\n", inf);
539 t_topology *top;
540 snew(top, 1);
541 read_tps_conf(inf, top, ePBC, x, nullptr, box, FALSE);
542 strncpy(title, *top->name, STRLEN);
543 title[STRLEN-1] = '\0';
544 *atoms = top->atoms;
545 sfree(top);
546 natom = atoms->nr;
547 if (atoms->pdbinfo == nullptr)
549 snew(atoms->pdbinfo, atoms->nr);
551 if (fn2ftp(inf) == efPDB)
553 get_pdb_atomnumber(atoms, aps);
555 if (bRemoveH)
557 new_natom = 0;
558 for (i = 0; i < atoms->nr; i++)
560 if (!is_hydrogen(*atoms->atomname[i]))
562 atoms->atom[new_natom] = atoms->atom[i];
563 atoms->atomname[new_natom] = atoms->atomname[i];
564 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
565 copy_rvec((*x)[i], (*x)[new_natom]);
566 new_natom++;
569 atoms->nr = new_natom;
570 natom = new_natom;
573 printf("Read");
574 if (title[0])
576 printf(" '%s',", title);
578 printf(" %d atoms\n", natom);
580 /* Rename residues */
581 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
582 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
583 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
585 rename_atoms("xlateat.dat", nullptr,
586 atoms, symtab, nullptr, TRUE, rt, TRUE, bVerbose);
588 if (natom == 0)
590 return 0;
593 if (outf)
595 write_sto_conf(outf, title, atoms, *x, nullptr, *ePBC, box);
598 return natom;
601 void process_chain(t_atoms *pdba, rvec *x,
602 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
603 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
604 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
605 real angle, real distance, t_symtab *symtab,
606 int nrr, const rtprename_t *rr)
608 /* Rename aromatics, lys, asp and histidine */
609 if (bTyrU)
611 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
613 if (bTrpU)
615 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
617 if (bPheU)
619 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
621 if (bLysMan)
623 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
625 if (bArgMan)
627 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
629 if (bGlnMan)
631 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
633 if (bAspMan)
635 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
637 else
639 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
641 if (bGluMan)
643 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
645 else
647 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
650 if (!bHisMan)
652 set_histp(pdba, x, angle, distance);
654 else
656 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
659 /* Initialize the rtp builing block names with the residue names
660 * for the residues that have not been processed above.
662 pdbres_to_gmxrtp(pdba);
664 /* Now we have all rtp names set.
665 * The rtp names will conform to Gromacs naming,
666 * unless the input pdb file contained one or more force field specific
667 * rtp names as residue names.
671 /* struct for sorting the atoms from the pdb file */
672 typedef struct {
673 int resnr; /* residue number */
674 int j; /* database order index */
675 int index; /* original atom number */
676 char anm1; /* second letter of atom name */
677 char altloc; /* alternate location indicator */
678 } t_pdbindex;
680 int pdbicomp(const void *a, const void *b)
682 t_pdbindex *pa, *pb;
683 int d;
685 pa = (t_pdbindex *)a;
686 pb = (t_pdbindex *)b;
688 d = (pa->resnr - pb->resnr);
689 if (d == 0)
691 d = (pa->j - pb->j);
692 if (d == 0)
694 d = (pa->anm1 - pb->anm1);
695 if (d == 0)
697 d = (pa->altloc - pb->altloc);
702 return d;
705 static void sort_pdbatoms(t_restp restp[],
706 int natoms, t_atoms **pdbaptr, rvec **x,
707 t_blocka *block, char ***gnames)
709 t_atoms *pdba, *pdbnew;
710 rvec **xnew;
711 int i, j;
712 t_restp *rptr;
713 t_pdbindex *pdbi;
714 int *a;
715 char *atomnm;
717 pdba = *pdbaptr;
718 natoms = pdba->nr;
719 pdbnew = nullptr;
720 snew(xnew, 1);
721 snew(pdbi, natoms);
723 for (i = 0; i < natoms; i++)
725 atomnm = *pdba->atomname[i];
726 rptr = &restp[pdba->atom[i].resind];
727 for (j = 0; (j < rptr->natom); j++)
729 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
731 break;
734 if (j == rptr->natom)
736 char buf[STRLEN];
738 sprintf(buf,
739 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
740 "while sorting atoms.\n%s", atomnm,
741 *pdba->resinfo[pdba->atom[i].resind].name,
742 pdba->resinfo[pdba->atom[i].resind].nr,
743 rptr->resname,
744 rptr->natom,
745 is_hydrogen(atomnm) ?
746 "\nFor a hydrogen, this can be a different protonation state, or it\n"
747 "might have had a different number in the PDB file and was rebuilt\n"
748 "(it might for instance have been H3, and we only expected H1 & H2).\n"
749 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
750 "Remove this hydrogen or choose a different protonation state to solve it.\n"
751 "Option -ignh will ignore all hydrogens in the input." : ".");
752 gmx_fatal(FARGS, buf);
754 /* make shadow array to be sorted into indexgroup */
755 pdbi[i].resnr = pdba->atom[i].resind;
756 pdbi[i].j = j;
757 pdbi[i].index = i;
758 pdbi[i].anm1 = atomnm[1];
759 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
761 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
763 /* pdba is sorted in pdbnew using the pdbi index */
764 snew(a, natoms);
765 snew(pdbnew, 1);
766 init_t_atoms(pdbnew, natoms, TRUE);
767 snew(*xnew, natoms);
768 pdbnew->nr = pdba->nr;
769 pdbnew->nres = pdba->nres;
770 sfree(pdbnew->resinfo);
771 pdbnew->resinfo = pdba->resinfo;
772 for (i = 0; i < natoms; i++)
774 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
775 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
776 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
777 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
778 /* make indexgroup in block */
779 a[i] = pdbi[i].index;
781 /* clean up */
782 sfree(pdba->atomname);
783 sfree(pdba->atom);
784 sfree(pdba->pdbinfo);
785 sfree(pdba);
786 sfree(*x);
787 /* copy the sorted pdbnew back to pdba */
788 *pdbaptr = pdbnew;
789 *x = *xnew;
790 add_grp(block, gnames, natoms, a, "prot_sort");
791 sfree(xnew);
792 sfree(a);
793 sfree(pdbi);
796 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
798 int i, j, oldnatoms, ndel;
799 t_resinfo *ri;
801 printf("Checking for duplicate atoms....\n");
802 oldnatoms = pdba->nr;
803 ndel = 0;
804 /* NOTE: pdba->nr is modified inside the loop */
805 for (i = 1; (i < pdba->nr); i++)
807 /* compare 'i' and 'i-1', throw away 'i' if they are identical
808 this is a 'while' because multiple alternate locations can be present */
809 while ( (i < pdba->nr) &&
810 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
811 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
813 ndel++;
814 if (bVerbose)
816 ri = &pdba->resinfo[pdba->atom[i].resind];
817 printf("deleting duplicate atom %4s %s%4d%c",
818 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
819 if (ri->chainid && (ri->chainid != ' '))
821 printf(" ch %c", ri->chainid);
823 if (pdba->pdbinfo)
825 if (pdba->pdbinfo[i].atomnr)
827 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
829 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
831 printf(" altloc %c", pdba->pdbinfo[i].altloc);
834 printf("\n");
836 pdba->nr--;
837 /* We can not free, since it might be in the symtab */
838 /* sfree(pdba->atomname[i]); */
839 for (j = i; j < pdba->nr; j++)
841 pdba->atom[j] = pdba->atom[j+1];
842 pdba->atomname[j] = pdba->atomname[j+1];
843 if (pdba->pdbinfo)
845 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
847 copy_rvec(x[j+1], x[j]);
849 srenew(pdba->atom, pdba->nr);
850 /* srenew(pdba->atomname, pdba->nr); */
851 srenew(pdba->pdbinfo, pdba->nr);
854 if (pdba->nr != oldnatoms)
856 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
859 return pdba->nr;
862 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
863 gmx_residuetype_t *rt)
865 int i;
866 const char *p_startrestype;
867 const char *p_restype;
868 int nstartwarn, nendwarn;
870 *r_start = -1;
871 *r_end = -1;
873 nstartwarn = 0;
874 nendwarn = 0;
876 /* Find the starting terminus (typially N or 5') */
877 for (i = r0; i < r1 && *r_start == -1; i++)
879 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
880 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
882 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
883 *r_start = i;
885 else
887 if (nstartwarn < 5)
889 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
891 if (nstartwarn == 5)
893 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
895 nstartwarn++;
899 if (*r_start >= 0)
901 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
902 for (i = *r_start; i < r1; i++)
904 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
905 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
907 *r_end = i;
909 else
911 if (nendwarn < 5)
913 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
914 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
915 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
917 if (nendwarn == 5)
919 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
921 nendwarn++;
926 if (*r_end >= 0)
928 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
933 enum SplittingType
935 SPLIT_ID_OR_TER,
936 SPLIT_ID_AND_TER,
937 SPLIT_ID_ONLY,
938 SPLIT_TER_ONLY,
939 SPLIT_INTERACTIVE
942 static SplittingType getSplittingType(const char *chainsep)
944 SplittingType splitting = SPLIT_TER_ONLY; /* keep compiler happy */
946 /* Be a bit flexible to catch typos */
947 if (!strncmp(chainsep, "id_o", 4))
949 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
950 splitting = SPLIT_ID_OR_TER;
951 printf("Splitting chemical chains based on TER records or chain id changing.\n");
953 else if (!strncmp(chainsep, "int", 3))
955 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
956 splitting = SPLIT_INTERACTIVE;
957 printf("Splitting chemical chains interactively.\n");
959 else if (!strncmp(chainsep, "id_a", 4))
961 splitting = SPLIT_ID_AND_TER;
962 printf("Splitting chemical chains based on TER records and chain id changing.\n");
964 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
966 splitting = SPLIT_ID_ONLY;
967 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
969 else if (chainsep[0] == 't')
971 splitting = SPLIT_TER_ONLY;
972 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
974 else
976 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
978 return splitting;
981 static void
982 modify_chain_numbers(t_atoms * pdba,
983 const char * chainsep)
985 int i;
986 char old_prev_chainid;
987 char old_this_chainid;
988 int old_prev_chainnum;
989 int old_this_chainnum;
990 t_resinfo *ri;
991 char select[STRLEN];
992 int new_chainnum;
993 int this_atomnum;
994 int prev_atomnum;
995 const char * prev_atomname;
996 const char * this_atomname;
997 const char * prev_resname;
998 const char * this_resname;
999 int prev_resnum;
1000 int this_resnum;
1001 char prev_chainid;
1002 char this_chainid;
1004 SplittingType splitting = getSplittingType(chainsep);
1006 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
1008 old_prev_chainid = '?';
1009 old_prev_chainnum = -1;
1010 new_chainnum = -1;
1012 this_atomname = nullptr;
1013 this_atomnum = -1;
1014 this_resname = nullptr;
1015 this_resnum = -1;
1016 this_chainid = '?';
1018 for (i = 0; i < pdba->nres; i++)
1020 ri = &pdba->resinfo[i];
1021 old_this_chainid = ri->chainid;
1022 old_this_chainnum = ri->chainnum;
1024 prev_atomname = this_atomname;
1025 prev_atomnum = this_atomnum;
1026 prev_resname = this_resname;
1027 prev_resnum = this_resnum;
1028 prev_chainid = this_chainid;
1030 this_atomname = *(pdba->atomname[i]);
1031 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i+1;
1032 this_resname = *ri->name;
1033 this_resnum = ri->nr;
1034 this_chainid = ri->chainid;
1036 switch (splitting)
1038 case SPLIT_ID_OR_TER:
1039 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1041 new_chainnum++;
1043 break;
1045 case SPLIT_ID_AND_TER:
1046 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1048 new_chainnum++;
1050 break;
1052 case SPLIT_ID_ONLY:
1053 if (old_this_chainid != old_prev_chainid)
1055 new_chainnum++;
1057 break;
1059 case SPLIT_TER_ONLY:
1060 if (old_this_chainnum != old_prev_chainnum)
1062 new_chainnum++;
1064 break;
1065 case SPLIT_INTERACTIVE:
1066 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1068 if (i > 0)
1070 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1071 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1072 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1073 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1075 if (nullptr == fgets(select, STRLEN-1, stdin))
1077 gmx_fatal(FARGS, "Error reading from stdin");
1080 if (i == 0 || select[0] == 'y')
1082 new_chainnum++;
1085 break;
1086 default:
1087 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1088 break;
1090 old_prev_chainid = old_this_chainid;
1091 old_prev_chainnum = old_this_chainnum;
1093 ri->chainnum = new_chainnum;
1098 typedef struct {
1099 char chainid;
1100 char chainnum;
1101 int start;
1102 int natom;
1103 gmx_bool bAllWat;
1104 int nterpairs;
1105 int *chainstart;
1106 } t_pdbchain;
1108 typedef struct {
1109 char chainid;
1110 int chainnum;
1111 gmx_bool bAllWat;
1112 int nterpairs;
1113 int *chainstart;
1114 t_hackblock **ntdb;
1115 t_hackblock **ctdb;
1116 int *r_start;
1117 int *r_end;
1118 t_atoms *pdba;
1119 rvec *x;
1120 } t_chain;
1122 int gmx_pdb2gmx(int argc, char *argv[])
1124 const char *desc[] = {
1125 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1126 "some database files, adds hydrogens to the molecules and generates",
1127 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1128 "and a topology in GROMACS format.",
1129 "These files can subsequently be processed to generate a run input file.",
1130 "[PAR]",
1131 "[THISMODULE] will search for force fields by looking for",
1132 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1133 "of the current working directory and of the GROMACS library directory",
1134 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1135 "variable.",
1136 "By default the forcefield selection is interactive,",
1137 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1138 "in the list on the command line instead. In that case [THISMODULE] just looks",
1139 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1140 "[PAR]",
1141 "After choosing a force field, all files will be read only from",
1142 "the corresponding force field directory.",
1143 "If you want to modify or add a residue types, you can copy the force",
1144 "field directory from the GROMACS library directory to your current",
1145 "working directory. If you want to add new protein residue types,",
1146 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1147 "or copy the whole library directory to a local directory and set",
1148 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1149 "Check Chapter 5 of the manual for more information about file formats.",
1150 "[PAR]",
1152 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1153 "need not necessarily contain a protein structure. Every kind of",
1154 "molecule for which there is support in the database can be converted.",
1155 "If there is no support in the database, you can add it yourself.[PAR]",
1157 "The program has limited intelligence, it reads a number of database",
1158 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1159 "if necessary this can be done manually. The program can prompt the",
1160 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1161 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1162 "protonated (three protons, default), for Asp and Glu unprotonated",
1163 "(default) or protonated, for His the proton can be either on ND1,",
1164 "on NE2 or on both. By default these selections are done automatically.",
1165 "For His, this is based on an optimal hydrogen bonding",
1166 "conformation. Hydrogen bonds are defined based on a simple geometric",
1167 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1168 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1169 "[TT]-dist[tt] respectively.[PAR]",
1171 "The protonation state of N- and C-termini can be chosen interactively",
1172 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1173 "respectively. Some force fields support zwitterionic forms for chains of",
1174 "one residue, but for polypeptides these options should NOT be selected.",
1175 "The AMBER force fields have unique forms for the terminal residues,",
1176 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1177 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1178 "respectively to use these forms, making sure you preserve the format",
1179 "of the coordinate file. Alternatively, use named terminating residues",
1180 "(e.g. ACE, NME).[PAR]",
1182 "The separation of chains is not entirely trivial since the markup",
1183 "in user-generated PDB files frequently varies and sometimes it",
1184 "is desirable to merge entries across a TER record, for instance",
1185 "if you want a disulfide bridge or distance restraints between",
1186 "two protein chains or if you have a HEME group bound to a protein.",
1187 "In such cases multiple chains should be contained in a single",
1188 "[TT]moleculetype[tt] definition.",
1189 "To handle this, [THISMODULE] uses two separate options.",
1190 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1191 "start, and termini added when applicable. This can be done based on the",
1192 "existence of TER records, when the chain id changes, or combinations of either",
1193 "or both of these. You can also do the selection fully interactively.",
1194 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1195 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1196 "This can be turned off (no merging), all non-water chains can be merged into a",
1197 "single molecule, or the selection can be done interactively.[PAR]",
1199 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1200 "If any of the occupancies are not one, indicating that the atom is",
1201 "not resolved well in the structure, a warning message is issued.",
1202 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1203 "all occupancy fields may be zero. Either way, it is up to the user",
1204 "to verify the correctness of the input data (read the article!).[PAR]",
1206 "During processing the atoms will be reordered according to GROMACS",
1207 "conventions. With [TT]-n[tt] an index file can be generated that",
1208 "contains one group reordered in the same way. This allows you to",
1209 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1210 "one limitation: reordering is done after the hydrogens are stripped",
1211 "from the input and before new hydrogens are added. This means that",
1212 "you should not use [TT]-ignh[tt].[PAR]",
1214 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1215 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1216 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1217 "[PAR]",
1219 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1220 "motions. Angular and out-of-plane motions can be removed by changing",
1221 "hydrogens into virtual sites and fixing angles, which fixes their",
1222 "position relative to neighboring atoms. Additionally, all atoms in the",
1223 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1224 "can be converted into virtual sites, eliminating the fast improper dihedral",
1225 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1226 "atoms are also converted to virtual sites. The mass of all atoms that are",
1227 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1228 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1229 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1230 "done for water hydrogens to slow down the rotational motion of water.",
1231 "The increase in mass of the hydrogens is subtracted from the bonded",
1232 "(heavy) atom so that the total mass of the system remains the same."
1236 FILE *fp, *top_file, *top_file2, *itp_file = nullptr;
1237 int natom, nres;
1238 t_atoms pdba_all, *pdba;
1239 t_atoms *atoms;
1240 t_resinfo *ri;
1241 t_blocka *block;
1242 int chain, nch, maxch, nwaterchain;
1243 t_pdbchain *pdb_ch;
1244 t_chain *chains, *cc;
1245 char select[STRLEN];
1246 int nincl, nmol;
1247 char **incls;
1248 t_mols *mols;
1249 char **gnames;
1250 int ePBC;
1251 matrix box;
1252 rvec box_space;
1253 int i, j, k, l, nrtp;
1254 int *swap_index, si;
1255 t_restp *restp;
1256 t_hackblock *ah;
1257 t_symtab symtab;
1258 gpp_atomtype_t atype;
1259 gmx_residuetype_t*rt;
1260 const char *top_fn;
1261 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1262 char molname[STRLEN], title[STRLEN];
1263 char *c, forcefield[STRLEN], ffdir[STRLEN];
1264 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1265 char *watermodel;
1266 const char *watres;
1267 int nrtpf;
1268 char **rtpf;
1269 int nrrn;
1270 char **rrn;
1271 int nrtprename;
1272 rtprename_t *rtprename = nullptr;
1273 int nah, nNtdb, nCtdb, ntdblist;
1274 t_hackblock *ntdb, *ctdb, **tdblist;
1275 int nssbonds;
1276 t_ssbond *ssbonds;
1277 rvec *pdbx, *x;
1278 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE;
1279 real mHmult = 0;
1280 t_hackblock *hb_chain;
1281 t_restp *restp_chain;
1282 gmx_output_env_t *oenv;
1283 const char *p_restype;
1284 int rc;
1285 int this_atomnum;
1286 int prev_atomnum;
1287 const char * prev_atomname;
1288 const char * this_atomname;
1289 const char * prev_resname;
1290 const char * this_resname;
1291 int prev_resnum;
1292 int this_resnum;
1293 char prev_chainid;
1294 char this_chainid;
1295 int prev_chainnumber;
1296 int this_chainnumber;
1297 int nid_used;
1298 int this_chainstart;
1299 int prev_chainstart;
1300 gmx_bool bMerged;
1301 int nchainmerges;
1303 gmx_atomprop_t aps;
1305 t_filenm fnm[] = {
1306 { efSTX, "-f", "eiwit.pdb", ffREAD },
1307 { efSTO, "-o", "conf", ffWRITE },
1308 { efTOP, nullptr, nullptr, ffWRITE },
1309 { efITP, "-i", "posre", ffWRITE },
1310 { efNDX, "-n", "clean", ffOPTWR },
1311 { efSTO, "-q", "clean.pdb", ffOPTWR }
1313 #define NFILE asize(fnm)
1316 /* Command line arguments must be static */
1317 static gmx_bool bNewRTP = FALSE;
1318 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1319 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1320 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1321 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1322 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1323 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1324 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1325 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1326 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1327 static const char *vsitestr[] = { nullptr, "none", "hydrogens", "aromatics", nullptr };
1328 static const char *watstr[] = { nullptr, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", nullptr };
1329 static const char *chainsep[] = { nullptr, "id_or_ter", "id_and_ter", "ter", "id", "interactive", nullptr };
1330 static const char *merge[] = {nullptr, "no", "all", "interactive", nullptr };
1331 static const char *ff = "select";
1333 t_pargs pa[] = {
1334 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1335 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1336 { "-lb", FALSE, etREAL, {&long_bond_dist},
1337 "HIDDENLong bond warning distance" },
1338 { "-sb", FALSE, etREAL, {&short_bond_dist},
1339 "HIDDENShort bond warning distance" },
1340 { "-chainsep", FALSE, etENUM, {chainsep},
1341 "Condition in PDB files when a new chain should be started (adding termini)" },
1342 { "-merge", FALSE, etENUM, {&merge},
1343 "Merge multiple chains into a single [moleculetype]" },
1344 { "-ff", FALSE, etSTR, {&ff},
1345 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1346 { "-water", FALSE, etENUM, {watstr},
1347 "Water model to use" },
1348 { "-inter", FALSE, etBOOL, {&bInter},
1349 "Set the next 8 options to interactive"},
1350 { "-ss", FALSE, etBOOL, {&bCysMan},
1351 "Interactive SS bridge selection" },
1352 { "-ter", FALSE, etBOOL, {&bTerMan},
1353 "Interactive termini selection, instead of charged (default)" },
1354 { "-lys", FALSE, etBOOL, {&bLysMan},
1355 "Interactive lysine selection, instead of charged" },
1356 { "-arg", FALSE, etBOOL, {&bArgMan},
1357 "Interactive arginine selection, instead of charged" },
1358 { "-asp", FALSE, etBOOL, {&bAspMan},
1359 "Interactive aspartic acid selection, instead of charged" },
1360 { "-glu", FALSE, etBOOL, {&bGluMan},
1361 "Interactive glutamic acid selection, instead of charged" },
1362 { "-gln", FALSE, etBOOL, {&bGlnMan},
1363 "Interactive glutamine selection, instead of neutral" },
1364 { "-his", FALSE, etBOOL, {&bHisMan},
1365 "Interactive histidine selection, instead of checking H-bonds" },
1366 { "-angle", FALSE, etREAL, {&angle},
1367 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1368 { "-dist", FALSE, etREAL, {&distance},
1369 "Maximum donor-acceptor distance for a H-bond (nm)" },
1370 { "-una", FALSE, etBOOL, {&bUnA},
1371 "Select aromatic rings with united CH atoms on phenylalanine, "
1372 "tryptophane and tyrosine" },
1373 { "-sort", FALSE, etBOOL, {&bSort},
1374 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1375 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1376 "Ignore hydrogen atoms that are in the coordinate file" },
1377 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1378 "Continue when atoms are missing, dangerous" },
1379 { "-v", FALSE, etBOOL, {&bVerbose},
1380 "Be slightly more verbose in messages" },
1381 { "-posrefc", FALSE, etREAL, {&posre_fc},
1382 "Force constant for position restraints" },
1383 { "-vsite", FALSE, etENUM, {vsitestr},
1384 "Convert atoms to virtual sites" },
1385 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1386 "Make hydrogen atoms heavy" },
1387 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1388 "Change the mass of hydrogens to 2 amu" },
1389 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1390 "Use charge groups in the [REF].rtp[ref] file" },
1391 { "-cmap", TRUE, etBOOL, {&bCmap},
1392 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)" },
1393 { "-renum", TRUE, etBOOL, {&bRenumRes},
1394 "Renumber the residues consecutively in the output" },
1395 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1396 "Use [REF].rtp[ref] entry names as residue names" }
1398 #define NPARGS asize(pa)
1400 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1401 0, nullptr, &oenv))
1403 return 0;
1406 /* Force field selection, interactive or direct */
1407 choose_ff(strcmp(ff, "select") == 0 ? nullptr : ff,
1408 forcefield, sizeof(forcefield),
1409 ffdir, sizeof(ffdir));
1411 if (strlen(forcefield) > 0)
1413 strcpy(ffname, forcefield);
1414 ffname[0] = toupper(ffname[0]);
1416 else
1418 gmx_fatal(FARGS, "Empty forcefield string");
1421 printf("\nUsing the %s force field in directory %s\n\n",
1422 ffname, ffdir);
1424 choose_watermodel(watstr[0], ffdir, &watermodel);
1426 if (bInter)
1428 /* if anything changes here, also change description of -inter */
1429 bCysMan = TRUE;
1430 bTerMan = TRUE;
1431 bLysMan = TRUE;
1432 bArgMan = TRUE;
1433 bAspMan = TRUE;
1434 bGluMan = TRUE;
1435 bGlnMan = TRUE;
1436 bHisMan = TRUE;
1439 if (bHeavyH)
1441 mHmult = 4.0;
1443 else if (bDeuterate)
1445 mHmult = 2.0;
1447 else
1449 mHmult = 1.0;
1452 /* parse_common_args ensures vsitestr has been selected, but
1453 clang-static-analyzer needs clues to know that */
1454 GMX_ASSERT(vsitestr[0], "-vsite default wasn't processed correctly");
1455 switch (vsitestr[0][0])
1457 case 'n': /* none */
1458 bVsites = FALSE;
1459 bVsiteAromatics = FALSE;
1460 break;
1461 case 'h': /* hydrogens */
1462 bVsites = TRUE;
1463 bVsiteAromatics = FALSE;
1464 break;
1465 case 'a': /* aromatics */
1466 bVsites = TRUE;
1467 bVsiteAromatics = TRUE;
1468 break;
1469 default:
1470 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1471 __FILE__, __LINE__, vsitestr[0]);
1472 } /* end switch */
1474 /* Open the symbol table */
1475 open_symtab(&symtab);
1477 /* Residue type database */
1478 gmx_residuetype_init(&rt);
1480 /* Read residue renaming database(s), if present */
1481 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1483 nrtprename = 0;
1484 rtprename = nullptr;
1485 for (i = 0; i < nrrn; i++)
1487 fp = fflib_open(rrn[i]);
1488 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1489 gmx_ffclose(fp);
1490 sfree(rrn[i]);
1492 sfree(rrn);
1494 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1495 for (i = 0; i < nrtprename; i++)
1497 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1499 /* Only add names if the 'standard' gromacs/iupac base name was found */
1500 if (rc == 0)
1502 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1503 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1504 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1505 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1509 clear_mat(box);
1510 if (watermodel != nullptr && (strstr(watermodel, "4p") ||
1511 strstr(watermodel, "4P")))
1513 watres = "HO4";
1515 else if (watermodel != nullptr && (strstr(watermodel, "5p") ||
1516 strstr(watermodel, "5P")))
1518 watres = "HO5";
1520 else
1522 watres = "HOH";
1525 aps = gmx_atomprop_init();
1526 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1527 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1528 aps, bVerbose);
1530 if (natom == 0)
1532 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1535 printf("Analyzing pdb file\n");
1536 nwaterchain = 0;
1538 modify_chain_numbers(&pdba_all, chainsep[0]);
1540 nchainmerges = 0;
1542 this_atomname = nullptr;
1543 this_atomnum = -1;
1544 this_resname = nullptr;
1545 this_resnum = -1;
1546 this_chainid = '?';
1547 this_chainnumber = -1;
1548 this_chainstart = 0;
1549 /* Keep the compiler happy */
1550 prev_chainstart = 0;
1552 nch = 0;
1553 maxch = 16;
1554 snew(pdb_ch, maxch);
1556 bMerged = FALSE;
1557 for (i = 0; (i < natom); i++)
1559 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1561 /* TODO this should live in a helper object, and consolidate
1562 that with code in modify_chain_numbers */
1563 prev_atomname = this_atomname;
1564 prev_atomnum = this_atomnum;
1565 prev_resname = this_resname;
1566 prev_resnum = this_resnum;
1567 prev_chainid = this_chainid;
1568 prev_chainnumber = this_chainnumber;
1569 if (!bMerged)
1571 prev_chainstart = this_chainstart;
1574 this_atomname = *pdba_all.atomname[i];
1575 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i+1;
1576 this_resname = *ri->name;
1577 this_resnum = ri->nr;
1578 this_chainid = ri->chainid;
1579 this_chainnumber = ri->chainnum;
1581 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1582 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1584 this_chainstart = pdba_all.atom[i].resind;
1586 bMerged = FALSE;
1587 if (i > 0 && !bWat)
1589 if (!strncmp(merge[0], "int", 3))
1591 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1592 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1593 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1594 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1596 if (nullptr == fgets(select, STRLEN-1, stdin))
1598 gmx_fatal(FARGS, "Error reading from stdin");
1600 bMerged = (select[0] == 'y');
1602 else if (!strncmp(merge[0], "all", 3))
1604 bMerged = TRUE;
1608 if (bMerged)
1610 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1611 pdba_all.atom[i].resind - prev_chainstart;
1612 pdb_ch[nch-1].nterpairs++;
1613 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1614 nchainmerges++;
1616 else
1618 /* set natom for previous chain */
1619 if (nch > 0)
1621 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1623 if (bWat)
1625 nwaterchain++;
1626 ri->chainid = ' ';
1628 /* check if chain identifier was used before */
1629 for (j = 0; (j < nch); j++)
1631 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1633 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1634 "They will be treated as separate chains unless you reorder your file.\n",
1635 ri->chainid);
1638 // TODO This is too convoluted. Use a std::vector
1639 if (nch == maxch)
1641 maxch += 16;
1642 srenew(pdb_ch, maxch);
1644 pdb_ch[nch].chainid = ri->chainid;
1645 pdb_ch[nch].chainnum = ri->chainnum;
1646 pdb_ch[nch].start = i;
1647 pdb_ch[nch].bAllWat = bWat;
1648 if (bWat)
1650 pdb_ch[nch].nterpairs = 0;
1652 else
1654 pdb_ch[nch].nterpairs = 1;
1656 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1657 /* modified [nch] to [0] below */
1658 pdb_ch[nch].chainstart[0] = 0;
1659 nch++;
1662 bPrevWat = bWat;
1664 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1666 /* set all the water blocks at the end of the chain */
1667 snew(swap_index, nch);
1668 j = 0;
1669 for (i = 0; i < nch; i++)
1671 if (!pdb_ch[i].bAllWat)
1673 swap_index[j] = i;
1674 j++;
1677 for (i = 0; i < nch; i++)
1679 if (pdb_ch[i].bAllWat)
1681 swap_index[j] = i;
1682 j++;
1685 if (nwaterchain > 1)
1687 printf("Moved all the water blocks to the end\n");
1690 snew(chains, nch);
1691 /* copy pdb data and x for all chains */
1692 for (i = 0; (i < nch); i++)
1694 si = swap_index[i];
1695 chains[i].chainid = pdb_ch[si].chainid;
1696 chains[i].chainnum = pdb_ch[si].chainnum;
1697 chains[i].bAllWat = pdb_ch[si].bAllWat;
1698 chains[i].nterpairs = pdb_ch[si].nterpairs;
1699 chains[i].chainstart = pdb_ch[si].chainstart;
1700 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1701 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1702 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1703 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1705 snew(chains[i].pdba, 1);
1706 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1707 snew(chains[i].x, chains[i].pdba->nr);
1708 for (j = 0; j < chains[i].pdba->nr; j++)
1710 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1711 snew(chains[i].pdba->atomname[j], 1);
1712 *chains[i].pdba->atomname[j] =
1713 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1714 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1715 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1717 /* Re-index the residues assuming that the indices are continuous */
1718 k = chains[i].pdba->atom[0].resind;
1719 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1720 chains[i].pdba->nres = nres;
1721 for (j = 0; j < chains[i].pdba->nr; j++)
1723 chains[i].pdba->atom[j].resind -= k;
1725 srenew(chains[i].pdba->resinfo, nres);
1726 for (j = 0; j < nres; j++)
1728 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1729 snew(chains[i].pdba->resinfo[j].name, 1);
1730 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1731 /* make all chain identifiers equal to that of the chain */
1732 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1736 if (nchainmerges > 0)
1738 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1739 nchainmerges);
1742 printf("There are %d chains and %d blocks of water and "
1743 "%d residues with %d atoms\n",
1744 nch-nwaterchain, nwaterchain,
1745 pdba_all.nres, natom);
1747 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1748 for (i = 0; (i < nch); i++)
1750 printf(" %d '%c' %5d %6d %s\n",
1751 i+1, chains[i].chainid ? chains[i].chainid : '-',
1752 chains[i].pdba->nres, chains[i].pdba->nr,
1753 chains[i].bAllWat ? "(only water)" : "");
1755 printf("\n");
1757 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1759 /* Read atomtypes... */
1760 atype = read_atype(ffdir, &symtab);
1762 /* read residue database */
1763 printf("Reading residue database... (%s)\n", forcefield);
1764 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1765 nrtp = 0;
1766 restp = nullptr;
1767 for (i = 0; i < nrtpf; i++)
1769 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1770 sfree(rtpf[i]);
1772 sfree(rtpf);
1773 if (bNewRTP)
1775 /* Not correct with multiple rtp input files with different bonded types */
1776 fp = gmx_fio_fopen("new.rtp", "w");
1777 print_resall(fp, nrtp, restp, atype);
1778 gmx_fio_fclose(fp);
1781 /* read hydrogen database */
1782 nah = read_h_db(ffdir, &ah);
1784 /* Read Termini database... */
1785 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1786 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1788 top_fn = ftp2fn(efTOP, NFILE, fnm);
1789 top_file = gmx_fio_fopen(top_fn, "w");
1791 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1793 nincl = 0;
1794 nmol = 0;
1795 incls = nullptr;
1796 mols = nullptr;
1797 for (chain = 0; (chain < nch); chain++)
1799 cc = &(chains[chain]);
1801 /* set pdba, natom and nres to the current chain */
1802 pdba = cc->pdba;
1803 x = cc->x;
1804 natom = cc->pdba->nr;
1805 nres = cc->pdba->nres;
1807 if (cc->chainid && ( cc->chainid != ' ' ) )
1809 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1810 chain+1, cc->chainid, natom, nres);
1812 else
1814 printf("Processing chain %d (%d atoms, %d residues)\n",
1815 chain+1, natom, nres);
1818 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1819 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1820 nrtprename, rtprename);
1822 cc->chainstart[cc->nterpairs] = pdba->nres;
1823 j = 0;
1824 for (i = 0; i < cc->nterpairs; i++)
1826 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1827 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1829 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1831 j++;
1834 cc->nterpairs = j;
1835 if (cc->nterpairs == 0)
1837 printf("Problem with chain definition, or missing terminal residues.\n"
1838 "This chain does not appear to contain a recognized chain molecule.\n"
1839 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1842 /* Check for disulfides and other special bonds */
1843 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1845 if (nrtprename > 0)
1847 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1848 &symtab, bVerbose);
1851 if (debug)
1853 if (nch == 1)
1855 sprintf(fn, "chain.pdb");
1857 else
1859 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1861 write_sto_conf(fn, title, pdba, x, nullptr, ePBC, box);
1865 for (i = 0; i < cc->nterpairs; i++)
1868 /* Set termini.
1869 * We first apply a filter so we only have the
1870 * termini that can be applied to the residue in question
1871 * (or a generic terminus if no-residue specific is available).
1873 /* First the N terminus */
1874 if (nNtdb > 0)
1876 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1877 *pdba->resinfo[cc->r_start[i]].name,
1878 *pdba->resinfo[cc->r_start[i]].rtp,
1879 &ntdblist);
1880 if (ntdblist == 0)
1882 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1883 "is already in a terminus-specific form and skipping terminus selection.\n");
1884 cc->ntdb[i] = nullptr;
1886 else
1888 if (bTerMan && ntdblist > 1)
1890 sprintf(select, "Select start terminus type for %s-%d",
1891 *pdba->resinfo[cc->r_start[i]].name,
1892 pdba->resinfo[cc->r_start[i]].nr);
1893 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1895 else
1897 cc->ntdb[i] = tdblist[0];
1900 printf("Start terminus %s-%d: %s\n",
1901 *pdba->resinfo[cc->r_start[i]].name,
1902 pdba->resinfo[cc->r_start[i]].nr,
1903 (cc->ntdb[i])->name);
1904 sfree(tdblist);
1907 else
1909 cc->ntdb[i] = nullptr;
1912 /* And the C terminus */
1913 if (nCtdb > 0)
1915 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1916 *pdba->resinfo[cc->r_end[i]].name,
1917 *pdba->resinfo[cc->r_end[i]].rtp,
1918 &ntdblist);
1919 if (ntdblist == 0)
1921 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1922 "is already in a terminus-specific form and skipping terminus selection.\n");
1923 cc->ctdb[i] = nullptr;
1925 else
1927 if (bTerMan && ntdblist > 1)
1929 sprintf(select, "Select end terminus type for %s-%d",
1930 *pdba->resinfo[cc->r_end[i]].name,
1931 pdba->resinfo[cc->r_end[i]].nr);
1932 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1934 else
1936 cc->ctdb[i] = tdblist[0];
1938 printf("End terminus %s-%d: %s\n",
1939 *pdba->resinfo[cc->r_end[i]].name,
1940 pdba->resinfo[cc->r_end[i]].nr,
1941 (cc->ctdb[i])->name);
1942 sfree(tdblist);
1945 else
1947 cc->ctdb[i] = nullptr;
1950 /* lookup hackblocks and rtp for all residues */
1951 get_hackblocks_rtp(&hb_chain, &restp_chain,
1952 nrtp, restp, pdba->nres, pdba->resinfo,
1953 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1954 /* ideally, now we would not need the rtp itself anymore, but do
1955 everything using the hb and restp arrays. Unfortunately, that
1956 requires some re-thinking of code in gen_vsite.c, which I won't
1957 do now :( AF 26-7-99 */
1959 rename_atoms(nullptr, ffdir,
1960 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1962 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1964 if (bSort)
1966 block = new_blocka();
1967 snew(gnames, 1);
1968 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1969 remove_duplicate_atoms(pdba, x, bVerbose);
1970 if (ftp2bSet(efNDX, NFILE, fnm))
1972 if (bRemoveH)
1974 fprintf(stderr, "WARNING: with the -remh option the generated "
1975 "index file (%s) might be useless\n"
1976 "(the index file is generated before hydrogens are added)",
1977 ftp2fn(efNDX, NFILE, fnm));
1979 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1981 for (i = 0; i < block->nr; i++)
1983 sfree(gnames[i]);
1985 sfree(gnames);
1986 done_blocka(block);
1988 else
1990 fprintf(stderr, "WARNING: "
1991 "without sorting no check for duplicate atoms can be done\n");
1994 /* Generate Hydrogen atoms (and termini) in the sequence */
1995 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1996 natom = add_h(&pdba, &x, nah, ah,
1997 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1998 nullptr, nullptr, TRUE, FALSE);
1999 printf("Now there are %d residues with %d atoms\n",
2000 pdba->nres, pdba->nr);
2001 if (debug)
2003 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, nullptr, TRUE);
2006 if (debug)
2008 for (i = 0; (i < natom); i++)
2010 fprintf(debug, "Res %s%d atom %d %s\n",
2011 *(pdba->resinfo[pdba->atom[i].resind].name),
2012 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
2016 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
2018 /* make up molecule name(s) */
2020 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2022 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2024 suffix[0] = '\0';
2026 if (cc->bAllWat)
2028 sprintf(molname, "Water");
2030 else
2032 this_chainid = cc->chainid;
2034 /* Add the chain id if we have one */
2035 if (this_chainid != ' ')
2037 sprintf(buf, "_chain_%c", this_chainid);
2038 strcat(suffix, buf);
2041 /* Check if there have been previous chains with the same id */
2042 nid_used = 0;
2043 for (k = 0; k < chain; k++)
2045 if (cc->chainid == chains[k].chainid)
2047 nid_used++;
2050 /* Add the number for this chain identifier if there are multiple copies */
2051 if (nid_used > 0)
2054 sprintf(buf, "%d", nid_used+1);
2055 strcat(suffix, buf);
2058 if (strlen(suffix) > 0)
2060 sprintf(molname, "%s%s", p_restype, suffix);
2062 else
2064 strcpy(molname, p_restype);
2068 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2070 bITP = TRUE;
2071 strcpy(itp_fn, top_fn);
2072 printf("Chain time...\n");
2073 c = strrchr(itp_fn, '.');
2074 sprintf(c, "_%s.itp", molname);
2075 c = strrchr(posre_fn, '.');
2076 sprintf(c, "_%s.itp", molname);
2077 if (strcmp(itp_fn, posre_fn) == 0)
2079 strcpy(buf_fn, posre_fn);
2080 c = strrchr(buf_fn, '.');
2081 *c = '\0';
2082 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2085 nincl++;
2086 srenew(incls, nincl);
2087 incls[nincl-1] = gmx_strdup(itp_fn);
2088 itp_file = gmx_fio_fopen(itp_fn, "w");
2090 else
2092 bITP = FALSE;
2095 srenew(mols, nmol+1);
2096 if (cc->bAllWat)
2098 mols[nmol].name = gmx_strdup("SOL");
2099 mols[nmol].nr = pdba->nres;
2101 else
2103 mols[nmol].name = gmx_strdup(molname);
2104 mols[nmol].nr = 1;
2106 nmol++;
2108 if (bITP)
2110 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2113 if (cc->bAllWat)
2115 top_file2 = nullptr;
2117 else
2118 if (bITP)
2120 top_file2 = itp_file;
2122 else
2124 top_file2 = top_file;
2127 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2128 nrtp, restp,
2129 restp_chain, hb_chain,
2130 bAllowMissing,
2131 bVsites, bVsiteAromatics, ffdir,
2132 mHmult, nssbonds, ssbonds,
2133 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2134 bRenumRes, bRTPresname);
2136 if (!cc->bAllWat)
2138 write_posres(posre_fn, pdba, posre_fc);
2141 if (bITP)
2143 gmx_fio_fclose(itp_file);
2146 /* pdba and natom have been reassigned somewhere so: */
2147 cc->pdba = pdba;
2148 cc->x = x;
2150 if (debug)
2152 if (cc->chainid == ' ')
2154 sprintf(fn, "chain.pdb");
2156 else
2158 sprintf(fn, "chain_%c.pdb", cc->chainid);
2160 write_sto_conf(fn, "", pdba, x, nullptr, ePBC, box);
2164 if (watermodel == nullptr)
2166 for (chain = 0; chain < nch; chain++)
2168 if (chains[chain].bAllWat)
2170 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2174 else
2176 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2177 if (!fflib_fexist(buf_fn))
2179 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2180 buf_fn, watermodel);
2184 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2185 gmx_fio_fclose(top_file);
2187 gmx_residuetype_destroy(rt);
2189 /* now merge all chains back together */
2190 natom = 0;
2191 nres = 0;
2192 for (i = 0; (i < nch); i++)
2194 natom += chains[i].pdba->nr;
2195 nres += chains[i].pdba->nres;
2197 snew(atoms, 1);
2198 init_t_atoms(atoms, natom, FALSE);
2199 for (i = 0; i < atoms->nres; i++)
2201 sfree(atoms->resinfo[i].name);
2203 sfree(atoms->resinfo);
2204 atoms->nres = nres;
2205 snew(atoms->resinfo, nres);
2206 snew(x, natom);
2207 k = 0;
2208 l = 0;
2209 for (i = 0; (i < nch); i++)
2211 if (nch > 1)
2213 printf("Including chain %d in system: %d atoms %d residues\n",
2214 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2216 for (j = 0; (j < chains[i].pdba->nr); j++)
2218 atoms->atom[k] = chains[i].pdba->atom[j];
2219 atoms->atom[k].resind += l; /* l is processed nr of residues */
2220 atoms->atomname[k] = chains[i].pdba->atomname[j];
2221 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2222 copy_rvec(chains[i].x[j], x[k]);
2223 k++;
2225 for (j = 0; (j < chains[i].pdba->nres); j++)
2227 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2228 if (bRTPresname)
2230 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2232 l++;
2236 if (nch > 1)
2238 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2239 print_sums(atoms, TRUE);
2242 fprintf(stderr, "\nWriting coordinate file...\n");
2243 clear_rvec(box_space);
2244 if (box[0][0] == 0)
2246 make_new_box(atoms->nr, x, box, box_space, FALSE);
2248 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, nullptr, ePBC, box);
2250 printf("\t\t--------- PLEASE NOTE ------------\n");
2251 printf("You have successfully generated a topology from: %s.\n",
2252 opt2fn("-f", NFILE, fnm));
2253 if (watermodel != nullptr)
2255 printf("The %s force field and the %s water model are used.\n",
2256 ffname, watermodel);
2258 else
2260 printf("The %s force field is used.\n",
2261 ffname);
2263 printf("\t\t--------- ETON ESAELP ------------\n");
2265 return 0;