4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
84 static const char *res2bb_notermini(const char *name
,
85 int nrr
,const rtprename_t
*rr
)
87 /* NOTE: This function returns the main building block name,
88 * it does not take terminal renaming into account.
93 while (i
< nrr
&& gmx_strcasecmp(name
,rr
[i
].gmx
) != 0) {
97 return (i
< nrr
? rr
[i
].main
: name
);
100 static const char *select_res(int nr
,int resnr
,
101 const char *name
[],const char *expl
[],
103 int nrr
,const rtprename_t
*rr
)
107 printf("Which %s type do you want for residue %d\n",title
,resnr
+1);
108 for(sel
=0; (sel
< nr
); sel
++) {
109 printf("%d. %s (%s)\n",
110 sel
,expl
[sel
],res2bb_notermini(name
[sel
],nrr
,rr
));
112 printf("\nType a number:"); fflush(stdout
);
114 if (scanf("%d",&sel
) != 1)
115 gmx_fatal(FARGS
,"Answer me for res %s %d!",title
,resnr
+1);
120 static const char *get_asptp(int resnr
,int nrr
,const rtprename_t
*rr
)
122 enum { easp
, easpH
, easpNR
};
123 const char *lh
[easpNR
] = { "ASP", "ASPH" };
124 const char *expl
[easpNR
] = {
125 "Not protonated (charge -1)",
126 "Protonated (charge 0)"
129 return select_res(easpNR
,resnr
,lh
,expl
,"ASPARTIC ACID",nrr
,rr
);
132 static const char *get_glutp(int resnr
,int nrr
,const rtprename_t
*rr
)
134 enum { eglu
, egluH
, egluNR
};
135 const char *lh
[egluNR
] = { "GLU", "GLUH" };
136 const char *expl
[egluNR
] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(egluNR
,resnr
,lh
,expl
,"GLUTAMIC ACID",nrr
,rr
);
144 static const char *get_glntp(int resnr
,int nrr
,const rtprename_t
*rr
)
146 enum { egln
, eglnH
, eglnNR
};
147 const char *lh
[eglnNR
] = { "GLN", "QLN" };
148 const char *expl
[eglnNR
] = {
149 "Not protonated (charge 0)",
150 "Protonated (charge +1)"
153 return select_res(eglnNR
,resnr
,lh
,expl
,"GLUTAMINE",nrr
,rr
);
156 static const char *get_lystp(int resnr
,int nrr
,const rtprename_t
*rr
)
158 enum { elys
, elysH
, elysNR
};
159 const char *lh
[elysNR
] = { "LYSN", "LYS" };
160 const char *expl
[elysNR
] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(elysNR
,resnr
,lh
,expl
,"LYSINE",nrr
,rr
);
168 static const char *get_argtp(int resnr
,int nrr
,const rtprename_t
*rr
)
170 enum { earg
, eargH
, eargNR
};
171 const char *lh
[eargNR
] = { "ARGN", "ARG" };
172 const char *expl
[eargNR
] = {
173 "Not protonated (charge 0)",
174 "Protonated (charge +1)"
177 return select_res(eargNR
,resnr
,lh
,expl
,"ARGININE",nrr
,rr
);
180 static const char *get_cystp(int resnr
,int nrr
,const rtprename_t
*rr
)
182 enum { ecys
, ecysH
, ecysNR
};
183 const char *lh
[ecysNR
] = { "CYS2", "CYS" };
184 const char *expl
[ecysNR
] = {
185 "Cysteine in disulfide bridge",
189 return select_res(ecysNR
,resnr
,lh
,expl
,"CYSTEINE",nrr
,rr
);
193 static const char *get_histp(int resnr
,int nrr
,const rtprename_t
*rr
)
195 const char *expl
[ehisNR
] = {
202 return select_res(ehisNR
,resnr
,hh
,expl
,"HISTIDINE",nrr
,rr
);
205 static void read_rtprename(const char *fname
,FILE *fp
,
206 int *nrtprename
,rtprename_t
**rtprename
)
208 char line
[STRLEN
],buf
[STRLEN
];
217 while(get_a_line(fp
,line
,STRLEN
)) {
219 nc
= sscanf(line
,"%s %s %s %s %s %s",
220 rr
[n
].gmx
,rr
[n
].main
,rr
[n
].nter
,rr
[n
].cter
,rr
[n
].bter
,buf
);
222 if (nc
!= 2 && nc
!= 5) {
223 gmx_fatal(FARGS
,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname
,ncol
,2,5);
226 } else if (nc
!= ncol
) {
227 gmx_fatal(FARGS
,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname
,nc
,ncol
);
231 /* This file does not have special termini names, copy them from main */
232 strcpy(rr
[n
].nter
,rr
[n
].main
);
233 strcpy(rr
[n
].cter
,rr
[n
].main
);
234 strcpy(rr
[n
].bter
,rr
[n
].main
);
244 static void rename_resrtp(t_atoms
*pdba
,int nterpairs
,int *r_start
,int *r_end
,
245 int nrr
,rtprename_t
*rr
,t_symtab
*symtab
,
252 for(r
=0; r
<pdba
->nres
; r
++) {
254 while(i
<nrr
&& strcmp(*pdba
->resinfo
[r
].rtp
,rr
[i
].gmx
) != 0) {
258 /* If found in the database, rename this residue's rtp building block,
259 * otherwise keep the old name.
264 for(j
=0; j
<nterpairs
; j
++) {
265 if (r
== r_start
[j
]) {
269 for(j
=0; j
<nterpairs
; j
++) {
274 if (bStart
&& bEnd
) {
284 gmx_fatal(FARGS
,"In the chosen force field there is no residue type for '%s'%s",pdba
->resinfo
[r
].rtp
,bStart
? " as a starting terminus" : (bEnd
? " as an ending terminus" : ""));
286 if (strcmp(*pdba
->resinfo
[r
].rtp
,nn
) != 0) {
288 printf("Changing rtp entry of residue %d %s to '%s'\n",
289 pdba
->resinfo
[r
].nr
,*pdba
->resinfo
[r
].name
,nn
);
291 pdba
->resinfo
[r
].rtp
= put_symtab(symtab
,nn
);
297 static void pdbres_to_gmxrtp(t_atoms
*pdba
)
301 for(i
=0; (i
<pdba
->nres
); i
++) {
302 pdba
->resinfo
[i
].rtp
= pdba
->resinfo
[i
].name
;
306 static void rename_pdbres(t_atoms
*pdba
,const char *oldnm
,const char *newnm
,
307 bool bFullCompare
,t_symtab
*symtab
)
312 for(i
=0; (i
<pdba
->nres
); i
++) {
313 resnm
= *pdba
->resinfo
[i
].name
;
314 if ((bFullCompare
&& (gmx_strcasecmp(resnm
,oldnm
) == 0)) ||
315 (!bFullCompare
&& strstr(resnm
,oldnm
) != NULL
)) {
316 pdba
->resinfo
[i
].name
= put_symtab(symtab
,newnm
);
321 static void rename_bb(t_atoms
*pdba
,const char *oldnm
,const char *newnm
,
322 bool bFullCompare
,t_symtab
*symtab
)
327 for(i
=0; (i
<pdba
->nres
); i
++) {
328 bbnm
= *pdba
->resinfo
[i
].rtp
;
329 if ((bFullCompare
&& (gmx_strcasecmp(bbnm
,oldnm
) == 0)) ||
330 (!bFullCompare
&& strstr(bbnm
,oldnm
) != NULL
)) {
331 pdba
->resinfo
[i
].rtp
= put_symtab(symtab
,newnm
);
336 static void rename_bbint(t_atoms
*pdba
,const char *oldnm
,
337 const char *gettp(int,int,const rtprename_t
*),
340 int nrr
,const rtprename_t
*rr
)
346 for(i
=0; i
<pdba
->nres
; i
++) {
347 bbnm
= *pdba
->resinfo
[i
].rtp
;
348 if ((bFullCompare
&& (strcmp(bbnm
,oldnm
) == 0)) ||
349 (!bFullCompare
&& strstr(bbnm
,oldnm
) != NULL
)) {
350 ptr
= gettp(i
,nrr
,rr
);
351 pdba
->resinfo
[i
].rtp
= put_symtab(symtab
,ptr
);
356 static void check_occupancy(t_atoms
*atoms
,const char *filename
,bool bVerbose
)
362 ftp
= fn2ftp(filename
);
363 if (!atoms
->pdbinfo
|| ((ftp
!= efPDB
) && (ftp
!= efBRK
) && (ftp
!= efENT
)))
364 fprintf(stderr
,"No occupancies in %s\n",filename
);
366 for(i
=0; (i
<atoms
->nr
); i
++) {
367 if (atoms
->pdbinfo
[i
].occup
!= 1) {
369 fprintf(stderr
,"Occupancy for atom %s%d-%s is %f rather than 1\n",
370 *atoms
->resinfo
[atoms
->atom
[i
].resind
].name
,
371 atoms
->resinfo
[ atoms
->atom
[i
].resind
].nr
,
373 atoms
->pdbinfo
[i
].occup
);
374 if (atoms
->pdbinfo
[i
].occup
== 0)
380 if (nzero
== atoms
->nr
) {
381 fprintf(stderr
,"All occupancy fields zero. This is probably not an X-Ray structure\n");
382 } else if ((nzero
> 0) || (nnotone
> 0)) {
385 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
386 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
388 nzero
,nnotone
,atoms
->nr
);
390 fprintf(stderr
,"All occupancies are one\n");
395 void write_posres(char *fn
,t_atoms
*pdba
,real fc
)
400 fp
=gmx_fio_fopen(fn
,"w");
402 "; In this topology include file, you will find position restraint\n"
403 "; entries for all the heavy atoms in your original pdb file.\n"
404 "; This means that all the protons which were added by pdb2gmx are\n"
405 "; not restrained.\n"
407 "[ position_restraints ]\n"
408 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
410 for(i
=0; (i
<pdba
->nr
); i
++) {
411 if (!is_hydrogen(*pdba
->atomname
[i
]) && !is_dummymass(*pdba
->atomname
[i
]))
412 fprintf(fp
,"%6d%6d %g %g %g\n",i
+1,1,fc
,fc
,fc
);
417 static int read_pdball(const char *inf
, const char *outf
,char *title
,
418 t_atoms
*atoms
, rvec
**x
,
419 int *ePBC
,matrix box
, bool bRemoveH
,
420 t_symtab
*symtab
,gmx_residuetype_t rt
,const char *watres
,
421 gmx_atomprop_t aps
,bool bVerbose
)
422 /* Read a pdb file. (containing proteins) */
424 int natom
,new_natom
,i
;
427 printf("Reading %s...\n",inf
);
428 get_stx_coordnum(inf
,&natom
);
429 init_t_atoms(atoms
,natom
,TRUE
);
431 read_stx_conf(inf
,title
,atoms
,*x
,NULL
,ePBC
,box
);
432 if (fn2ftp(inf
) == efPDB
)
433 get_pdb_atomnumber(atoms
,aps
);
436 for(i
=0; i
<atoms
->nr
; i
++)
437 if (!is_hydrogen(*atoms
->atomname
[i
])) {
438 atoms
->atom
[new_natom
]=atoms
->atom
[i
];
439 atoms
->atomname
[new_natom
]=atoms
->atomname
[i
];
440 atoms
->pdbinfo
[new_natom
]=atoms
->pdbinfo
[i
];
441 copy_rvec((*x
)[i
],(*x
)[new_natom
]);
449 if (title
&& title
[0])
450 printf(" '%s',",title
);
451 printf(" %d atoms\n",natom
);
453 /* Rename residues */
454 rename_pdbres(atoms
,"HOH",watres
,FALSE
,symtab
);
455 rename_pdbres(atoms
,"SOL",watres
,FALSE
,symtab
);
456 rename_pdbres(atoms
,"WAT",watres
,FALSE
,symtab
);
458 rename_atoms("xlateat.dat",NULL
,
459 atoms
,symtab
,NULL
,TRUE
,rt
,TRUE
,bVerbose
);
465 write_sto_conf(outf
,title
,atoms
,*x
,NULL
,*ePBC
,box
);
470 void process_chain(t_atoms
*pdba
, rvec
*x
,
471 bool bTrpU
,bool bPheU
,bool bTyrU
,
472 bool bLysMan
,bool bAspMan
,bool bGluMan
,
473 bool bHisMan
,bool bArgMan
,bool bGlnMan
,
474 real angle
,real distance
,t_symtab
*symtab
,
475 int nrr
,const rtprename_t
*rr
)
477 /* Initialize the rtp builing block names with the residue names */
478 pdbres_to_gmxrtp(pdba
);
480 /* Rename aromatics, lys, asp and histidine */
481 if (bTyrU
) rename_bb(pdba
,"TYR","TYRU",FALSE
,symtab
);
482 if (bTrpU
) rename_bb(pdba
,"TRP","TRPU",FALSE
,symtab
);
483 if (bPheU
) rename_bb(pdba
,"PHE","PHEU",FALSE
,symtab
);
485 rename_bbint(pdba
,"LYS",get_lystp
,FALSE
,symtab
,nrr
,rr
);
487 rename_bbint(pdba
,"ARG",get_argtp
,FALSE
,symtab
,nrr
,rr
);
489 rename_bbint(pdba
,"GLN",get_glntp
,FALSE
,symtab
,nrr
,rr
);
491 rename_bbint(pdba
,"ASP",get_asptp
,FALSE
,symtab
,nrr
,rr
);
493 rename_bb(pdba
,"ASPH","ASP",FALSE
,symtab
);
495 rename_bbint(pdba
,"GLU",get_glutp
,FALSE
,symtab
,nrr
,rr
);
497 rename_bb(pdba
,"GLUH","GLU",FALSE
,symtab
);
500 set_histp(pdba
,x
,angle
,distance
);
502 rename_bbint(pdba
,"HIS",get_histp
,TRUE
,symtab
,nrr
,rr
);
505 /* struct for sorting the atoms from the pdb file */
507 int resnr
; /* residue number */
508 int j
; /* database order index */
509 int index
; /* original atom number */
510 char anm1
; /* second letter of atom name */
511 char altloc
; /* alternate location indicator */
514 int pdbicomp(const void *a
,const void *b
)
522 d
= (pa
->resnr
- pb
->resnr
);
526 d
= (pa
->anm1
- pb
->anm1
);
528 d
= (pa
->altloc
- pb
->altloc
);
535 static void sort_pdbatoms(int nrtp
,t_restp restp
[],t_hackblock hb
[],
536 int natoms
,t_atoms
**pdbaptr
,rvec
**x
,
537 t_blocka
*block
,char ***gnames
)
539 t_atoms
*pdba
,*pdbnew
;
554 for(i
=0; i
<natoms
; i
++) {
555 atomnm
= *pdba
->atomname
[i
];
556 rptr
= &restp
[pdba
->atom
[i
].resind
];
557 for(j
=0; (j
<rptr
->natom
); j
++) {
558 if (gmx_strcasecmp(atomnm
,*(rptr
->atomname
[j
])) == 0) {
562 if (j
==rptr
->natom
) {
563 if ( ( ( pdba
->atom
[i
].resind
== 0) && (atomnm
[0] == 'H') &&
564 ( (atomnm
[1] == '1') || (atomnm
[1] == '2') ||
565 (atomnm
[1] == '3') ) ) )
570 sprintf(buf
,"Atom %s in residue %s %d not found in rtp entry %s with %d atoms\n"
571 "while sorting atoms%s",atomnm
,
572 *pdba
->resinfo
[pdba
->atom
[i
].resind
].name
,
573 pdba
->resinfo
[pdba
->atom
[i
].resind
].nr
,
576 is_hydrogen(atomnm
) ? ". Maybe different protonation state.\n"
577 " Remove this hydrogen or choose a different "
578 "protonation state.\n"
579 " Option -ignh will ignore all hydrogens "
580 "in the input." : "");
581 gmx_fatal(FARGS
,buf
);
584 /* make shadow array to be sorted into indexgroup */
585 pdbi
[i
].resnr
= pdba
->atom
[i
].resind
;
588 pdbi
[i
].anm1
= atomnm
[1];
589 pdbi
[i
].altloc
= pdba
->pdbinfo
[i
].altloc
;
591 qsort(pdbi
,natoms
,(size_t)sizeof(pdbi
[0]),pdbicomp
);
593 /* pdba is sorted in pdbnew using the pdbi index */
596 init_t_atoms(pdbnew
,natoms
,TRUE
);
599 pdbnew
->nres
=pdba
->nres
;
600 sfree(pdbnew
->resinfo
);
601 pdbnew
->resinfo
=pdba
->resinfo
;
602 for (i
=0; i
<natoms
; i
++) {
603 pdbnew
->atom
[i
] = pdba
->atom
[pdbi
[i
].index
];
604 pdbnew
->atomname
[i
] = pdba
->atomname
[pdbi
[i
].index
];
605 pdbnew
->pdbinfo
[i
] = pdba
->pdbinfo
[pdbi
[i
].index
];
606 copy_rvec((*x
)[pdbi
[i
].index
],(*xnew
)[i
]);
607 /* make indexgroup in block */
611 sfree(pdba
->atomname
);
613 sfree(pdba
->pdbinfo
);
616 /* copy the sorted pdbnew back to pdba */
619 add_grp(block
, gnames
, natoms
, a
, "prot_sort");
625 static int remove_duplicate_atoms(t_atoms
*pdba
,rvec x
[],bool bVerbose
)
627 int i
,j
,oldnatoms
,ndel
;
630 printf("Checking for duplicate atoms....\n");
631 oldnatoms
= pdba
->nr
;
633 /* NOTE: pdba->nr is modified inside the loop */
634 for(i
=1; (i
< pdba
->nr
); i
++) {
635 /* compare 'i' and 'i-1', throw away 'i' if they are identical
636 this is a 'while' because multiple alternate locations can be present */
637 while ( (i
< pdba
->nr
) &&
638 (pdba
->atom
[i
-1].resind
== pdba
->atom
[i
].resind
) &&
639 (strcmp(*pdba
->atomname
[i
-1],*pdba
->atomname
[i
])==0) ) {
642 ri
= &pdba
->resinfo
[pdba
->atom
[i
].resind
];
643 printf("deleting duplicate atom %4s %s%4d%c",
644 *pdba
->atomname
[i
],*ri
->name
,ri
->nr
,ri
->ic
);
645 if (ri
->chainid
&& (ri
->chainid
!= ' '))
646 printf(" ch %c", ri
->chainid
);
648 if (pdba
->pdbinfo
[i
].atomnr
)
649 printf(" pdb nr %4d",pdba
->pdbinfo
[i
].atomnr
);
650 if (pdba
->pdbinfo
[i
].altloc
&& (pdba
->pdbinfo
[i
].altloc
!=' '))
651 printf(" altloc %c",pdba
->pdbinfo
[i
].altloc
);
656 /* We can not free, since it might be in the symtab */
657 /* sfree(pdba->atomname[i]); */
658 for (j
=i
; j
< pdba
->nr
; j
++) {
659 pdba
->atom
[j
] = pdba
->atom
[j
+1];
660 pdba
->atomname
[j
] = pdba
->atomname
[j
+1];
661 pdba
->pdbinfo
[j
] = pdba
->pdbinfo
[j
+1];
662 copy_rvec(x
[j
+1],x
[j
]);
664 srenew(pdba
->atom
, pdba
->nr
);
665 /* srenew(pdba->atomname, pdba->nr); */
666 srenew(pdba
->pdbinfo
, pdba
->nr
);
669 if (pdba
->nr
!= oldnatoms
)
670 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba
->nr
,ndel
);
675 void find_nc_ter(t_atoms
*pdba
,int r0
,int r1
,int *r_start
,int *r_end
,gmx_residuetype_t rt
)
678 const char *p_startrestype
;
679 const char *p_restype
;
680 int nstartwarn
,nendwarn
;
688 /* Find the starting terminus (typially N or 5') */
689 for(i
=r0
;i
<r1
&& *r_start
==-1;i
++)
691 gmx_residuetype_get_type(rt
,*pdba
->resinfo
[i
].name
,&p_startrestype
);
692 if( !gmx_strcasecmp(p_startrestype
,"Protein") || !gmx_strcasecmp(p_startrestype
,"DNA") || !gmx_strcasecmp(p_startrestype
,"RNA") )
694 printf("Identified residue %s%d as a starting terminus.\n",*pdba
->resinfo
[i
].name
,pdba
->resinfo
[i
].nr
);
701 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba
->resinfo
[i
].name
,pdba
->resinfo
[i
].nr
);
705 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
713 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
714 for(i
=*r_start
;i
<r1
;i
++)
716 gmx_residuetype_get_type(rt
,*pdba
->resinfo
[i
].name
,&p_restype
);
717 if( !gmx_strcasecmp(p_restype
,p_startrestype
) && nendwarn
==0)
725 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
726 *pdba
->resinfo
[i
].name
,pdba
->resinfo
[i
].nr
,p_restype
,
727 *pdba
->resinfo
[*r_start
].name
,pdba
->resinfo
[*r_start
].nr
,p_startrestype
);
731 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
740 printf("Identified residue %s%d as a ending terminus.\n",*pdba
->resinfo
[*r_end
].name
,pdba
->resinfo
[*r_end
].nr
);
746 modify_chain_numbers(t_atoms
* pdba
,
747 const char * chainsep
)
750 char old_prev_chainid
;
751 char old_this_chainid
;
752 int old_prev_chainnum
;
753 int old_this_chainnum
;
766 printf("Splitting PDB chains based on ");
767 splitting
= SPLIT_TER_ONLY
; /* keep compiler happy */
769 /* Be a bit flexible to catch typos */
770 if (!strncmp(chainsep
,"id_o",4) || !strncmp(chainsep
,"int",3))
772 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
773 splitting
= SPLIT_ID_OR_TER
;
774 printf("TER records or changing chain id.\n");
776 else if (!strncmp(chainsep
,"id_a",4))
778 splitting
= SPLIT_ID_AND_TER
;
779 printf("TER records and chain id.\n");
781 else if (strlen(chainsep
)==2 && !strncmp(chainsep
,"id",4))
783 splitting
= SPLIT_ID_ONLY
;
784 printf("changing chain id only (ignoring TER records).\n");
786 else if (chainsep
[0]=='t')
788 splitting
= SPLIT_TER_ONLY
;
789 printf("TER records only (ignoring chain id).\n");
793 gmx_fatal(FARGS
,"Unidentified setting for chain separation: %s\n",chainsep
);
796 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
798 old_prev_chainid
= '?';
799 old_prev_chainnum
= -1;
802 for(i
=0;i
<pdba
->nres
;i
++)
804 ri
= &pdba
->resinfo
[i
];
805 old_this_chainid
= ri
->chainid
;
806 old_this_chainnum
= ri
->chainnum
;
810 case SPLIT_ID_OR_TER
:
811 if(old_this_chainid
!= old_prev_chainid
|| old_this_chainnum
!= old_prev_chainnum
)
817 case SPLIT_ID_AND_TER
:
818 if(old_this_chainid
!= old_prev_chainid
&& old_this_chainnum
!= old_prev_chainnum
)
825 if(old_this_chainid
!= old_prev_chainid
)
832 if(old_this_chainnum
!= old_prev_chainnum
)
838 gmx_fatal(FARGS
,"Internal inconsistency - this shouldn't happen...");
841 old_prev_chainid
= old_this_chainid
;
842 old_prev_chainnum
= old_this_chainnum
;
844 ri
->chainnum
= new_chainnum
;
873 int main(int argc
, char *argv
[])
875 const char *desc
[] = {
876 "This program reads a pdb (or gro) file, reads",
877 "some database files, adds hydrogens to the molecules and generates",
878 "coordinates in Gromacs (Gromos), or optionally pdb, format",
879 "and a topology in Gromacs format.",
880 "These files can subsequently be processed to generate a run input file.",
882 "pdb2gmx will search for force fields by looking for",
883 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
884 "of the current working directory and of the Gomracs library directory",
885 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
887 "By default the forcefield selection is interactive,",
888 "but you can use the [TT]-ff[tt] option to specify one of the short names",
889 "in the list on the command line instead. In that case pdb2gmx just looks",
890 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
892 "After choosing a force field, all files will be read only from",
893 "the corresponding force field directory.",
894 "If you want to modify or add a residue types, you can copy the force",
895 "field directory from the Gromacs library directory to your current",
896 "working directory. If you want to add new protein residue types,",
897 "you will need to modify residuetypes.dat in the libary directory",
898 "or copy the whole library directory to a local directory and set",
899 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
900 "Check chapter 5 of the manual for more information about file formats.",
903 "Note that a pdb file is nothing more than a file format, and it",
904 "need not necessarily contain a protein structure. Every kind of",
905 "molecule for which there is support in the database can be converted.",
906 "If there is no support in the database, you can add it yourself.[PAR]",
908 "The program has limited intelligence, it reads a number of database",
909 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
910 "if necessary this can be done manually. The program can prompt the",
911 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
912 "wants. For LYS the choice is between neutral (two protons on NZ) or",
913 "protonated (three protons, default), for ASP and GLU unprotonated",
914 "(default) or protonated, for HIS the proton can be either on ND1,",
915 "on NE2 or on both. By default these selections are done automatically.",
916 "For His, this is based on an optimal hydrogen bonding",
917 "conformation. Hydrogen bonds are defined based on a simple geometric",
918 "criterium, specified by the maximum hydrogen-donor-acceptor angle",
919 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
920 "[TT]-dist[tt] respectively.[PAR]",
922 "The separation of chains is not entirely trivial since the markup",
923 "in user-generated PDB files frequently varies and sometimes it",
924 "is desirable to merge entries across a TER record, for instance",
925 "if you want a disulfide bridge or distance restraints between",
926 "two protein chains or if you have a HEME group bound to a protein.",
927 "In such cases multiple chains should be contained in a single",
928 "[TT]molecule_type[tt] definition.",
929 "To handle this, pdb2gmx has an option [TT]-chainsep[tt] so you can",
930 "choose whether a new chain should start when we find a TER record,",
931 "when the chain id changes, combinations of either or both of these",
932 "or fully interactively.[PAR]",
934 "pdb2gmx will also check the occupancy field of the pdb file.",
935 "If any of the occupancies are not one, indicating that the atom is",
936 "not resolved well in the structure, a warning message is issued.",
937 "When a pdb file does not originate from an X-Ray structure determination",
938 "all occupancy fields may be zero. Either way, it is up to the user",
939 "to verify the correctness of the input data (read the article!).[PAR]",
941 "During processing the atoms will be reordered according to Gromacs",
942 "conventions. With [TT]-n[tt] an index file can be generated that",
943 "contains one group reordered in the same way. This allows you to",
944 "convert a Gromos trajectory and coordinate file to Gromos. There is",
945 "one limitation: reordering is done after the hydrogens are stripped",
946 "from the input and before new hydrogens are added. This means that",
947 "you should not use [TT]-ignh[tt].[PAR]",
949 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
950 "identifiers. Therefore it is useful to enter a pdb file name at",
951 "the [TT]-o[tt] option when you want to convert a multichain pdb file.",
954 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
955 "motions. Angular and out-of-plane motions can be removed by changing",
956 "hydrogens into virtual sites and fixing angles, which fixes their",
957 "position relative to neighboring atoms. Additionally, all atoms in the",
958 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
959 "can be converted into virtual sites, elminating the fast improper dihedral",
960 "fluctuations in these rings. Note that in this case all other hydrogen",
961 "atoms are also converted to virtual sites. The mass of all atoms that are",
962 "converted into virtual sites, is added to the heavy atoms.[PAR]",
963 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
964 "done by increasing the hydrogen-mass by a factor of 4. This is also",
965 "done for water hydrogens to slow down the rotational motion of water.",
966 "The increase in mass of the hydrogens is subtracted from the bonded",
967 "(heavy) atom so that the total mass of the system remains the same."
971 FILE *fp
,*top_file
,*top_file2
,*itp_file
=NULL
;
973 t_atoms pdba_all
,*pdba
;
977 int chain
,nch
,maxch
,nwaterchain
;
993 gpp_atomtype_t atype
;
994 gmx_residuetype_t rt
;
996 char fn
[256],itp_fn
[STRLEN
],posre_fn
[STRLEN
],buf_fn
[STRLEN
];
997 char molname
[STRLEN
],title
[STRLEN
],quote
[STRLEN
],generator
[STRLEN
];
998 char *c
,forcefield
[STRLEN
],ffdir
[STRLEN
];
999 char ffname
[STRLEN
],suffix
[STRLEN
],buf
[STRLEN
];
1008 rtprename_t
*rtprename
=NULL
;
1009 int nah
,nNtdb
,nCtdb
,ntdblist
;
1010 t_hackblock
*ntdb
,*ctdb
,**tdblist
;
1014 bool bVsites
=FALSE
,bWat
,bPrevWat
=FALSE
,bITP
,bVsiteAromatics
=FALSE
,bMerge
;
1016 t_hackblock
*hb_chain
;
1017 t_restp
*restp_chain
;
1019 const char *p_restype
;
1023 const char * prev_atomname
;
1024 const char * this_atomname
;
1025 const char * prev_resname
;
1026 const char * this_resname
;
1031 int prev_chainnumber
;
1032 int this_chainnumber
;
1034 int this_chainstart
;
1035 int prev_chainstart
;
1040 { efSTX
, "-f", "eiwit.pdb", ffREAD
},
1041 { efSTO
, "-o", "conf", ffWRITE
},
1042 { efTOP
, NULL
, NULL
, ffWRITE
},
1043 { efITP
, "-i", "posre", ffWRITE
},
1044 { efNDX
, "-n", "clean", ffOPTWR
},
1045 { efSTO
, "-q", "clean.pdb", ffOPTWR
}
1047 #define NFILE asize(fnm)
1050 /* Command line arguments must be static */
1051 static bool bNewRTP
=FALSE
;
1052 static bool bInter
=FALSE
, bCysMan
=FALSE
;
1053 static bool bLysMan
=FALSE
, bAspMan
=FALSE
, bGluMan
=FALSE
, bHisMan
=FALSE
;
1054 static bool bGlnMan
=FALSE
, bArgMan
=FALSE
;
1055 static bool bTerMan
=FALSE
, bUnA
=FALSE
, bHeavyH
;
1056 static bool bSort
=TRUE
, bAllowMissing
=FALSE
, bRemoveH
=FALSE
;
1057 static bool bDeuterate
=FALSE
,bVerbose
=FALSE
,bChargeGroups
=TRUE
,bCmap
=TRUE
;
1058 static bool bRenumRes
=FALSE
,bRTPresname
=FALSE
;
1059 static real angle
=135.0, distance
=0.3,posre_fc
=1000;
1060 static real long_bond_dist
=0.25, short_bond_dist
=0.05;
1061 static const char *vsitestr
[] = { NULL
, "none", "hydrogens", "aromatics", NULL
};
1062 static const char *watstr
[] = { NULL
, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL
};
1063 static const char *chainsep
[] = { NULL
, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL
};
1064 static const char *ff
= "select";
1067 { "-newrtp", FALSE
, etBOOL
, {&bNewRTP
},
1068 "HIDDENWrite the residue database in new format to 'new.rtp'"},
1069 { "-lb", FALSE
, etREAL
, {&long_bond_dist
},
1070 "HIDDENLong bond warning distance" },
1071 { "-sb", FALSE
, etREAL
, {&short_bond_dist
},
1072 "HIDDENShort bond warning distance" },
1073 { "-chainsep", FALSE
, etENUM
, {chainsep
},
1074 "Condition in PDB files when a new chain and molecule_type should be started" },
1075 { "-ff", FALSE
, etSTR
, {&ff
},
1076 "Force field, interactive by default. Use -h for information." },
1077 { "-water", FALSE
, etENUM
, {watstr
},
1078 "Water model to use" },
1079 { "-inter", FALSE
, etBOOL
, {&bInter
},
1080 "Set the next 8 options to interactive"},
1081 { "-ss", FALSE
, etBOOL
, {&bCysMan
},
1082 "Interactive SS bridge selection" },
1083 { "-ter", FALSE
, etBOOL
, {&bTerMan
},
1084 "Interactive termini selection, iso charged" },
1085 { "-lys", FALSE
, etBOOL
, {&bLysMan
},
1086 "Interactive Lysine selection, iso charged" },
1087 { "-arg", FALSE
, etBOOL
, {&bArgMan
},
1088 "Interactive Arganine selection, iso charged" },
1089 { "-asp", FALSE
, etBOOL
, {&bAspMan
},
1090 "Interactive Aspartic Acid selection, iso charged" },
1091 { "-glu", FALSE
, etBOOL
, {&bGluMan
},
1092 "Interactive Glutamic Acid selection, iso charged" },
1093 { "-gln", FALSE
, etBOOL
, {&bGlnMan
},
1094 "Interactive Glutamine selection, iso neutral" },
1095 { "-his", FALSE
, etBOOL
, {&bHisMan
},
1096 "Interactive Histidine selection, iso checking H-bonds" },
1097 { "-angle", FALSE
, etREAL
, {&angle
},
1098 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1099 { "-dist", FALSE
, etREAL
, {&distance
},
1100 "Maximum donor-acceptor distance for a H-bond (nm)" },
1101 { "-una", FALSE
, etBOOL
, {&bUnA
},
1102 "Select aromatic rings with united CH atoms on Phenylalanine, "
1103 "Tryptophane and Tyrosine" },
1104 { "-sort", FALSE
, etBOOL
, {&bSort
},
1105 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1106 { "-ignh", FALSE
, etBOOL
, {&bRemoveH
},
1107 "Ignore hydrogen atoms that are in the pdb file" },
1108 { "-missing",FALSE
, etBOOL
, {&bAllowMissing
},
1109 "Continue when atoms are missing, dangerous" },
1110 { "-v", FALSE
, etBOOL
, {&bVerbose
},
1111 "Be slightly more verbose in messages" },
1112 { "-posrefc",FALSE
, etREAL
, {&posre_fc
},
1113 "Force constant for position restraints" },
1114 { "-vsite", FALSE
, etENUM
, {vsitestr
},
1115 "Convert atoms to virtual sites" },
1116 { "-heavyh", FALSE
, etBOOL
, {&bHeavyH
},
1117 "Make hydrogen atoms heavy" },
1118 { "-deuterate", FALSE
, etBOOL
, {&bDeuterate
},
1119 "Change the mass of hydrogens to 2 amu" },
1120 { "-chargegrp", TRUE
, etBOOL
, {&bChargeGroups
},
1121 "Use charge groups in the rtp file" },
1122 { "-cmap", TRUE
, etBOOL
, {&bCmap
},
1123 "Use cmap torsions (if enabled in the rtp file)" },
1124 { "-renum", TRUE
, etBOOL
, {&bRenumRes
},
1125 "Renumber the residues consecutively in the output" },
1126 { "-rtpres", TRUE
, etBOOL
, {&bRTPresname
},
1127 "Use rtp entry names as residue names" }
1129 #define NPARGS asize(pa)
1131 CopyRight(stderr
,argv
[0]);
1132 parse_common_args(&argc
,argv
,0,NFILE
,fnm
,asize(pa
),pa
,asize(desc
),desc
,
1135 /* Force field selection, interactive or direct */
1136 choose_ff(strcmp(ff
,"select") == 0 ? NULL
: ff
,
1137 forcefield
,sizeof(forcefield
),
1138 ffdir
,sizeof(ffdir
));
1140 if (strlen(forcefield
) > 0) {
1141 strcpy(ffname
,forcefield
);
1142 ffname
[0] = toupper(ffname
[0]);
1144 gmx_fatal(FARGS
,"Empty forcefield string");
1147 printf("\nUsing the %s force field in directory %s\n\n",
1150 choose_watermodel(watstr
[0],ffdir
,&watermodel
);
1153 /* if anything changes here, also change description of -inter */
1166 else if (bDeuterate
)
1171 switch(vsitestr
[0][0]) {
1172 case 'n': /* none */
1174 bVsiteAromatics
=FALSE
;
1176 case 'h': /* hydrogens */
1178 bVsiteAromatics
=FALSE
;
1180 case 'a': /* aromatics */
1182 bVsiteAromatics
=TRUE
;
1185 gmx_fatal(FARGS
,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1186 __FILE__
,__LINE__
,vsitestr
[0]);
1189 /* Open the symbol table */
1190 open_symtab(&symtab
);
1192 /* Residue type database */
1193 gmx_residuetype_init(&rt
);
1195 /* Read residue renaming database(s), if present */
1196 nrrn
= fflib_search_file_end(ffdir
,".r2b",FALSE
,&rrn
);
1200 for(i
=0; i
<nrrn
; i
++) {
1201 fp
= fflib_open(rrn
[i
]);
1202 read_rtprename(rrn
[i
],fp
,&nrtprename
,&rtprename
);
1208 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1210 for(i
=0;i
<nrtprename
;i
++)
1212 rc
=gmx_residuetype_get_type(rt
,rtprename
[i
].gmx
,&p_restype
);
1214 /* Only add names if the 'standard' gromacs/iupac base name was found */
1217 gmx_residuetype_add(rt
,rtprename
[i
].main
,p_restype
);
1218 gmx_residuetype_add(rt
,rtprename
[i
].nter
,p_restype
);
1219 gmx_residuetype_add(rt
,rtprename
[i
].cter
,p_restype
);
1220 gmx_residuetype_add(rt
,rtprename
[i
].bter
,p_restype
);
1225 if (watermodel
!= NULL
&& (strstr(watermodel
,"4p") ||
1226 strstr(watermodel
,"4P"))) {
1228 } else if (watermodel
!= NULL
&& (strstr(watermodel
,"5p") ||
1229 strstr(watermodel
,"5P"))) {
1235 aps
= gmx_atomprop_init();
1236 natom
= read_pdball(opt2fn("-f",NFILE
,fnm
),opt2fn_null("-q",NFILE
,fnm
),title
,
1237 &pdba_all
,&pdbx
,&ePBC
,box
,bRemoveH
,&symtab
,rt
,watres
,
1241 gmx_fatal(FARGS
,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE
,fnm
));
1243 printf("Analyzing pdb file\n");
1248 modify_chain_numbers(&pdba_all
,chainsep
[0]);
1251 bMerge
= !strncmp(chainsep
[0],"int",3);
1253 this_atomname
= NULL
;
1255 this_resname
= NULL
;
1258 this_chainnumber
= -1;
1259 this_chainstart
= 0;
1262 for (i
=0; (i
<natom
); i
++)
1264 ri
= &pdba_all
.resinfo
[pdba_all
.atom
[i
].resind
];
1266 prev_atomname
= this_atomname
;
1267 prev_atomnum
= this_atomnum
;
1268 prev_resname
= this_resname
;
1269 prev_resnum
= this_resnum
;
1270 prev_chainid
= this_chainid
;
1271 prev_chainnumber
= this_chainnumber
;
1272 prev_chainstart
= this_chainstart
;
1274 this_atomname
= *pdba_all
.atomname
[i
];
1275 this_atomnum
= (pdba_all
.pdbinfo
!= NULL
) ? pdba_all
.pdbinfo
[i
].atomnr
: i
+1;
1276 this_resname
= *ri
->name
;
1277 this_resnum
= ri
->nr
;
1278 this_chainid
= ri
->chainid
;
1279 this_chainnumber
= ri
->chainnum
;
1281 bWat
= gmx_strcasecmp(*ri
->name
,watres
) == 0;
1282 if ((i
== 0) || (this_chainnumber
!= prev_chainnumber
) || (bWat
!= bPrevWat
))
1284 this_chainstart
= pdba_all
.atom
[i
].resind
;
1285 if (bMerge
&& i
>0 && !bWat
)
1287 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
1288 "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
1289 prev_resname
,prev_resnum
,prev_chainid
,prev_atomnum
,prev_atomname
,
1290 this_resname
,this_resnum
,this_chainid
,this_atomnum
,this_atomname
);
1292 if(NULL
==fgets(select
,STRLEN
-1,stdin
))
1294 gmx_fatal(FARGS
,"Error reading from stdin");
1302 if (select
[0] == 'y')
1304 pdb_ch
[nch
-1].chainstart
[pdb_ch
[nch
-1].nterpairs
] =
1305 pdba_all
.atom
[i
].resind
- prev_chainstart
;
1306 pdb_ch
[nch
-1].nterpairs
++;
1307 srenew(pdb_ch
[nch
-1].chainstart
,pdb_ch
[nch
-1].nterpairs
+1);
1311 /* set natom for previous chain */
1314 pdb_ch
[nch
-1].natom
=i
-pdb_ch
[nch
-1].start
;
1321 /* check if chain identifier was used before */
1322 for (j
=0; (j
<nch
); j
++)
1324 if (pdb_ch
[j
].chainid
!= ' ' && pdb_ch
[j
].chainid
== ri
->chainid
)
1326 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1327 "They will be treated as separate chains unless you reorder your file.\n",
1334 srenew(pdb_ch
,maxch
);
1336 pdb_ch
[nch
].chainid
= ri
->chainid
;
1337 pdb_ch
[nch
].chainnum
= ri
->chainnum
;
1338 pdb_ch
[nch
].start
=i
;
1339 pdb_ch
[nch
].bAllWat
=bWat
;
1341 pdb_ch
[nch
].nterpairs
=0;
1343 pdb_ch
[nch
].nterpairs
=1;
1344 snew(pdb_ch
[nch
].chainstart
,pdb_ch
[nch
].nterpairs
+1);
1345 /* modified [nch] to [0] below */
1346 pdb_ch
[nch
].chainstart
[0]=0;
1352 pdb_ch
[nch
-1].natom
=natom
-pdb_ch
[nch
-1].start
;
1354 /* set all the water blocks at the end of the chain */
1355 snew(swap_index
,nch
);
1357 for(i
=0; i
<nch
; i
++)
1358 if (!pdb_ch
[i
].bAllWat
) {
1362 for(i
=0; i
<nch
; i
++)
1363 if (pdb_ch
[i
].bAllWat
) {
1368 printf("Moved all the water blocks to the end\n");
1371 /* copy pdb data and x for all chains */
1372 for (i
=0; (i
<nch
); i
++) {
1374 chains
[i
].chainid
= pdb_ch
[si
].chainid
;
1375 chains
[i
].chainnum
= pdb_ch
[si
].chainnum
;
1376 chains
[i
].bAllWat
= pdb_ch
[si
].bAllWat
;
1377 chains
[i
].nterpairs
= pdb_ch
[si
].nterpairs
;
1378 chains
[i
].chainstart
= pdb_ch
[si
].chainstart
;
1379 snew(chains
[i
].ntdb
,pdb_ch
[si
].nterpairs
);
1380 snew(chains
[i
].ctdb
,pdb_ch
[si
].nterpairs
);
1381 snew(chains
[i
].r_start
,pdb_ch
[si
].nterpairs
);
1382 snew(chains
[i
].r_end
,pdb_ch
[si
].nterpairs
);
1384 snew(chains
[i
].pdba
,1);
1385 init_t_atoms(chains
[i
].pdba
,pdb_ch
[si
].natom
,TRUE
);
1386 snew(chains
[i
].x
,chains
[i
].pdba
->nr
);
1387 for (j
=0; j
<chains
[i
].pdba
->nr
; j
++) {
1388 chains
[i
].pdba
->atom
[j
] = pdba_all
.atom
[pdb_ch
[si
].start
+j
];
1389 snew(chains
[i
].pdba
->atomname
[j
],1);
1390 *chains
[i
].pdba
->atomname
[j
] =
1391 strdup(*pdba_all
.atomname
[pdb_ch
[si
].start
+j
]);
1392 chains
[i
].pdba
->pdbinfo
[j
] = pdba_all
.pdbinfo
[pdb_ch
[si
].start
+j
];
1393 copy_rvec(pdbx
[pdb_ch
[si
].start
+j
],chains
[i
].x
[j
]);
1395 /* Re-index the residues assuming that the indices are continuous */
1396 k
= chains
[i
].pdba
->atom
[0].resind
;
1397 nres
= chains
[i
].pdba
->atom
[chains
[i
].pdba
->nr
-1].resind
- k
+ 1;
1398 chains
[i
].pdba
->nres
= nres
;
1399 for(j
=0; j
< chains
[i
].pdba
->nr
; j
++) {
1400 chains
[i
].pdba
->atom
[j
].resind
-= k
;
1402 srenew(chains
[i
].pdba
->resinfo
,nres
);
1403 for(j
=0; j
<nres
; j
++) {
1404 chains
[i
].pdba
->resinfo
[j
] = pdba_all
.resinfo
[k
+j
];
1405 snew(chains
[i
].pdba
->resinfo
[j
].name
,1);
1406 *chains
[i
].pdba
->resinfo
[j
].name
= strdup(*pdba_all
.resinfo
[k
+j
].name
);
1407 /* make all chain identifiers equal to that of the chain */
1408 chains
[i
].pdba
->resinfo
[j
].chainid
= pdb_ch
[si
].chainid
;
1413 printf("\nMerged %d chains into one molecule definition\n\n",
1414 pdb_ch
[0].nterpairs
);
1416 printf("There are %d chains and %d blocks of water and "
1417 "%d residues with %d atoms\n",
1418 nch
-nwaterchain
,nwaterchain
,
1419 pdba_all
.resinfo
[pdba_all
.atom
[natom
-1].resind
].nr
,natom
);
1421 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1422 for (i
=0; (i
<nch
); i
++)
1423 printf(" %d '%c' %5d %6d %s\n",
1424 i
+1, chains
[i
].chainid
? chains
[i
].chainid
:'-',
1425 chains
[i
].pdba
->nres
, chains
[i
].pdba
->nr
,
1426 chains
[i
].bAllWat
? "(only water)":"");
1429 check_occupancy(&pdba_all
,opt2fn("-f",NFILE
,fnm
),bVerbose
);
1431 /* Read atomtypes... */
1432 atype
= read_atype(ffdir
,&symtab
);
1434 /* read residue database */
1435 printf("Reading residue database... (%s)\n",forcefield
);
1436 nrtpf
= fflib_search_file_end(ffdir
,".rtp",TRUE
,&rtpf
);
1439 for(i
=0; i
<nrtpf
; i
++) {
1440 read_resall(rtpf
[i
],&nrtp
,&restp
,atype
,&symtab
,FALSE
);
1445 /* Not correct with multiple rtp input files with different bonded types */
1446 fp
=gmx_fio_fopen("new.rtp","w");
1447 print_resall(fp
,nrtp
,restp
,atype
);
1451 /* read hydrogen database */
1452 nah
= read_h_db(ffdir
,&ah
);
1454 /* Read Termini database... */
1455 nNtdb
=read_ter_db(ffdir
,'n',&ntdb
,atype
);
1456 nCtdb
=read_ter_db(ffdir
,'c',&ctdb
,atype
);
1458 top_fn
=ftp2fn(efTOP
,NFILE
,fnm
);
1459 top_file
=gmx_fio_fopen(top_fn
,"w");
1461 #ifdef PACKAGE_VERSION
1462 sprintf(generator
,"%s - version %s",ShortProgram(), PACKAGE_VERSION
);
1464 sprintf(generator
,"%s - version %s",ShortProgram(), "unknown" );
1466 print_top_header(top_file
,top_fn
,generator
,FALSE
,ffdir
,mHmult
);
1473 for(chain
=0; (chain
<nch
); chain
++) {
1474 cc
= &(chains
[chain
]);
1476 /* set pdba, natom and nres to the current chain */
1480 nres
=cc
->pdba
->nres
;
1482 if (cc
->chainid
&& ( cc
->chainid
!= ' ' ) )
1483 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1484 chain
+1,cc
->chainid
,natom
,nres
);
1486 printf("Processing chain %d (%d atoms, %d residues)\n",
1487 chain
+1,natom
,nres
);
1489 process_chain(pdba
,x
,bUnA
,bUnA
,bUnA
,bLysMan
,bAspMan
,bGluMan
,
1490 bHisMan
,bArgMan
,bGlnMan
,angle
,distance
,&symtab
,
1491 nrtprename
,rtprename
);
1493 for(i
=0; i
<cc
->nterpairs
; i
++) {
1495 cc
->chainstart
[cc
->nterpairs
] = pdba
->nres
;
1497 find_nc_ter(pdba
,cc
->chainstart
[i
],cc
->chainstart
[i
+1],
1498 &(cc
->r_start
[i
]),&(cc
->r_end
[i
]),rt
);
1501 if ( (cc
->r_start
[i
]<0) || (cc
->r_end
[i
]<0) ) {
1502 printf("Problem with chain definition, or missing terminal residues.\n"
1503 "This chain does not appear to contain a recognized chain molecule.\n"
1504 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1511 /* Check for disulfides and other special bonds */
1512 nssbonds
= mk_specbonds(pdba
,x
,bCysMan
,&ssbonds
,bVerbose
);
1514 if (nrtprename
> 0) {
1515 rename_resrtp(pdba
,cc
->nterpairs
,cc
->r_start
,cc
->r_end
,nrtprename
,rtprename
,
1521 sprintf(fn
,"chain.pdb");
1523 sprintf(fn
,"chain_%c%d.pdb",cc
->chainid
,cc
->chainnum
);
1525 write_sto_conf(fn
,title
,pdba
,x
,NULL
,ePBC
,box
);
1529 for(i
=0; i
<cc
->nterpairs
; i
++)
1533 * We first apply a filter so we only have the
1534 * termini that can be applied to the residue in question
1535 * (or a generic terminus if no-residue specific is available).
1537 /* First the N terminus */
1540 tdblist
= filter_ter(nrtp
,restp
,nNtdb
,ntdb
,
1541 *pdba
->resinfo
[cc
->r_start
[i
]].name
,
1542 *pdba
->resinfo
[cc
->r_start
[i
]].rtp
,
1546 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1547 "is already in a terminus-specific form and skipping terminus selection.\n");
1552 if(bTerMan
&& ntdblist
>1)
1554 cc
->ntdb
[i
] = choose_ter(ntdblist
,tdblist
,"Select start terminus type");
1558 cc
->ntdb
[i
] = tdblist
[0];
1561 printf("Start terminus: %s\n",(cc
->ntdb
[i
])->name
);
1570 /* And the C terminus */
1573 tdblist
= filter_ter(nrtp
,restp
,nCtdb
,ctdb
,
1574 *pdba
->resinfo
[cc
->r_start
[i
]].name
,
1575 *pdba
->resinfo
[cc
->r_end
[i
]].rtp
,
1579 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1580 "is already in a terminus-specific form and skipping terminus selection.\n");
1585 if(bTerMan
&& ntdblist
>1)
1587 cc
->ctdb
[i
] = choose_ter(ntdblist
,tdblist
,"Select end terminus type");
1591 cc
->ctdb
[i
] = tdblist
[0];
1593 printf("End terminus: %s\n",(cc
->ctdb
[i
])->name
);
1602 /* lookup hackblocks and rtp for all residues */
1603 get_hackblocks_rtp(&hb_chain
, &restp_chain
,
1604 nrtp
, restp
, pdba
->nres
, pdba
->resinfo
,
1605 cc
->nterpairs
, cc
->ntdb
, cc
->ctdb
, cc
->r_start
, cc
->r_end
);
1606 /* ideally, now we would not need the rtp itself anymore, but do
1607 everything using the hb and restp arrays. Unfortunately, that
1608 requires some re-thinking of code in gen_vsite.c, which I won't
1609 do now :( AF 26-7-99 */
1611 rename_atoms(NULL
,ffdir
,
1612 pdba
,&symtab
,restp_chain
,FALSE
,rt
,FALSE
,bVerbose
);
1614 match_atomnames_with_rtp(restp_chain
,hb_chain
,pdba
,x
,bVerbose
);
1617 block
= new_blocka();
1619 sort_pdbatoms(pdba
->nres
,restp_chain
,hb_chain
,
1620 natom
,&pdba
,&x
,block
,&gnames
);
1621 natom
= remove_duplicate_atoms(pdba
,x
,bVerbose
);
1622 if (ftp2bSet(efNDX
,NFILE
,fnm
)) {
1624 fprintf(stderr
,"WARNING: with the -remh option the generated "
1625 "index file (%s) might be useless\n"
1626 "(the index file is generated before hydrogens are added)",
1627 ftp2fn(efNDX
,NFILE
,fnm
));
1629 write_index(ftp2fn(efNDX
,NFILE
,fnm
),block
,gnames
);
1631 for(i
=0; i
< block
->nr
; i
++)
1636 fprintf(stderr
,"WARNING: "
1637 "without sorting no check for duplicate atoms can be done\n");
1640 /* Generate Hydrogen atoms (and termini) in the sequence */
1641 natom
=add_h(&pdba
,&x
,nah
,ah
,
1642 cc
->nterpairs
,cc
->ntdb
,cc
->ctdb
,cc
->r_start
,cc
->r_end
,bAllowMissing
,
1643 NULL
,NULL
,TRUE
,FALSE
);
1644 printf("Now there are %d residues with %d atoms\n",
1645 pdba
->nres
,pdba
->nr
);
1646 if (debug
) write_pdbfile(debug
,title
,pdba
,x
,ePBC
,box
,' ',0,NULL
,TRUE
);
1649 for(i
=0; (i
<natom
); i
++)
1650 fprintf(debug
,"Res %s%d atom %d %s\n",
1651 *(pdba
->resinfo
[pdba
->atom
[i
].resind
].name
),
1652 pdba
->resinfo
[pdba
->atom
[i
].resind
].nr
,i
+1,*pdba
->atomname
[i
]);
1654 strcpy(posre_fn
,ftp2fn(efITP
,NFILE
,fnm
));
1656 /* make up molecule name(s) */
1658 k
= (cc
->nterpairs
>0 && cc
->r_start
[0]>=0) ? cc
->r_start
[0] : 0;
1660 gmx_residuetype_get_type(rt
,*pdba
->resinfo
[k
].name
,&p_restype
);
1666 sprintf(molname
,"Water");
1670 this_chainid
= cc
->chainid
;
1672 /* Add the chain id if we have one */
1673 if(this_chainid
!= ' ')
1675 sprintf(buf
,"_chain_%c",this_chainid
);
1679 /* Check if there have been previous chains with the same id */
1681 for(k
=0;k
<chain
;k
++)
1683 if(cc
->chainid
== chains
[k
].chainid
)
1688 /* Add the number for this chain identifier if there are multiple copies */
1692 sprintf(buf
,"%d",nid_used
+1);
1696 if(strlen(suffix
)>0)
1698 sprintf(molname
,"%s%s",p_restype
,suffix
);
1702 strcpy(molname
,p_restype
);
1706 if ((nch
-nwaterchain
>1) && !cc
->bAllWat
) {
1708 strcpy(itp_fn
,top_fn
);
1709 printf("Chain time...\n");
1710 c
=strrchr(itp_fn
,'.');
1711 sprintf(c
,"_%s.itp",molname
);
1712 c
=strrchr(posre_fn
,'.');
1713 sprintf(c
,"_%s.itp",molname
);
1714 if (strcmp(itp_fn
,posre_fn
) == 0) {
1715 strcpy(buf_fn
,posre_fn
);
1716 c
= strrchr(buf_fn
,'.');
1718 sprintf(posre_fn
,"%s_pr.itp",buf_fn
);
1722 srenew(incls
,nincl
);
1723 incls
[nincl
-1]=strdup(itp_fn
);
1724 itp_file
=gmx_fio_fopen(itp_fn
,"w");
1728 srenew(mols
,nmol
+1);
1730 mols
[nmol
].name
= strdup("SOL");
1731 mols
[nmol
].nr
= pdba
->nres
;
1733 mols
[nmol
].name
= strdup(molname
);
1739 print_top_comment(itp_file
,itp_fn
,generator
,TRUE
);
1749 pdb2top(top_file2
,posre_fn
,molname
,pdba
,&x
,atype
,&symtab
,
1751 restp_chain
,hb_chain
,
1752 cc
->nterpairs
,cc
->ntdb
,cc
->ctdb
,cc
->r_start
,cc
->r_end
,bAllowMissing
,
1753 bVsites
,bVsiteAromatics
,forcefield
,ffdir
,
1754 mHmult
,nssbonds
,ssbonds
,
1755 long_bond_dist
,short_bond_dist
,bDeuterate
,bChargeGroups
,bCmap
,
1756 bRenumRes
,bRTPresname
);
1759 write_posres(posre_fn
,pdba
,posre_fc
);
1762 gmx_fio_fclose(itp_file
);
1764 /* pdba and natom have been reassigned somewhere so: */
1769 if (cc
->chainid
== ' ')
1770 sprintf(fn
,"chain.pdb");
1772 sprintf(fn
,"chain_%c.pdb",cc
->chainid
);
1773 cool_quote(quote
,255,NULL
);
1774 write_sto_conf(fn
,quote
,pdba
,x
,NULL
,ePBC
,box
);
1778 if (watermodel
== NULL
) {
1779 for(chain
=0; chain
<nch
; chain
++) {
1780 if (chains
[chain
].bAllWat
) {
1781 gmx_fatal(FARGS
,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1785 sprintf(buf_fn
,"%s%c%s.itp",ffdir
,DIR_SEPARATOR
,watermodel
);
1786 if (!fflib_fexist(buf_fn
)) {
1787 gmx_fatal(FARGS
,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1792 print_top_mols(top_file
,title
,ffdir
,watermodel
,nincl
,incls
,nmol
,mols
);
1793 gmx_fio_fclose(top_file
);
1795 gmx_residuetype_destroy(rt
);
1797 /* now merge all chains back together */
1800 for (i
=0; (i
<nch
); i
++) {
1801 natom
+=chains
[i
].pdba
->nr
;
1802 nres
+=chains
[i
].pdba
->nres
;
1805 init_t_atoms(atoms
,natom
,FALSE
);
1806 for(i
=0; i
< atoms
->nres
; i
++)
1807 sfree(atoms
->resinfo
[i
].name
);
1808 sfree(atoms
->resinfo
);
1810 snew(atoms
->resinfo
,nres
);
1814 for (i
=0; (i
<nch
); i
++) {
1816 printf("Including chain %d in system: %d atoms %d residues\n",
1817 i
+1,chains
[i
].pdba
->nr
,chains
[i
].pdba
->nres
);
1818 for (j
=0; (j
<chains
[i
].pdba
->nr
); j
++) {
1819 atoms
->atom
[k
]=chains
[i
].pdba
->atom
[j
];
1820 atoms
->atom
[k
].resind
+= l
; /* l is processed nr of residues */
1821 atoms
->atomname
[k
]=chains
[i
].pdba
->atomname
[j
];
1822 atoms
->resinfo
[atoms
->atom
[k
].resind
].chainid
= chains
[i
].chainid
;
1823 copy_rvec(chains
[i
].x
[j
],x
[k
]);
1826 for (j
=0; (j
<chains
[i
].pdba
->nres
); j
++) {
1827 atoms
->resinfo
[l
] = chains
[i
].pdba
->resinfo
[j
];
1829 atoms
->resinfo
[l
].name
= atoms
->resinfo
[l
].rtp
;
1836 fprintf(stderr
,"Now there are %d atoms and %d residues\n",k
,l
);
1837 print_sums(atoms
, TRUE
);
1840 fprintf(stderr
,"\nWriting coordinate file...\n");
1841 clear_rvec(box_space
);
1843 gen_box(0,atoms
->nr
,x
,box
,box_space
,FALSE
);
1844 write_sto_conf(ftp2fn(efSTO
,NFILE
,fnm
),title
,atoms
,x
,NULL
,ePBC
,box
);
1846 printf("\t\t--------- PLEASE NOTE ------------\n");
1847 printf("You have successfully generated a topology from: %s.\n",
1848 opt2fn("-f",NFILE
,fnm
));
1849 if (watermodel
!= NULL
) {
1850 printf("The %s force field and the %s water model are used.\n",
1853 printf("The %s force field is used.\n",
1856 printf("\t\t--------- ETON ESAELP ------------\n");