fixed pdb2gmx behavior for nearly matching residue names and made the matching much...
[gromacs.git] / src / kernel / pdb2gmx.c
blob9c22d9207b6d2cdd4a55e0337dc71fe7222fb020
1 /*
2 * $id$
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <time.h>
41 #include <ctype.h>
42 #include "sysstuff.h"
43 #include "typedefs.h"
44 #include "gmxfio.h"
45 #include "smalloc.h"
46 #include "copyrite.h"
47 #include "string2.h"
48 #include "confio.h"
49 #include "symtab.h"
50 #include "vec.h"
51 #include "statutil.h"
52 #include "futil.h"
53 #include "gmx_fatal.h"
54 #include "pdbio.h"
55 #include "toputil.h"
56 #include "h_db.h"
57 #include "physics.h"
58 #include "pgutil.h"
59 #include "calch.h"
60 #include "resall.h"
61 #include "pdb2top.h"
62 #include "ter_db.h"
63 #include "strdb.h"
64 #include "gbutil.h"
65 #include "genhydro.h"
66 #include "readinp.h"
67 #include "atomprop.h"
68 #include "xlate.h"
69 #include "specbond.h"
70 #include "index.h"
71 #include "hizzie.h"
72 #include "fflibutil.h"
75 typedef struct {
76 char gmx[6];
77 char main[6];
78 char nter[6];
79 char cter[6];
80 char bter[6];
81 } rtprename_t;
84 static const char *res2bb_notermini(const char *name,
85 int nrr,const rtprename_t *rr)
87 /* NOTE: This function returns the main building block name,
88 * it does not take terminal renaming into account.
90 int i;
92 i = 0;
93 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
94 i++;
97 return (i < nrr ? rr[i].main : name);
100 static const char *select_res(int nr,int resnr,
101 const char *name[],const char *expl[],
102 const char *title,
103 int nrr,const rtprename_t *rr)
105 int sel=0;
107 printf("Which %s type do you want for residue %d\n",title,resnr+1);
108 for(sel=0; (sel < nr); sel++) {
109 printf("%d. %s (%s)\n",
110 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
112 printf("\nType a number:"); fflush(stdout);
114 if (scanf("%d",&sel) != 1)
115 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
117 return name[sel];
120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
122 enum { easp, easpH, easpNR };
123 const char *lh[easpNR] = { "ASP", "ASPH" };
124 const char *expl[easpNR] = {
125 "Not protonated (charge -1)",
126 "Protonated (charge 0)"
129 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
134 enum { eglu, egluH, egluNR };
135 const char *lh[egluNR] = { "GLU", "GLUH" };
136 const char *expl[egluNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
146 enum { egln, eglnH, eglnNR };
147 const char *lh[eglnNR] = { "GLN", "QLN" };
148 const char *expl[eglnNR] = {
149 "Not protonated (charge 0)",
150 "Protonated (charge +1)"
153 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
158 enum { elys, elysH, elysNR };
159 const char *lh[elysNR] = { "LYSN", "LYS" };
160 const char *expl[elysNR] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
170 enum { earg, eargH, eargNR };
171 const char *lh[eargNR] = { "ARGN", "ARG" };
172 const char *expl[eargNR] = {
173 "Not protonated (charge 0)",
174 "Protonated (charge +1)"
177 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
180 static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
182 enum { ecys, ecysH, ecysNR };
183 const char *lh[ecysNR] = { "CYS2", "CYS" };
184 const char *expl[ecysNR] = {
185 "Cysteine in disulfide bridge",
186 "Protonated"
189 return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
193 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
195 const char *expl[ehisNR] = {
196 "H on ND1 only",
197 "H on NE2 only",
198 "H on ND1 and NE2",
199 "Coupled to Heme"
202 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
205 static void read_rtprename(const char *fname,FILE *fp,
206 int *nrtprename,rtprename_t **rtprename)
208 char line[STRLEN],buf[STRLEN];
209 int n;
210 rtprename_t *rr;
211 int ncol,nc;
213 n = *nrtprename;
214 rr = *rtprename;
216 ncol = 0;
217 while(get_a_line(fp,line,STRLEN)) {
218 srenew(rr,n+1);
219 nc = sscanf(line,"%s %s %s %s %s %s",
220 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
221 if (ncol == 0) {
222 if (nc != 2 && nc != 5) {
223 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
225 ncol = nc;
226 } else if (nc != ncol) {
227 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
230 if (nc == 2) {
231 /* This file does not have special termini names, copy them from main */
232 strcpy(rr[n].nter,rr[n].main);
233 strcpy(rr[n].cter,rr[n].main);
234 strcpy(rr[n].bter,rr[n].main);
237 n++;
240 *nrtprename = n;
241 *rtprename = rr;
244 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
245 int nrr,rtprename_t *rr,t_symtab *symtab,
246 bool bVerbose)
248 int r,i,j;
249 bool bStart,bEnd;
250 char *nn;
252 for(r=0; r<pdba->nres; r++) {
253 i = 0;
254 while(i<nrr && strcmp(*pdba->resinfo[r].rtp,rr[i].gmx) != 0) {
255 i++;
258 /* If found in the database, rename this residue's rtp building block,
259 * otherwise keep the old name.
261 if (i < nrr) {
262 bStart = FALSE;
263 bEnd = FALSE;
264 for(j=0; j<nterpairs; j++) {
265 if (r == r_start[j]) {
266 bStart = TRUE;
269 for(j=0; j<nterpairs; j++) {
270 if (r == r_end[j]) {
271 bEnd = TRUE;
274 if (bStart && bEnd) {
275 nn = rr[i].bter;
276 } else if (bStart) {
277 nn = rr[i].nter;
278 } else if (bEnd) {
279 nn = rr[i].cter;
280 } else {
281 nn = rr[i].main;
283 if (nn[0] == '-') {
284 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",pdba->resinfo[r].rtp,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
286 if (strcmp(*pdba->resinfo[r].rtp,nn) != 0) {
287 if (bVerbose) {
288 printf("Changing rtp entry of residue %d %s to '%s'\n",
289 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
291 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
297 static void pdbres_to_gmxrtp(t_atoms *pdba)
299 int i;
301 for(i=0; (i<pdba->nres); i++) {
302 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
306 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
307 bool bFullCompare,t_symtab *symtab)
309 char *resnm;
310 int i;
312 for(i=0; (i<pdba->nres); i++) {
313 resnm = *pdba->resinfo[i].name;
314 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
315 (!bFullCompare && strstr(resnm,oldnm) != NULL)) {
316 pdba->resinfo[i].name = put_symtab(symtab,newnm);
321 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
322 bool bFullCompare,t_symtab *symtab)
324 char *bbnm;
325 int i;
327 for(i=0; (i<pdba->nres); i++) {
328 bbnm = *pdba->resinfo[i].rtp;
329 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
330 (!bFullCompare && strstr(bbnm,oldnm) != NULL)) {
331 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
336 static void rename_bbint(t_atoms *pdba,const char *oldnm,
337 const char *gettp(int,int,const rtprename_t *),
338 bool bFullCompare,
339 t_symtab *symtab,
340 int nrr,const rtprename_t *rr)
342 int i;
343 const char *ptr;
344 char *bbnm;
346 for(i=0; i<pdba->nres; i++) {
347 bbnm = *pdba->resinfo[i].rtp;
348 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
349 (!bFullCompare && strstr(bbnm,oldnm) != NULL)) {
350 ptr = gettp(i,nrr,rr);
351 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
356 static void check_occupancy(t_atoms *atoms,const char *filename,bool bVerbose)
358 int i,ftp;
359 int nzero=0;
360 int nnotone=0;
362 ftp = fn2ftp(filename);
363 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
364 fprintf(stderr,"No occupancies in %s\n",filename);
365 else {
366 for(i=0; (i<atoms->nr); i++) {
367 if (atoms->pdbinfo[i].occup != 1) {
368 if (bVerbose)
369 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
370 *atoms->resinfo[atoms->atom[i].resind].name,
371 atoms->resinfo[ atoms->atom[i].resind].nr,
372 *atoms->atomname[i],
373 atoms->pdbinfo[i].occup);
374 if (atoms->pdbinfo[i].occup == 0)
375 nzero++;
376 else
377 nnotone++;
380 if (nzero == atoms->nr) {
381 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
382 } else if ((nzero > 0) || (nnotone > 0)) {
383 fprintf(stderr,
384 "\n"
385 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
386 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
387 "\n",
388 nzero,nnotone,atoms->nr);
389 } else {
390 fprintf(stderr,"All occupancies are one\n");
395 void write_posres(char *fn,t_atoms *pdba,real fc)
397 FILE *fp;
398 int i;
400 fp=gmx_fio_fopen(fn,"w");
401 fprintf(fp,
402 "; In this topology include file, you will find position restraint\n"
403 "; entries for all the heavy atoms in your original pdb file.\n"
404 "; This means that all the protons which were added by pdb2gmx are\n"
405 "; not restrained.\n"
406 "\n"
407 "[ position_restraints ]\n"
408 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
410 for(i=0; (i<pdba->nr); i++) {
411 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
412 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
414 gmx_fio_fclose(fp);
417 static int read_pdball(const char *inf, const char *outf,char *title,
418 t_atoms *atoms, rvec **x,
419 int *ePBC,matrix box, bool bRemoveH,
420 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
421 gmx_atomprop_t aps,bool bVerbose)
422 /* Read a pdb file. (containing proteins) */
424 int natom,new_natom,i;
426 /* READ IT */
427 printf("Reading %s...\n",inf);
428 get_stx_coordnum(inf,&natom);
429 init_t_atoms(atoms,natom,TRUE);
430 snew(*x,natom);
431 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
432 if (fn2ftp(inf) == efPDB)
433 get_pdb_atomnumber(atoms,aps);
434 if (bRemoveH) {
435 new_natom=0;
436 for(i=0; i<atoms->nr; i++)
437 if (!is_hydrogen(*atoms->atomname[i])) {
438 atoms->atom[new_natom]=atoms->atom[i];
439 atoms->atomname[new_natom]=atoms->atomname[i];
440 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
441 copy_rvec((*x)[i],(*x)[new_natom]);
442 new_natom++;
444 atoms->nr=new_natom;
445 natom=new_natom;
448 printf("Read");
449 if (title && title[0])
450 printf(" '%s',",title);
451 printf(" %d atoms\n",natom);
453 /* Rename residues */
454 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
455 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
456 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
458 rename_atoms("xlateat.dat",NULL,
459 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
461 if (natom == 0)
462 return 0;
464 if (outf)
465 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
467 return natom;
470 void process_chain(t_atoms *pdba, rvec *x,
471 bool bTrpU,bool bPheU,bool bTyrU,
472 bool bLysMan,bool bAspMan,bool bGluMan,
473 bool bHisMan,bool bArgMan,bool bGlnMan,
474 real angle,real distance,t_symtab *symtab,
475 int nrr,const rtprename_t *rr)
477 /* Initialize the rtp builing block names with the residue names */
478 pdbres_to_gmxrtp(pdba);
480 /* Rename aromatics, lys, asp and histidine */
481 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
482 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
483 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
484 if (bLysMan)
485 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
486 if (bArgMan)
487 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
488 if (bGlnMan)
489 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
490 if (bAspMan)
491 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
492 else
493 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
494 if (bGluMan)
495 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
496 else
497 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
499 if (!bHisMan)
500 set_histp(pdba,x,angle,distance);
501 else
502 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
505 /* struct for sorting the atoms from the pdb file */
506 typedef struct {
507 int resnr; /* residue number */
508 int j; /* database order index */
509 int index; /* original atom number */
510 char anm1; /* second letter of atom name */
511 char altloc; /* alternate location indicator */
512 } t_pdbindex;
514 int pdbicomp(const void *a,const void *b)
516 t_pdbindex *pa,*pb;
517 int d;
519 pa=(t_pdbindex *)a;
520 pb=(t_pdbindex *)b;
522 d = (pa->resnr - pb->resnr);
523 if (d==0) {
524 d = (pa->j - pb->j);
525 if (d==0) {
526 d = (pa->anm1 - pb->anm1);
527 if (d==0)
528 d = (pa->altloc - pb->altloc);
532 return d;
535 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
536 int natoms,t_atoms **pdbaptr,rvec **x,
537 t_blocka *block,char ***gnames)
539 t_atoms *pdba,*pdbnew;
540 rvec **xnew;
541 int i,j;
542 t_restp *rptr;
543 t_hackblock *hbr;
544 t_pdbindex *pdbi;
545 atom_id *a;
546 char *atomnm;
548 pdba=*pdbaptr;
549 natoms=pdba->nr;
550 pdbnew=NULL;
551 snew(xnew,1);
552 snew(pdbi, natoms);
554 for(i=0; i<natoms; i++) {
555 atomnm = *pdba->atomname[i];
556 rptr = &restp[pdba->atom[i].resind];
557 for(j=0; (j<rptr->natom); j++) {
558 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0) {
559 break;
562 if (j==rptr->natom) {
563 if ( ( ( pdba->atom[i].resind == 0) && (atomnm[0] == 'H') &&
564 ( (atomnm[1] == '1') || (atomnm[1] == '2') ||
565 (atomnm[1] == '3') ) ) )
566 j=1;
567 else {
568 char buf[STRLEN];
570 sprintf(buf,"Atom %s in residue %s %d not found in rtp entry %s with %d atoms\n"
571 "while sorting atoms%s",atomnm,
572 *pdba->resinfo[pdba->atom[i].resind].name,
573 pdba->resinfo[pdba->atom[i].resind].nr,
574 rptr->resname,
575 rptr->natom,
576 is_hydrogen(atomnm) ? ". Maybe different protonation state.\n"
577 " Remove this hydrogen or choose a different "
578 "protonation state.\n"
579 " Option -ignh will ignore all hydrogens "
580 "in the input." : "");
581 gmx_fatal(FARGS,buf);
584 /* make shadow array to be sorted into indexgroup */
585 pdbi[i].resnr = pdba->atom[i].resind;
586 pdbi[i].j = j;
587 pdbi[i].index = i;
588 pdbi[i].anm1 = atomnm[1];
589 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
591 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
593 /* pdba is sorted in pdbnew using the pdbi index */
594 snew(a,natoms);
595 snew(pdbnew,1);
596 init_t_atoms(pdbnew,natoms,TRUE);
597 snew(*xnew,natoms);
598 pdbnew->nr=pdba->nr;
599 pdbnew->nres=pdba->nres;
600 sfree(pdbnew->resinfo);
601 pdbnew->resinfo=pdba->resinfo;
602 for (i=0; i<natoms; i++) {
603 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
604 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
605 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
606 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
607 /* make indexgroup in block */
608 a[i]=pdbi[i].index;
610 /* clean up */
611 sfree(pdba->atomname);
612 sfree(pdba->atom);
613 sfree(pdba->pdbinfo);
614 sfree(pdba);
615 sfree(*x);
616 /* copy the sorted pdbnew back to pdba */
617 *pdbaptr=pdbnew;
618 *x=*xnew;
619 add_grp(block, gnames, natoms, a, "prot_sort");
620 sfree(xnew);
621 sfree(a);
622 sfree(pdbi);
625 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],bool bVerbose)
627 int i,j,oldnatoms,ndel;
628 t_resinfo *ri;
630 printf("Checking for duplicate atoms....\n");
631 oldnatoms = pdba->nr;
632 ndel = 0;
633 /* NOTE: pdba->nr is modified inside the loop */
634 for(i=1; (i < pdba->nr); i++) {
635 /* compare 'i' and 'i-1', throw away 'i' if they are identical
636 this is a 'while' because multiple alternate locations can be present */
637 while ( (i < pdba->nr) &&
638 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
639 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
640 ndel++;
641 if (bVerbose) {
642 ri = &pdba->resinfo[pdba->atom[i].resind];
643 printf("deleting duplicate atom %4s %s%4d%c",
644 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
645 if (ri->chainid && (ri->chainid != ' '))
646 printf(" ch %c", ri->chainid);
647 if (pdba->pdbinfo) {
648 if (pdba->pdbinfo[i].atomnr)
649 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
650 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
651 printf(" altloc %c",pdba->pdbinfo[i].altloc);
653 printf("\n");
655 pdba->nr--;
656 /* We can not free, since it might be in the symtab */
657 /* sfree(pdba->atomname[i]); */
658 for (j=i; j < pdba->nr; j++) {
659 pdba->atom[j] = pdba->atom[j+1];
660 pdba->atomname[j] = pdba->atomname[j+1];
661 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
662 copy_rvec(x[j+1],x[j]);
664 srenew(pdba->atom, pdba->nr);
665 /* srenew(pdba->atomname, pdba->nr); */
666 srenew(pdba->pdbinfo, pdba->nr);
669 if (pdba->nr != oldnatoms)
670 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
672 return pdba->nr;
675 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
677 int i;
678 const char *p_startrestype;
679 const char *p_restype;
680 int nstartwarn,nendwarn;
682 *r_start = -1;
683 *r_end = -1;
685 nstartwarn = 0;
686 nendwarn = 0;
688 /* Find the starting terminus (typially N or 5') */
689 for(i=r0;i<r1 && *r_start==-1;i++)
691 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
692 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
694 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
695 *r_start=i;
697 else
699 if(nstartwarn < 5)
701 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
703 if(nstartwarn == 5)
705 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
707 nstartwarn++;
711 if(*r_start>=0)
713 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
714 for(i=*r_start;i<r1;i++)
716 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
717 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
719 *r_end=i;
721 else
723 if(nendwarn < 5)
725 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
726 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
727 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
729 if(nendwarn == 5)
731 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
733 nendwarn++;
738 if(*r_end>=0)
740 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
745 static void
746 modify_chain_numbers(t_atoms * pdba,
747 const char * chainsep)
749 int i;
750 char old_prev_chainid;
751 char old_this_chainid;
752 int old_prev_chainnum;
753 int old_this_chainnum;
754 t_resinfo *ri;
755 int new_chainnum;
757 enum
759 SPLIT_ID_OR_TER,
760 SPLIT_ID_AND_TER,
761 SPLIT_ID_ONLY,
762 SPLIT_TER_ONLY
764 splitting;
766 printf("Splitting PDB chains based on ");
767 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
769 /* Be a bit flexible to catch typos */
770 if (!strncmp(chainsep,"id_o",4) || !strncmp(chainsep,"int",3))
772 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
773 splitting = SPLIT_ID_OR_TER;
774 printf("TER records or changing chain id.\n");
776 else if (!strncmp(chainsep,"id_a",4))
778 splitting = SPLIT_ID_AND_TER;
779 printf("TER records and chain id.\n");
781 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
783 splitting = SPLIT_ID_ONLY;
784 printf("changing chain id only (ignoring TER records).\n");
786 else if (chainsep[0]=='t')
788 splitting = SPLIT_TER_ONLY;
789 printf("TER records only (ignoring chain id).\n");
791 else
793 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
796 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
798 old_prev_chainid = '?';
799 old_prev_chainnum = -1;
800 new_chainnum = -1;
802 for(i=0;i<pdba->nres;i++)
804 ri = &pdba->resinfo[i];
805 old_this_chainid = ri->chainid;
806 old_this_chainnum = ri->chainnum;
808 switch (splitting)
810 case SPLIT_ID_OR_TER:
811 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
813 new_chainnum++;
815 break;
817 case SPLIT_ID_AND_TER:
818 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
820 new_chainnum++;
822 break;
824 case SPLIT_ID_ONLY:
825 if(old_this_chainid != old_prev_chainid)
827 new_chainnum++;
829 break;
831 case SPLIT_TER_ONLY:
832 if(old_this_chainnum != old_prev_chainnum)
834 new_chainnum++;
836 break;
837 default:
838 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
839 break;
841 old_prev_chainid = old_this_chainid;
842 old_prev_chainnum = old_this_chainnum;
844 ri->chainnum = new_chainnum;
849 typedef struct {
850 char chainid;
851 char chainnum;
852 int start;
853 int natom;
854 bool bAllWat;
855 int nterpairs;
856 int *chainstart;
857 } t_pdbchain;
859 typedef struct {
860 char chainid;
861 int chainnum;
862 bool bAllWat;
863 int nterpairs;
864 int *chainstart;
865 t_hackblock **ntdb;
866 t_hackblock **ctdb;
867 int *r_start;
868 int *r_end;
869 t_atoms *pdba;
870 rvec *x;
871 } t_chain;
873 int main(int argc, char *argv[])
875 const char *desc[] = {
876 "This program reads a pdb (or gro) file, reads",
877 "some database files, adds hydrogens to the molecules and generates",
878 "coordinates in Gromacs (Gromos), or optionally pdb, format",
879 "and a topology in Gromacs format.",
880 "These files can subsequently be processed to generate a run input file.",
881 "[PAR]",
882 "pdb2gmx will search for force fields by looking for",
883 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
884 "of the current working directory and of the Gomracs library directory",
885 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
886 "variable.",
887 "By default the forcefield selection is interactive,",
888 "but you can use the [TT]-ff[tt] option to specify one of the short names",
889 "in the list on the command line instead. In that case pdb2gmx just looks",
890 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
891 "[PAR]",
892 "After choosing a force field, all files will be read only from",
893 "the corresponding force field directory.",
894 "If you want to modify or add a residue types, you can copy the force",
895 "field directory from the Gromacs library directory to your current",
896 "working directory. If you want to add new protein residue types,",
897 "you will need to modify residuetypes.dat in the libary directory",
898 "or copy the whole library directory to a local directory and set",
899 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
900 "Check chapter 5 of the manual for more information about file formats.",
901 "[PAR]",
903 "Note that a pdb file is nothing more than a file format, and it",
904 "need not necessarily contain a protein structure. Every kind of",
905 "molecule for which there is support in the database can be converted.",
906 "If there is no support in the database, you can add it yourself.[PAR]",
908 "The program has limited intelligence, it reads a number of database",
909 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
910 "if necessary this can be done manually. The program can prompt the",
911 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
912 "wants. For LYS the choice is between neutral (two protons on NZ) or",
913 "protonated (three protons, default), for ASP and GLU unprotonated",
914 "(default) or protonated, for HIS the proton can be either on ND1,",
915 "on NE2 or on both. By default these selections are done automatically.",
916 "For His, this is based on an optimal hydrogen bonding",
917 "conformation. Hydrogen bonds are defined based on a simple geometric",
918 "criterium, specified by the maximum hydrogen-donor-acceptor angle",
919 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
920 "[TT]-dist[tt] respectively.[PAR]",
922 "The separation of chains is not entirely trivial since the markup",
923 "in user-generated PDB files frequently varies and sometimes it",
924 "is desirable to merge entries across a TER record, for instance",
925 "if you want a disulfide bridge or distance restraints between",
926 "two protein chains or if you have a HEME group bound to a protein.",
927 "In such cases multiple chains should be contained in a single",
928 "[TT]molecule_type[tt] definition.",
929 "To handle this, pdb2gmx has an option [TT]-chainsep[tt] so you can",
930 "choose whether a new chain should start when we find a TER record,",
931 "when the chain id changes, combinations of either or both of these",
932 "or fully interactively.[PAR]",
934 "pdb2gmx will also check the occupancy field of the pdb file.",
935 "If any of the occupancies are not one, indicating that the atom is",
936 "not resolved well in the structure, a warning message is issued.",
937 "When a pdb file does not originate from an X-Ray structure determination",
938 "all occupancy fields may be zero. Either way, it is up to the user",
939 "to verify the correctness of the input data (read the article!).[PAR]",
941 "During processing the atoms will be reordered according to Gromacs",
942 "conventions. With [TT]-n[tt] an index file can be generated that",
943 "contains one group reordered in the same way. This allows you to",
944 "convert a Gromos trajectory and coordinate file to Gromos. There is",
945 "one limitation: reordering is done after the hydrogens are stripped",
946 "from the input and before new hydrogens are added. This means that",
947 "you should not use [TT]-ignh[tt].[PAR]",
949 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
950 "identifiers. Therefore it is useful to enter a pdb file name at",
951 "the [TT]-o[tt] option when you want to convert a multichain pdb file.",
952 "[PAR]",
954 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
955 "motions. Angular and out-of-plane motions can be removed by changing",
956 "hydrogens into virtual sites and fixing angles, which fixes their",
957 "position relative to neighboring atoms. Additionally, all atoms in the",
958 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
959 "can be converted into virtual sites, elminating the fast improper dihedral",
960 "fluctuations in these rings. Note that in this case all other hydrogen",
961 "atoms are also converted to virtual sites. The mass of all atoms that are",
962 "converted into virtual sites, is added to the heavy atoms.[PAR]",
963 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
964 "done by increasing the hydrogen-mass by a factor of 4. This is also",
965 "done for water hydrogens to slow down the rotational motion of water.",
966 "The increase in mass of the hydrogens is subtracted from the bonded",
967 "(heavy) atom so that the total mass of the system remains the same."
971 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
972 int natom,nres;
973 t_atoms pdba_all,*pdba;
974 t_atoms *atoms;
975 t_resinfo *ri;
976 t_blocka *block;
977 int chain,nch,maxch,nwaterchain;
978 t_pdbchain *pdb_ch;
979 t_chain *chains,*cc;
980 char select[STRLEN];
981 int nincl,nmol;
982 char **incls;
983 t_mols *mols;
984 char **gnames;
985 int ePBC;
986 matrix box;
987 rvec box_space;
988 int i,j,k,l,nrtp;
989 int *swap_index,si;
990 t_restp *restp;
991 t_hackblock *ah;
992 t_symtab symtab;
993 gpp_atomtype_t atype;
994 gmx_residuetype_t rt;
995 const char *top_fn;
996 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
997 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
998 char *c,forcefield[STRLEN],ffdir[STRLEN];
999 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1000 char *watermodel;
1001 const char *watres;
1002 int nrtpf;
1003 char **rtpf;
1004 char rtp[STRLEN];
1005 int nrrn;
1006 char **rrn;
1007 int nrtprename,naa;
1008 rtprename_t *rtprename=NULL;
1009 int nah,nNtdb,nCtdb,ntdblist;
1010 t_hackblock *ntdb,*ctdb,**tdblist;
1011 int nssbonds;
1012 t_ssbond *ssbonds;
1013 rvec *pdbx,*x;
1014 bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bMerge;
1015 real mHmult=0;
1016 t_hackblock *hb_chain;
1017 t_restp *restp_chain;
1018 output_env_t oenv;
1019 const char *p_restype;
1020 int rc;
1021 int this_atomnum;
1022 int prev_atomnum;
1023 const char * prev_atomname;
1024 const char * this_atomname;
1025 const char * prev_resname;
1026 const char * this_resname;
1027 int prev_resnum;
1028 int this_resnum;
1029 char prev_chainid;
1030 char this_chainid;
1031 int prev_chainnumber;
1032 int this_chainnumber;
1033 int nid_used;
1034 int this_chainstart;
1035 int prev_chainstart;
1037 gmx_atomprop_t aps;
1039 t_filenm fnm[] = {
1040 { efSTX, "-f", "eiwit.pdb", ffREAD },
1041 { efSTO, "-o", "conf", ffWRITE },
1042 { efTOP, NULL, NULL, ffWRITE },
1043 { efITP, "-i", "posre", ffWRITE },
1044 { efNDX, "-n", "clean", ffOPTWR },
1045 { efSTO, "-q", "clean.pdb", ffOPTWR }
1047 #define NFILE asize(fnm)
1050 /* Command line arguments must be static */
1051 static bool bNewRTP=FALSE;
1052 static bool bInter=FALSE, bCysMan=FALSE;
1053 static bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1054 static bool bGlnMan=FALSE, bArgMan=FALSE;
1055 static bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1056 static bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1057 static bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1058 static bool bRenumRes=FALSE,bRTPresname=FALSE;
1059 static real angle=135.0, distance=0.3,posre_fc=1000;
1060 static real long_bond_dist=0.25, short_bond_dist=0.05;
1061 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1062 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1063 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1064 static const char *ff = "select";
1066 t_pargs pa[] = {
1067 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1068 "HIDDENWrite the residue database in new format to 'new.rtp'"},
1069 { "-lb", FALSE, etREAL, {&long_bond_dist},
1070 "HIDDENLong bond warning distance" },
1071 { "-sb", FALSE, etREAL, {&short_bond_dist},
1072 "HIDDENShort bond warning distance" },
1073 { "-chainsep", FALSE, etENUM, {chainsep},
1074 "Condition in PDB files when a new chain and molecule_type should be started" },
1075 { "-ff", FALSE, etSTR, {&ff},
1076 "Force field, interactive by default. Use -h for information." },
1077 { "-water", FALSE, etENUM, {watstr},
1078 "Water model to use" },
1079 { "-inter", FALSE, etBOOL, {&bInter},
1080 "Set the next 8 options to interactive"},
1081 { "-ss", FALSE, etBOOL, {&bCysMan},
1082 "Interactive SS bridge selection" },
1083 { "-ter", FALSE, etBOOL, {&bTerMan},
1084 "Interactive termini selection, iso charged" },
1085 { "-lys", FALSE, etBOOL, {&bLysMan},
1086 "Interactive Lysine selection, iso charged" },
1087 { "-arg", FALSE, etBOOL, {&bArgMan},
1088 "Interactive Arganine selection, iso charged" },
1089 { "-asp", FALSE, etBOOL, {&bAspMan},
1090 "Interactive Aspartic Acid selection, iso charged" },
1091 { "-glu", FALSE, etBOOL, {&bGluMan},
1092 "Interactive Glutamic Acid selection, iso charged" },
1093 { "-gln", FALSE, etBOOL, {&bGlnMan},
1094 "Interactive Glutamine selection, iso neutral" },
1095 { "-his", FALSE, etBOOL, {&bHisMan},
1096 "Interactive Histidine selection, iso checking H-bonds" },
1097 { "-angle", FALSE, etREAL, {&angle},
1098 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1099 { "-dist", FALSE, etREAL, {&distance},
1100 "Maximum donor-acceptor distance for a H-bond (nm)" },
1101 { "-una", FALSE, etBOOL, {&bUnA},
1102 "Select aromatic rings with united CH atoms on Phenylalanine, "
1103 "Tryptophane and Tyrosine" },
1104 { "-sort", FALSE, etBOOL, {&bSort},
1105 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1106 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1107 "Ignore hydrogen atoms that are in the pdb file" },
1108 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1109 "Continue when atoms are missing, dangerous" },
1110 { "-v", FALSE, etBOOL, {&bVerbose},
1111 "Be slightly more verbose in messages" },
1112 { "-posrefc",FALSE, etREAL, {&posre_fc},
1113 "Force constant for position restraints" },
1114 { "-vsite", FALSE, etENUM, {vsitestr},
1115 "Convert atoms to virtual sites" },
1116 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1117 "Make hydrogen atoms heavy" },
1118 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1119 "Change the mass of hydrogens to 2 amu" },
1120 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1121 "Use charge groups in the rtp file" },
1122 { "-cmap", TRUE, etBOOL, {&bCmap},
1123 "Use cmap torsions (if enabled in the rtp file)" },
1124 { "-renum", TRUE, etBOOL, {&bRenumRes},
1125 "Renumber the residues consecutively in the output" },
1126 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1127 "Use rtp entry names as residue names" }
1129 #define NPARGS asize(pa)
1131 CopyRight(stderr,argv[0]);
1132 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1133 0,NULL,&oenv);
1135 /* Force field selection, interactive or direct */
1136 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1137 forcefield,sizeof(forcefield),
1138 ffdir,sizeof(ffdir));
1140 if (strlen(forcefield) > 0) {
1141 strcpy(ffname,forcefield);
1142 ffname[0] = toupper(ffname[0]);
1143 } else {
1144 gmx_fatal(FARGS,"Empty forcefield string");
1147 printf("\nUsing the %s force field in directory %s\n\n",
1148 ffname,ffdir);
1150 choose_watermodel(watstr[0],ffdir,&watermodel);
1152 if (bInter) {
1153 /* if anything changes here, also change description of -inter */
1154 bCysMan = TRUE;
1155 bTerMan = TRUE;
1156 bLysMan = TRUE;
1157 bArgMan = TRUE;
1158 bAspMan = TRUE;
1159 bGluMan = TRUE;
1160 bGlnMan = TRUE;
1161 bHisMan = TRUE;
1164 if (bHeavyH)
1165 mHmult=4.0;
1166 else if (bDeuterate)
1167 mHmult=2.0;
1168 else
1169 mHmult=1.0;
1171 switch(vsitestr[0][0]) {
1172 case 'n': /* none */
1173 bVsites=FALSE;
1174 bVsiteAromatics=FALSE;
1175 break;
1176 case 'h': /* hydrogens */
1177 bVsites=TRUE;
1178 bVsiteAromatics=FALSE;
1179 break;
1180 case 'a': /* aromatics */
1181 bVsites=TRUE;
1182 bVsiteAromatics=TRUE;
1183 break;
1184 default:
1185 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1186 __FILE__,__LINE__,vsitestr[0]);
1187 }/* end switch */
1189 /* Open the symbol table */
1190 open_symtab(&symtab);
1192 /* Residue type database */
1193 gmx_residuetype_init(&rt);
1195 /* Read residue renaming database(s), if present */
1196 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1198 nrtprename = 0;
1199 rtprename = NULL;
1200 for(i=0; i<nrrn; i++) {
1201 fp = fflib_open(rrn[i]);
1202 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1203 fclose(fp);
1204 sfree(rrn[i]);
1206 sfree(rrn);
1208 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1209 naa=0;
1210 for(i=0;i<nrtprename;i++)
1212 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1214 /* Only add names if the 'standard' gromacs/iupac base name was found */
1215 if(rc==0)
1217 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1218 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1219 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1220 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1224 clear_mat(box);
1225 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1226 strstr(watermodel,"4P"))) {
1227 watres = "HO4";
1228 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1229 strstr(watermodel,"5P"))) {
1230 watres = "HO5";
1231 } else {
1232 watres = "HOH";
1235 aps = gmx_atomprop_init();
1236 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1237 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1238 aps,bVerbose);
1240 if (natom==0)
1241 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1243 printf("Analyzing pdb file\n");
1244 nch=0;
1245 maxch=0;
1246 nwaterchain=0;
1248 modify_chain_numbers(&pdba_all,chainsep[0]);
1251 bMerge = !strncmp(chainsep[0],"int",3);
1253 this_atomname = NULL;
1254 this_atomnum = -1;
1255 this_resname = NULL;
1256 this_resnum = -1;
1257 this_chainid = '?';
1258 this_chainnumber = -1;
1259 this_chainstart = 0;
1261 pdb_ch=NULL;
1262 for (i=0; (i<natom); i++)
1264 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1266 prev_atomname = this_atomname;
1267 prev_atomnum = this_atomnum;
1268 prev_resname = this_resname;
1269 prev_resnum = this_resnum;
1270 prev_chainid = this_chainid;
1271 prev_chainnumber = this_chainnumber;
1272 prev_chainstart = this_chainstart;
1274 this_atomname = *pdba_all.atomname[i];
1275 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1276 this_resname = *ri->name;
1277 this_resnum = ri->nr;
1278 this_chainid = ri->chainid;
1279 this_chainnumber = ri->chainnum;
1281 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1282 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1284 this_chainstart = pdba_all.atom[i].resind;
1285 if (bMerge && i>0 && !bWat)
1287 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
1288 "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
1289 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1290 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1292 if(NULL==fgets(select,STRLEN-1,stdin))
1294 gmx_fatal(FARGS,"Error reading from stdin");
1297 else
1299 select[0] = 'n';
1302 if (select[0] == 'y')
1304 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1305 pdba_all.atom[i].resind - prev_chainstart;
1306 pdb_ch[nch-1].nterpairs++;
1307 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1309 else
1311 /* set natom for previous chain */
1312 if (nch > 0)
1314 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1316 if (bWat)
1318 nwaterchain++;
1319 ri->chainid = ' ';
1321 /* check if chain identifier was used before */
1322 for (j=0; (j<nch); j++)
1324 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1326 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1327 "They will be treated as separate chains unless you reorder your file.\n",
1328 ri->chainid);
1331 if (nch == maxch)
1333 maxch += 16;
1334 srenew(pdb_ch,maxch);
1336 pdb_ch[nch].chainid = ri->chainid;
1337 pdb_ch[nch].chainnum = ri->chainnum;
1338 pdb_ch[nch].start=i;
1339 pdb_ch[nch].bAllWat=bWat;
1340 if (bWat)
1341 pdb_ch[nch].nterpairs=0;
1342 else
1343 pdb_ch[nch].nterpairs=1;
1344 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1345 /* modified [nch] to [0] below */
1346 pdb_ch[nch].chainstart[0]=0;
1347 nch++;
1350 bPrevWat=bWat;
1352 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1354 /* set all the water blocks at the end of the chain */
1355 snew(swap_index,nch);
1356 j=0;
1357 for(i=0; i<nch; i++)
1358 if (!pdb_ch[i].bAllWat) {
1359 swap_index[j]=i;
1360 j++;
1362 for(i=0; i<nch; i++)
1363 if (pdb_ch[i].bAllWat) {
1364 swap_index[j]=i;
1365 j++;
1367 if (nwaterchain>1)
1368 printf("Moved all the water blocks to the end\n");
1370 snew(chains,nch);
1371 /* copy pdb data and x for all chains */
1372 for (i=0; (i<nch); i++) {
1373 si=swap_index[i];
1374 chains[i].chainid = pdb_ch[si].chainid;
1375 chains[i].chainnum = pdb_ch[si].chainnum;
1376 chains[i].bAllWat = pdb_ch[si].bAllWat;
1377 chains[i].nterpairs = pdb_ch[si].nterpairs;
1378 chains[i].chainstart = pdb_ch[si].chainstart;
1379 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1380 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1381 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1382 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1384 snew(chains[i].pdba,1);
1385 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1386 snew(chains[i].x,chains[i].pdba->nr);
1387 for (j=0; j<chains[i].pdba->nr; j++) {
1388 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1389 snew(chains[i].pdba->atomname[j],1);
1390 *chains[i].pdba->atomname[j] =
1391 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1392 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1393 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1395 /* Re-index the residues assuming that the indices are continuous */
1396 k = chains[i].pdba->atom[0].resind;
1397 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1398 chains[i].pdba->nres = nres;
1399 for(j=0; j < chains[i].pdba->nr; j++) {
1400 chains[i].pdba->atom[j].resind -= k;
1402 srenew(chains[i].pdba->resinfo,nres);
1403 for(j=0; j<nres; j++) {
1404 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1405 snew(chains[i].pdba->resinfo[j].name,1);
1406 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1407 /* make all chain identifiers equal to that of the chain */
1408 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1412 if (bMerge)
1413 printf("\nMerged %d chains into one molecule definition\n\n",
1414 pdb_ch[0].nterpairs);
1416 printf("There are %d chains and %d blocks of water and "
1417 "%d residues with %d atoms\n",
1418 nch-nwaterchain,nwaterchain,
1419 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1421 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1422 for (i=0; (i<nch); i++)
1423 printf(" %d '%c' %5d %6d %s\n",
1424 i+1, chains[i].chainid ? chains[i].chainid:'-',
1425 chains[i].pdba->nres, chains[i].pdba->nr,
1426 chains[i].bAllWat ? "(only water)":"");
1427 printf("\n");
1429 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1431 /* Read atomtypes... */
1432 atype = read_atype(ffdir,&symtab);
1434 /* read residue database */
1435 printf("Reading residue database... (%s)\n",forcefield);
1436 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1437 nrtp = 0;
1438 restp = NULL;
1439 for(i=0; i<nrtpf; i++) {
1440 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1441 sfree(rtpf[i]);
1443 sfree(rtpf);
1444 if (bNewRTP) {
1445 /* Not correct with multiple rtp input files with different bonded types */
1446 fp=gmx_fio_fopen("new.rtp","w");
1447 print_resall(fp,nrtp,restp,atype);
1448 gmx_fio_fclose(fp);
1451 /* read hydrogen database */
1452 nah = read_h_db(ffdir,&ah);
1454 /* Read Termini database... */
1455 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1456 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1458 top_fn=ftp2fn(efTOP,NFILE,fnm);
1459 top_file=gmx_fio_fopen(top_fn,"w");
1461 #ifdef PACKAGE_VERSION
1462 sprintf(generator,"%s - version %s",ShortProgram(), PACKAGE_VERSION );
1463 #else
1464 sprintf(generator,"%s - version %s",ShortProgram(), "unknown" );
1465 #endif
1466 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1468 nincl=0;
1469 nmol=0;
1470 incls=NULL;
1471 mols=NULL;
1472 nres=0;
1473 for(chain=0; (chain<nch); chain++) {
1474 cc = &(chains[chain]);
1476 /* set pdba, natom and nres to the current chain */
1477 pdba =cc->pdba;
1478 x =cc->x;
1479 natom=cc->pdba->nr;
1480 nres =cc->pdba->nres;
1482 if (cc->chainid && ( cc->chainid != ' ' ) )
1483 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1484 chain+1,cc->chainid,natom,nres);
1485 else
1486 printf("Processing chain %d (%d atoms, %d residues)\n",
1487 chain+1,natom,nres);
1489 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1490 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1491 nrtprename,rtprename);
1493 for(i=0; i<cc->nterpairs; i++) {
1495 cc->chainstart[cc->nterpairs] = pdba->nres;
1497 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1498 &(cc->r_start[i]),&(cc->r_end[i]),rt);
1501 if ( (cc->r_start[i]<0) || (cc->r_end[i]<0) ) {
1502 printf("Problem with chain definition, or missing terminal residues.\n"
1503 "This chain does not appear to contain a recognized chain molecule.\n"
1504 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1506 cc->nterpairs = i;
1507 break;
1511 /* Check for disulfides and other special bonds */
1512 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1514 if (nrtprename > 0) {
1515 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1516 &symtab,bVerbose);
1519 if (debug) {
1520 if (nch==1) {
1521 sprintf(fn,"chain.pdb");
1522 } else {
1523 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1525 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1529 for(i=0; i<cc->nterpairs; i++)
1532 /* Set termini.
1533 * We first apply a filter so we only have the
1534 * termini that can be applied to the residue in question
1535 * (or a generic terminus if no-residue specific is available).
1537 /* First the N terminus */
1538 if (nNtdb > 0)
1540 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1541 *pdba->resinfo[cc->r_start[i]].name,
1542 *pdba->resinfo[cc->r_start[i]].rtp,
1543 &ntdblist);
1544 if(ntdblist==0)
1546 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1547 "is already in a terminus-specific form and skipping terminus selection.\n");
1548 cc->ntdb[i]=NULL;
1550 else
1552 if(bTerMan && ntdblist>1)
1554 cc->ntdb[i] = choose_ter(ntdblist,tdblist,"Select start terminus type");
1556 else
1558 cc->ntdb[i] = tdblist[0];
1561 printf("Start terminus: %s\n",(cc->ntdb[i])->name);
1562 sfree(tdblist);
1565 else
1567 cc->ntdb[i] = NULL;
1570 /* And the C terminus */
1571 if (nCtdb > 0)
1573 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1574 *pdba->resinfo[cc->r_start[i]].name,
1575 *pdba->resinfo[cc->r_end[i]].rtp,
1576 &ntdblist);
1577 if(ntdblist==0)
1579 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1580 "is already in a terminus-specific form and skipping terminus selection.\n");
1581 cc->ctdb[i] = NULL;
1583 else
1585 if(bTerMan && ntdblist>1)
1587 cc->ctdb[i] = choose_ter(ntdblist,tdblist,"Select end terminus type");
1589 else
1591 cc->ctdb[i] = tdblist[0];
1593 printf("End terminus: %s\n",(cc->ctdb[i])->name);
1594 sfree(tdblist);
1597 else
1599 cc->ctdb[i] = NULL;
1602 /* lookup hackblocks and rtp for all residues */
1603 get_hackblocks_rtp(&hb_chain, &restp_chain,
1604 nrtp, restp, pdba->nres, pdba->resinfo,
1605 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1606 /* ideally, now we would not need the rtp itself anymore, but do
1607 everything using the hb and restp arrays. Unfortunately, that
1608 requires some re-thinking of code in gen_vsite.c, which I won't
1609 do now :( AF 26-7-99 */
1611 rename_atoms(NULL,ffdir,
1612 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1614 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1616 if (bSort) {
1617 block = new_blocka();
1618 snew(gnames,1);
1619 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1620 natom,&pdba,&x,block,&gnames);
1621 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1622 if (ftp2bSet(efNDX,NFILE,fnm)) {
1623 if (bRemoveH) {
1624 fprintf(stderr,"WARNING: with the -remh option the generated "
1625 "index file (%s) might be useless\n"
1626 "(the index file is generated before hydrogens are added)",
1627 ftp2fn(efNDX,NFILE,fnm));
1629 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1631 for(i=0; i < block->nr; i++)
1632 sfree(gnames[i]);
1633 sfree(gnames);
1634 done_blocka(block);
1635 } else {
1636 fprintf(stderr,"WARNING: "
1637 "without sorting no check for duplicate atoms can be done\n");
1640 /* Generate Hydrogen atoms (and termini) in the sequence */
1641 natom=add_h(&pdba,&x,nah,ah,
1642 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1643 NULL,NULL,TRUE,FALSE);
1644 printf("Now there are %d residues with %d atoms\n",
1645 pdba->nres,pdba->nr);
1646 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1648 if (debug)
1649 for(i=0; (i<natom); i++)
1650 fprintf(debug,"Res %s%d atom %d %s\n",
1651 *(pdba->resinfo[pdba->atom[i].resind].name),
1652 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1654 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1656 /* make up molecule name(s) */
1658 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1660 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1662 suffix[0]='\0';
1664 if (cc->bAllWat)
1666 sprintf(molname,"Water");
1668 else
1670 this_chainid = cc->chainid;
1672 /* Add the chain id if we have one */
1673 if(this_chainid != ' ')
1675 sprintf(buf,"_chain_%c",this_chainid);
1676 strcat(suffix,buf);
1679 /* Check if there have been previous chains with the same id */
1680 nid_used = 0;
1681 for(k=0;k<chain;k++)
1683 if(cc->chainid == chains[k].chainid)
1685 nid_used++;
1688 /* Add the number for this chain identifier if there are multiple copies */
1689 if(nid_used>0)
1692 sprintf(buf,"%d",nid_used+1);
1693 strcat(suffix,buf);
1696 if(strlen(suffix)>0)
1698 sprintf(molname,"%s%s",p_restype,suffix);
1700 else
1702 strcpy(molname,p_restype);
1706 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1707 bITP=TRUE;
1708 strcpy(itp_fn,top_fn);
1709 printf("Chain time...\n");
1710 c=strrchr(itp_fn,'.');
1711 sprintf(c,"_%s.itp",molname);
1712 c=strrchr(posre_fn,'.');
1713 sprintf(c,"_%s.itp",molname);
1714 if (strcmp(itp_fn,posre_fn) == 0) {
1715 strcpy(buf_fn,posre_fn);
1716 c = strrchr(buf_fn,'.');
1717 *c = '\0';
1718 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1721 nincl++;
1722 srenew(incls,nincl);
1723 incls[nincl-1]=strdup(itp_fn);
1724 itp_file=gmx_fio_fopen(itp_fn,"w");
1725 } else
1726 bITP=FALSE;
1728 srenew(mols,nmol+1);
1729 if (cc->bAllWat) {
1730 mols[nmol].name = strdup("SOL");
1731 mols[nmol].nr = pdba->nres;
1732 } else {
1733 mols[nmol].name = strdup(molname);
1734 mols[nmol].nr = 1;
1736 nmol++;
1738 if (bITP)
1739 print_top_comment(itp_file,itp_fn,generator,TRUE);
1741 if (cc->bAllWat)
1742 top_file2=NULL;
1743 else
1744 if (bITP)
1745 top_file2=itp_file;
1746 else
1747 top_file2=top_file;
1749 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1750 nrtp,restp,
1751 restp_chain,hb_chain,
1752 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1753 bVsites,bVsiteAromatics,forcefield,ffdir,
1754 mHmult,nssbonds,ssbonds,
1755 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1756 bRenumRes,bRTPresname);
1758 if (!cc->bAllWat)
1759 write_posres(posre_fn,pdba,posre_fc);
1761 if (bITP)
1762 gmx_fio_fclose(itp_file);
1764 /* pdba and natom have been reassigned somewhere so: */
1765 cc->pdba = pdba;
1766 cc->x = x;
1768 if (debug) {
1769 if (cc->chainid == ' ')
1770 sprintf(fn,"chain.pdb");
1771 else
1772 sprintf(fn,"chain_%c.pdb",cc->chainid);
1773 cool_quote(quote,255,NULL);
1774 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1778 if (watermodel == NULL) {
1779 for(chain=0; chain<nch; chain++) {
1780 if (chains[chain].bAllWat) {
1781 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1784 } else {
1785 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1786 if (!fflib_fexist(buf_fn)) {
1787 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1788 buf_fn,watermodel);
1792 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1793 gmx_fio_fclose(top_file);
1795 gmx_residuetype_destroy(rt);
1797 /* now merge all chains back together */
1798 natom=0;
1799 nres=0;
1800 for (i=0; (i<nch); i++) {
1801 natom+=chains[i].pdba->nr;
1802 nres+=chains[i].pdba->nres;
1804 snew(atoms,1);
1805 init_t_atoms(atoms,natom,FALSE);
1806 for(i=0; i < atoms->nres; i++)
1807 sfree(atoms->resinfo[i].name);
1808 sfree(atoms->resinfo);
1809 atoms->nres=nres;
1810 snew(atoms->resinfo,nres);
1811 snew(x,natom);
1812 k=0;
1813 l=0;
1814 for (i=0; (i<nch); i++) {
1815 if (nch>1)
1816 printf("Including chain %d in system: %d atoms %d residues\n",
1817 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1818 for (j=0; (j<chains[i].pdba->nr); j++) {
1819 atoms->atom[k]=chains[i].pdba->atom[j];
1820 atoms->atom[k].resind += l; /* l is processed nr of residues */
1821 atoms->atomname[k]=chains[i].pdba->atomname[j];
1822 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1823 copy_rvec(chains[i].x[j],x[k]);
1824 k++;
1826 for (j=0; (j<chains[i].pdba->nres); j++) {
1827 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1828 if (bRTPresname) {
1829 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
1831 l++;
1835 if (nch>1) {
1836 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
1837 print_sums(atoms, TRUE);
1840 fprintf(stderr,"\nWriting coordinate file...\n");
1841 clear_rvec(box_space);
1842 if (box[0][0] == 0)
1843 gen_box(0,atoms->nr,x,box,box_space,FALSE);
1844 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
1846 printf("\t\t--------- PLEASE NOTE ------------\n");
1847 printf("You have successfully generated a topology from: %s.\n",
1848 opt2fn("-f",NFILE,fnm));
1849 if (watermodel != NULL) {
1850 printf("The %s force field and the %s water model are used.\n",
1851 ffname,watermodel);
1852 } else {
1853 printf("The %s force field is used.\n",
1854 ffname);
1856 printf("\t\t--------- ETON ESAELP ------------\n");
1859 thanx(stdout);
1861 return 0;