src/kernel/pdb2gmx.c

   1 /*
   2  * $id$
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <time.h>
  41 #include <ctype.h>
  42 #include "sysstuff.h"
  43 #include "typedefs.h"
  44 #include "gmxfio.h"
  45 #include "smalloc.h"
  46 #include "copyrite.h"
  47 #include "string2.h"
  48 #include "confio.h"
  49 #include "symtab.h"
  50 #include "vec.h"
  51 #include "statutil.h"
  52 #include "futil.h"
  53 #include "gmx_fatal.h"
  54 #include "pdbio.h"
  55 #include "toputil.h"
  56 #include "h_db.h"
  57 #include "physics.h"
  58 #include "pgutil.h"
  59 #include "calch.h"
  60 #include "resall.h"
  61 #include "pdb2top.h"
  62 #include "ter_db.h"
  63 #include "strdb.h"
  64 #include "gbutil.h"
  65 #include "genhydro.h"
  66 #include "readinp.h"
  67 #include "atomprop.h"
  68 #include "xlate.h"
  69 #include "specbond.h"
  70 #include "index.h"
  71 #include "hizzie.h"
  72 #include "fflibutil.h"
  73
  74
  75 typedef struct {
  76   char gmx[6];
  77   char main[6];
  78   char nter[6];
  79   char cter[6];
  80   char bter[6];
  81 } rtprename_t;
  82
  83
  84 static const char *res2bb_notermini(const char *name,
  85                                     int nrr,const rtprename_t *rr)
  86 {
  87   /* NOTE: This function returns the main building block name,
  88    *       it does not take terminal renaming into account.
  89    */
  90   int i;
  91
  92   i = 0;
  93   while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
  94     i++;
  95   }
  96
  97   return (i < nrr ? rr[i].main : name);
  98 }
  99
 100 static const char *select_res(int nr,int resnr,
 101                               const char *name[],const char *expl[],
 102                               const char *title,
 103                               int nrr,const rtprename_t *rr)
 104 {
 105   int sel=0;
 106
 107   printf("Which %s type do you want for residue %d\n",title,resnr+1);
 108   for(sel=0; (sel < nr); sel++) {
 109     printf("%d. %s (%s)\n",
 110            sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
 111   }
 112   printf("\nType a number:"); fflush(stdout);
 113
 114   if (scanf("%d",&sel) != 1)
 115     gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
 116
 117   return name[sel];
 118 }
 119
 120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
 121 {
 122   enum { easp, easpH, easpNR };
 123   const char *lh[easpNR] = { "ASP", "ASPH" };
 124   const char *expl[easpNR] = {
 125     "Not protonated (charge -1)",
 126     "Protonated (charge 0)"
 127   };
 128
 129   return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
 130 }
 131
 132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
 133 {
 134   enum { eglu, egluH, egluNR };
 135   const char *lh[egluNR] = { "GLU", "GLUH" };
 136   const char *expl[egluNR] = {
 137     "Not protonated (charge -1)",
 138     "Protonated (charge 0)"
 139   };
 140
 141   return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
 142 }
 143
 144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
 145 {
 146   enum { egln, eglnH, eglnNR };
 147   const char *lh[eglnNR] = { "GLN", "QLN" };
 148   const char *expl[eglnNR] = {
 149     "Not protonated (charge 0)",
 150     "Protonated (charge +1)"
 151   };
 152
 153   return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
 154 }
 155
 156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
 157 {
 158   enum { elys, elysH, elysNR };
 159   const  char *lh[elysNR] = { "LYSN", "LYS" };
 160   const char *expl[elysNR] = {
 161     "Not protonated (charge 0)",
 162     "Protonated (charge +1)"
 163   };
 164
 165   return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
 166 }
 167
 168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
 169 {
 170   enum { earg, eargH, eargNR };
 171   const  char *lh[eargNR] = { "ARGN", "ARG" };
 172   const char *expl[eargNR] = {
 173     "Not protonated (charge 0)",
 174     "Protonated (charge +1)"
 175   };
 176
 177   return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
 178 }
 179
 180 static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
 181 {
 182   enum { ecys, ecysH, ecysNR };
 183   const char *lh[ecysNR] = { "CYS2", "CYS" };
 184   const char *expl[ecysNR] = {
 185     "Cysteine in disulfide bridge",
 186     "Protonated"
 187   };
 188
 189   return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
 190
 191 }
 192
 193 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
 194 {
 195   const char *expl[ehisNR] = {
 196     "H on ND1 only",
 197     "H on NE2 only",
 198     "H on ND1 and NE2",
 199     "Coupled to Heme"
 200   };
 201
 202   return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
 203 }
 204
 205 static void read_rtprename(const char *fname,FILE *fp,
 206                            int *nrtprename,rtprename_t **rtprename)
 207 {
 208   char line[STRLEN],buf[STRLEN];
 209   int  n;
 210   rtprename_t *rr;
 211   int  ncol,nc;
 212
 213   n  = *nrtprename;
 214   rr = *rtprename;
 215
 216   ncol = 0;
 217   while(get_a_line(fp,line,STRLEN)) {
 218     srenew(rr,n+1);
 219     nc = sscanf(line,"%s %s %s %s %s %s",
 220                 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
 221     if (ncol == 0) {
 222       if (nc != 2 && nc != 5) {
 223         gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
 224       }
 225       ncol = nc;
 226     } else if (nc != ncol) {
 227         gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
 228     }
 229
 230     if (nc == 2) {
 231       /* This file does not have special termini names, copy them from main */
 232       strcpy(rr[n].nter,rr[n].main);
 233       strcpy(rr[n].cter,rr[n].main);
 234       strcpy(rr[n].bter,rr[n].main);
 235     }
 236
 237     n++;
 238   }
 239
 240   *nrtprename = n;
 241   *rtprename  = rr;
 242 }
 243
 244 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
 245                           int nrr,rtprename_t *rr,t_symtab *symtab,
 246                           bool bVerbose)
 247 {
 248   int  r,i,j;
 249   bool bStart,bEnd;
 250   char *nn;
 251
 252   for(r=0; r<pdba->nres; r++) {
 253     i = 0;
 254     while(i<nrr && strcmp(*pdba->resinfo[r].rtp,rr[i].gmx) != 0) {
 255       i++;
 256     }
 257
 258     /* If found in the database, rename this residue's rtp building block,
 259      * otherwise keep the old name.
 260      */
 261     if (i < nrr) {
 262       bStart = FALSE;
 263       bEnd   = FALSE;
 264       for(j=0; j<nterpairs; j++) {
 265         if (r == r_start[j]) {
 266           bStart = TRUE;
 267         }
 268       }
 269       for(j=0; j<nterpairs; j++) {
 270         if (r == r_end[j]) {
 271           bEnd = TRUE;
 272         }
 273       }
 274       if (bStart && bEnd) {
 275         nn = rr[i].bter;
 276       } else if (bStart) {
 277         nn = rr[i].nter;
 278       } else if (bEnd) {
 279         nn = rr[i].cter;
 280       } else {
 281         nn = rr[i].main;
 282       }
 283       if (nn[0] == '-') {
 284         gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",pdba->resinfo[r].rtp,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
 285       }
 286       if (strcmp(*pdba->resinfo[r].rtp,nn) != 0) {
 287         if (bVerbose) {
 288           printf("Changing rtp entry of residue %d %s to '%s'\n",
 289                  pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
 290         }
 291         pdba->resinfo[r].rtp = put_symtab(symtab,nn);
 292       }
 293     }
 294   }
 295 }
 296
 297 static void pdbres_to_gmxrtp(t_atoms *pdba)
 298 {
 299   int i;
 300
 301   for(i=0; (i<pdba->nres); i++) {
 302     pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 303   }
 304 }
 305
 306 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
 307                           bool bFullCompare,t_symtab *symtab)
 308 {
 309   char *resnm;
 310   int i;
 311
 312   for(i=0; (i<pdba->nres); i++) {
 313     resnm = *pdba->resinfo[i].name;
 314     if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
 315         (!bFullCompare && strstr(resnm,oldnm) != NULL)) {
 316       pdba->resinfo[i].name = put_symtab(symtab,newnm);
 317     }
 318   }
 319 }
 320
 321 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
 322                       bool bFullCompare,t_symtab *symtab)
 323 {
 324   char *bbnm;
 325   int i;
 326
 327   for(i=0; (i<pdba->nres); i++) {
 328     bbnm = *pdba->resinfo[i].rtp;
 329     if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
 330         (!bFullCompare && strstr(bbnm,oldnm) != NULL)) {
 331       pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
 332     }
 333   }
 334 }
 335
 336 static void rename_bbint(t_atoms *pdba,const char *oldnm,
 337                          const char *gettp(int,int,const rtprename_t *),
 338                          bool bFullCompare,
 339                          t_symtab *symtab,
 340                          int nrr,const rtprename_t *rr)
 341 {
 342   int  i;
 343   const char *ptr;
 344   char *bbnm;
 345
 346   for(i=0; i<pdba->nres; i++) {
 347     bbnm = *pdba->resinfo[i].rtp;
 348     if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
 349         (!bFullCompare && strstr(bbnm,oldnm) != NULL)) {
 350       ptr = gettp(i,nrr,rr);
 351       pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
 352     }
 353   }
 354 }
 355
 356 static void check_occupancy(t_atoms *atoms,const char *filename,bool bVerbose)
 357 {
 358   int i,ftp;
 359   int nzero=0;
 360   int nnotone=0;
 361
 362   ftp = fn2ftp(filename);
 363   if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 364     fprintf(stderr,"No occupancies in %s\n",filename);
 365   else {
 366     for(i=0; (i<atoms->nr); i++) {
 367       if (atoms->pdbinfo[i].occup != 1) {
 368         if (bVerbose)
 369           fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
 370                   *atoms->resinfo[atoms->atom[i].resind].name,
 371                   atoms->resinfo[ atoms->atom[i].resind].nr,
 372                   *atoms->atomname[i],
 373                   atoms->pdbinfo[i].occup);
 374         if (atoms->pdbinfo[i].occup == 0)
 375           nzero++;
 376         else
 377           nnotone++;
 378       }
 379     }
 380     if (nzero == atoms->nr) {
 381       fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
 382     } else if ((nzero > 0) || (nnotone > 0)) {
 383       fprintf(stderr,
 384               "\n"
 385               "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
 386               "         occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
 387               "\n",
 388               nzero,nnotone,atoms->nr);
 389     } else {
 390       fprintf(stderr,"All occupancies are one\n");
 391     }
 392   }
 393 }
 394
 395 void write_posres(char *fn,t_atoms *pdba,real fc)
 396 {
 397   FILE *fp;
 398   int  i;
 399
 400   fp=gmx_fio_fopen(fn,"w");
 401   fprintf(fp,
 402           "; In this topology include file, you will find position restraint\n"
 403           "; entries for all the heavy atoms in your original pdb file.\n"
 404           "; This means that all the protons which were added by pdb2gmx are\n"
 405           "; not restrained.\n"
 406           "\n"
 407           "[ position_restraints ]\n"
 408           "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
 409           );
 410   for(i=0; (i<pdba->nr); i++) {
 411     if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 412       fprintf(fp,"%6d%6d  %g  %g  %g\n",i+1,1,fc,fc,fc);
 413   }
 414   gmx_fio_fclose(fp);
 415 }
 416
 417 static int read_pdball(const char *inf, const char *outf,char *title,
 418                        t_atoms *atoms, rvec **x,
 419                        int *ePBC,matrix box, bool bRemoveH,
 420                        t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
 421                        gmx_atomprop_t aps,bool bVerbose)
 422 /* Read a pdb file. (containing proteins) */
 423 {
 424   int  natom,new_natom,i;
 425
 426   /* READ IT */
 427   printf("Reading %s...\n",inf);
 428   get_stx_coordnum(inf,&natom);
 429   init_t_atoms(atoms,natom,TRUE);
 430   snew(*x,natom);
 431   read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
 432   if (fn2ftp(inf) == efPDB)
 433     get_pdb_atomnumber(atoms,aps);
 434   if (bRemoveH) {
 435     new_natom=0;
 436     for(i=0; i<atoms->nr; i++)
 437       if (!is_hydrogen(*atoms->atomname[i])) {
 438         atoms->atom[new_natom]=atoms->atom[i];
 439         atoms->atomname[new_natom]=atoms->atomname[i];
 440         atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
 441         copy_rvec((*x)[i],(*x)[new_natom]);
 442         new_natom++;
 443       }
 444     atoms->nr=new_natom;
 445     natom=new_natom;
 446   }
 447
 448   printf("Read");
 449   if (title && title[0])
 450     printf(" '%s',",title);
 451   printf(" %d atoms\n",natom);
 452
 453   /* Rename residues */
 454   rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
 455   rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
 456   rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
 457
 458   rename_atoms("xlateat.dat",NULL,
 459                atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
 460
 461   if (natom == 0)
 462     return 0;
 463
 464   if (outf)
 465     write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
 466
 467   return natom;
 468 }
 469
 470 void process_chain(t_atoms *pdba, rvec *x,
 471                    bool bTrpU,bool bPheU,bool bTyrU,
 472                    bool bLysMan,bool bAspMan,bool bGluMan,
 473                    bool bHisMan,bool bArgMan,bool bGlnMan,
 474                    real angle,real distance,t_symtab *symtab,
 475                    int nrr,const rtprename_t *rr)
 476 {
 477   /* Initialize the rtp builing block names with the residue names */
 478   pdbres_to_gmxrtp(pdba);
 479
 480   /* Rename aromatics, lys, asp and histidine */
 481   if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
 482   if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
 483   if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
 484   if (bLysMan)
 485     rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
 486   if (bArgMan)
 487     rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
 488   if (bGlnMan)
 489     rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
 490   if (bAspMan)
 491     rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
 492   else
 493     rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
 494   if (bGluMan)
 495     rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
 496   else
 497     rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
 498
 499   if (!bHisMan)
 500     set_histp(pdba,x,angle,distance);
 501   else
 502     rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
 503 }
 504
 505 /* struct for sorting the atoms from the pdb file */
 506 typedef struct {
 507   int  resnr;  /* residue number               */
 508   int  j;      /* database order index         */
 509   int  index;  /* original atom number         */
 510   char anm1;   /* second letter of atom name   */
 511   char altloc; /* alternate location indicator */
 512 } t_pdbindex;
 513
 514 int pdbicomp(const void *a,const void *b)
 515 {
 516   t_pdbindex *pa,*pb;
 517   int d;
 518
 519   pa=(t_pdbindex *)a;
 520   pb=(t_pdbindex *)b;
 521
 522   d = (pa->resnr - pb->resnr);
 523   if (d==0) {
 524     d = (pa->j - pb->j);
 525     if (d==0) {
 526       d = (pa->anm1 - pb->anm1);
 527       if (d==0)
 528         d = (pa->altloc - pb->altloc);
 529     }
 530   }
 531
 532   return d;
 533 }
 534
 535 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
 536                           int natoms,t_atoms **pdbaptr,rvec **x,
 537                           t_blocka *block,char ***gnames)
 538 {
 539   t_atoms *pdba,*pdbnew;
 540   rvec **xnew;
 541   int     i,j;
 542   t_restp *rptr;
 543   t_hackblock *hbr;
 544   t_pdbindex *pdbi;
 545   atom_id *a;
 546   char *atomnm;
 547
 548   pdba=*pdbaptr;
 549   natoms=pdba->nr;
 550   pdbnew=NULL;
 551   snew(xnew,1);
 552   snew(pdbi, natoms);
 553
 554   for(i=0; i<natoms; i++) {
 555     atomnm = *pdba->atomname[i];
 556     rptr = &restp[pdba->atom[i].resind];
 557     for(j=0; (j<rptr->natom); j++) {
 558       if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0) {
 559         break;
 560       }
 561     }
 562     if (j==rptr->natom) {
 563       if ( ( ( pdba->atom[i].resind == 0) && (atomnm[0] == 'H') &&
 564              ( (atomnm[1] == '1') || (atomnm[1] == '2') ||
 565                (atomnm[1] == '3') ) ) )
 566         j=1;
 567       else {
 568         char buf[STRLEN];
 569
 570         sprintf(buf,"Atom %s in residue %s %d not found in rtp entry %s with %d atoms\n"
 571                 "while sorting atoms%s",atomnm,
 572                 *pdba->resinfo[pdba->atom[i].resind].name,
 573                 pdba->resinfo[pdba->atom[i].resind].nr,
 574                 rptr->resname,
 575                 rptr->natom,
 576                 is_hydrogen(atomnm) ? ". Maybe different protonation state.\n"
 577                 "             Remove this hydrogen or choose a different "
 578                 "protonation state.\n"
 579                 "             Option -ignh will ignore all hydrogens "
 580                 "in the input." : "");
 581         gmx_fatal(FARGS,buf);
 582       }
 583     }
 584     /* make shadow array to be sorted into indexgroup */
 585     pdbi[i].resnr  = pdba->atom[i].resind;
 586     pdbi[i].j      = j;
 587     pdbi[i].index  = i;
 588     pdbi[i].anm1   = atomnm[1];
 589     pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 590   }
 591   qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
 592
 593   /* pdba is sorted in pdbnew using the pdbi index */
 594   snew(a,natoms);
 595   snew(pdbnew,1);
 596   init_t_atoms(pdbnew,natoms,TRUE);
 597   snew(*xnew,natoms);
 598   pdbnew->nr=pdba->nr;
 599   pdbnew->nres=pdba->nres;
 600   sfree(pdbnew->resinfo);
 601   pdbnew->resinfo=pdba->resinfo;
 602   for (i=0; i<natoms; i++) {
 603     pdbnew->atom[i]     = pdba->atom[pdbi[i].index];
 604     pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
 605     pdbnew->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 606     copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
 607      /* make indexgroup in block */
 608     a[i]=pdbi[i].index;
 609   }
 610   /* clean up */
 611   sfree(pdba->atomname);
 612   sfree(pdba->atom);
 613   sfree(pdba->pdbinfo);
 614   sfree(pdba);
 615   sfree(*x);
 616   /* copy the sorted pdbnew back to pdba */
 617   *pdbaptr=pdbnew;
 618   *x=*xnew;
 619   add_grp(block, gnames, natoms, a, "prot_sort");
 620   sfree(xnew);
 621   sfree(a);
 622   sfree(pdbi);
 623 }
 624
 625 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],bool bVerbose)
 626 {
 627   int     i,j,oldnatoms,ndel;
 628   t_resinfo *ri;
 629
 630   printf("Checking for duplicate atoms....\n");
 631   oldnatoms    = pdba->nr;
 632   ndel = 0;
 633   /* NOTE: pdba->nr is modified inside the loop */
 634   for(i=1; (i < pdba->nr); i++) {
 635     /* compare 'i' and 'i-1', throw away 'i' if they are identical
 636        this is a 'while' because multiple alternate locations can be present */
 637     while ( (i < pdba->nr) &&
 638             (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
 639             (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
 640       ndel++;
 641       if (bVerbose) {
 642         ri = &pdba->resinfo[pdba->atom[i].resind];
 643         printf("deleting duplicate atom %4s  %s%4d%c",
 644                *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
 645         if (ri->chainid && (ri->chainid != ' '))
 646           printf(" ch %c", ri->chainid);
 647         if (pdba->pdbinfo) {
 648           if (pdba->pdbinfo[i].atomnr)
 649             printf("  pdb nr %4d",pdba->pdbinfo[i].atomnr);
 650           if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
 651             printf("  altloc %c",pdba->pdbinfo[i].altloc);
 652         }
 653         printf("\n");
 654       }
 655       pdba->nr--;
 656       /* We can not free, since it might be in the symtab */
 657       /* sfree(pdba->atomname[i]); */
 658       for (j=i; j < pdba->nr; j++) {
 659         pdba->atom[j]     = pdba->atom[j+1];
 660         pdba->atomname[j] = pdba->atomname[j+1];
 661         pdba->pdbinfo[j]  = pdba->pdbinfo[j+1];
 662         copy_rvec(x[j+1],x[j]);
 663       }
 664       srenew(pdba->atom,     pdba->nr);
 665       /* srenew(pdba->atomname, pdba->nr); */
 666       srenew(pdba->pdbinfo,  pdba->nr);
 667     }
 668   }
 669   if (pdba->nr != oldnatoms)
 670     printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
 671
 672   return pdba->nr;
 673 }
 674
 675 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
 676 {
 677     int i;
 678     const char *p_startrestype;
 679     const char *p_restype;
 680     int         nstartwarn,nendwarn;
 681
 682     *r_start = -1;
 683     *r_end   = -1;
 684
 685     nstartwarn = 0;
 686     nendwarn   = 0;
 687
 688     /* Find the starting terminus (typially N or 5') */
 689     for(i=r0;i<r1 && *r_start==-1;i++)
 690     {
 691         gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
 692         if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
 693         {
 694             printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 695             *r_start=i;
 696         }
 697         else
 698         {
 699             if(nstartwarn < 5)
 700             {
 701                 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 702             }
 703             if(nstartwarn == 5)
 704             {
 705                 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
 706             }
 707             nstartwarn++;
 708         }
 709     }
 710
 711     if(*r_start>=0)
 712     {
 713         /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
 714         for(i=*r_start;i<r1;i++)
 715         {
 716             gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
 717             if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
 718             {
 719                 *r_end=i;
 720             }
 721             else
 722             {
 723                 if(nendwarn < 5)
 724                 {
 725                     printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
 726                            *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
 727                            *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
 728                 }
 729                 if(nendwarn == 5)
 730                 {
 731                     printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
 732                 }
 733                 nendwarn++;
 734             }
 735         }
 736     }
 737
 738     if(*r_end>=0)
 739     {
 740         printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
 741     }
 742 }
 743
 744
 745 static void
 746 modify_chain_numbers(t_atoms *       pdba,
 747                      const char *    chainsep)
 748 {
 749     int   i;
 750     char  old_prev_chainid;
 751     char  old_this_chainid;
 752     int   old_prev_chainnum;
 753     int   old_this_chainnum;
 754     t_resinfo *ri;
 755     int   new_chainnum;
 756
 757     enum
 758     {
 759         SPLIT_ID_OR_TER,
 760         SPLIT_ID_AND_TER,
 761         SPLIT_ID_ONLY,
 762         SPLIT_TER_ONLY
 763     }
 764     splitting;
 765
 766     printf("Splitting PDB chains based on ");
 767     splitting = SPLIT_TER_ONLY; /* keep compiler happy */
 768
 769     /* Be a bit flexible to catch typos */
 770     if (!strncmp(chainsep,"id_o",4) || !strncmp(chainsep,"int",3))
 771     {
 772         /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 773         splitting = SPLIT_ID_OR_TER;
 774         printf("TER records or changing chain id.\n");
 775     }
 776     else if (!strncmp(chainsep,"id_a",4))
 777     {
 778         splitting = SPLIT_ID_AND_TER;
 779         printf("TER records and chain id.\n");
 780     }
 781     else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
 782     {
 783         splitting = SPLIT_ID_ONLY;
 784         printf("changing chain id only (ignoring TER records).\n");
 785     }
 786     else if (chainsep[0]=='t')
 787     {
 788         splitting = SPLIT_TER_ONLY;
 789         printf("TER records only (ignoring chain id).\n");
 790     }
 791     else
 792     {
 793         gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
 794     }
 795
 796     /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
 797
 798     old_prev_chainid  = '?';
 799     old_prev_chainnum = -1;
 800     new_chainnum  = -1;
 801
 802     for(i=0;i<pdba->nres;i++)
 803     {
 804         ri = &pdba->resinfo[i];
 805         old_this_chainid  = ri->chainid;
 806         old_this_chainnum = ri->chainnum;
 807
 808         switch (splitting)
 809         {
 810             case SPLIT_ID_OR_TER:
 811                 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
 812                 {
 813                     new_chainnum++;
 814                 }
 815                 break;
 816
 817             case SPLIT_ID_AND_TER:
 818                 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
 819                 {
 820                     new_chainnum++;
 821                 }
 822                 break;
 823
 824             case SPLIT_ID_ONLY:
 825                 if(old_this_chainid != old_prev_chainid)
 826                 {
 827                     new_chainnum++;
 828                 }
 829                 break;
 830
 831             case SPLIT_TER_ONLY:
 832                 if(old_this_chainnum != old_prev_chainnum)
 833                 {
 834                     new_chainnum++;
 835                 }
 836                 break;
 837             default:
 838                 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
 839                 break;
 840         }
 841         old_prev_chainid  = old_this_chainid;
 842         old_prev_chainnum = old_this_chainnum;
 843
 844         ri->chainnum = new_chainnum;
 845     }
 846 }
 847
 848
 849 typedef struct {
 850   char chainid;
 851   char chainnum;
 852   int  start;
 853   int  natom;
 854   bool bAllWat;
 855   int  nterpairs;
 856   int  *chainstart;
 857 } t_pdbchain;
 858
 859 typedef struct {
 860   char chainid;
 861   int  chainnum;
 862   bool bAllWat;
 863   int nterpairs;
 864   int *chainstart;
 865   t_hackblock **ntdb;
 866   t_hackblock **ctdb;
 867   int *r_start;
 868   int *r_end;
 869   t_atoms *pdba;
 870   rvec *x;
 871 } t_chain;
 872
 873 int main(int argc, char *argv[])
 874 {
 875   const char *desc[] = {
 876     "This program reads a pdb (or gro) file, reads",
 877     "some database files, adds hydrogens to the molecules and generates",
 878     "coordinates in Gromacs (Gromos), or optionally pdb, format",
 879     "and a topology in Gromacs format.",
 880     "These files can subsequently be processed to generate a run input file.",
 881     "[PAR]",
 882     "pdb2gmx will search for force fields by looking for",
 883     "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
 884     "of the current working directory and of the Gomracs library directory",
 885     "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
 886     "variable.",
 887     "By default the forcefield selection is interactive,",
 888     "but you can use the [TT]-ff[tt] option to specify one of the short names",
 889     "in the list on the command line instead. In that case pdb2gmx just looks",
 890     "for the corresponding [TT]<forcefield>.ff[tt] directory.",
 891     "[PAR]",
 892     "After choosing a force field, all files will be read only from",
 893     "the corresponding force field directory.",
 894     "If you want to modify or add a residue types, you can copy the force",
 895     "field directory from the Gromacs library directory to your current",
 896     "working directory. If you want to add new protein residue types,",
 897     "you will need to modify residuetypes.dat in the libary directory",
 898     "or copy the whole library directory to a local directory and set",
 899     "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
 900     "Check chapter 5 of the manual for more information about file formats.",
 901     "[PAR]",
 902
 903     "Note that a pdb file is nothing more than a file format, and it",
 904     "need not necessarily contain a protein structure. Every kind of",
 905     "molecule for which there is support in the database can be converted.",
 906     "If there is no support in the database, you can add it yourself.[PAR]",
 907
 908     "The program has limited intelligence, it reads a number of database",
 909     "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
 910     "if necessary this can be done manually. The program can prompt the",
 911     "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
 912     "wants. For LYS the choice is between neutral (two protons on NZ) or",
 913     "protonated (three protons, default), for ASP and GLU unprotonated",
 914     "(default) or protonated, for HIS the proton can be either on ND1,",
 915     "on NE2 or on both. By default these selections are done automatically.",
 916     "For His, this is based on an optimal hydrogen bonding",
 917     "conformation. Hydrogen bonds are defined based on a simple geometric",
 918     "criterium, specified by the maximum hydrogen-donor-acceptor angle",
 919     "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
 920     "[TT]-dist[tt] respectively.[PAR]",
 921
 922     "The separation of chains is not entirely trivial since the markup",
 923     "in user-generated PDB files frequently varies and sometimes it",
 924     "is desirable to merge entries across a TER record, for instance",
 925     "if you want a disulfide bridge or distance restraints between",
 926     "two protein chains or if you have a HEME group bound to a protein.",
 927     "In such cases multiple chains should be contained in a single",
 928     "[TT]molecule_type[tt] definition.",
 929     "To handle this, pdb2gmx has an option [TT]-chainsep[tt] so you can",
 930     "choose whether a new chain should start when we find a TER record,",
 931     "when the chain id changes, combinations of either or both of these",
 932     "or fully interactively.[PAR]",
 933
 934     "pdb2gmx will also check the occupancy field of the pdb file.",
 935     "If any of the occupancies are not one, indicating that the atom is",
 936     "not resolved well in the structure, a warning message is issued.",
 937     "When a pdb file does not originate from an X-Ray structure determination",
 938     "all occupancy fields may be zero. Either way, it is up to the user",
 939     "to verify the correctness of the input data (read the article!).[PAR]",
 940
 941     "During processing the atoms will be reordered according to Gromacs",
 942     "conventions. With [TT]-n[tt] an index file can be generated that",
 943     "contains one group reordered in the same way. This allows you to",
 944     "convert a Gromos trajectory and coordinate file to Gromos. There is",
 945     "one limitation: reordering is done after the hydrogens are stripped",
 946     "from the input and before new hydrogens are added. This means that",
 947     "you should not use [TT]-ignh[tt].[PAR]",
 948
 949     "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
 950     "identifiers. Therefore it is useful to enter a pdb file name at",
 951     "the [TT]-o[tt] option when you want to convert a multichain pdb file.",
 952     "[PAR]",
 953
 954     "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
 955     "motions. Angular and out-of-plane motions can be removed by changing",
 956     "hydrogens into virtual sites and fixing angles, which fixes their",
 957     "position relative to neighboring atoms. Additionally, all atoms in the",
 958     "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
 959     "can be converted into virtual sites, elminating the fast improper dihedral",
 960     "fluctuations in these rings. Note that in this case all other hydrogen",
 961     "atoms are also converted to virtual sites. The mass of all atoms that are",
 962     "converted into virtual sites, is added to the heavy atoms.[PAR]",
 963     "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
 964     "done by increasing the hydrogen-mass by a factor of 4. This is also",
 965     "done for water hydrogens to slow down the rotational motion of water.",
 966     "The increase in mass of the hydrogens is subtracted from the bonded",
 967     "(heavy) atom so that the total mass of the system remains the same."
 968   };
 969
 970
 971   FILE       *fp,*top_file,*top_file2,*itp_file=NULL;
 972   int        natom,nres;
 973   t_atoms    pdba_all,*pdba;
 974   t_atoms    *atoms;
 975   t_resinfo  *ri;
 976   t_blocka   *block;
 977   int        chain,nch,maxch,nwaterchain;
 978   t_pdbchain *pdb_ch;
 979   t_chain    *chains,*cc;
 980   char       select[STRLEN];
 981   int        nincl,nmol;
 982   char       **incls;
 983   t_mols     *mols;
 984   char       **gnames;
 985   int        ePBC;
 986   matrix     box;
 987   rvec       box_space;
 988   int        i,j,k,l,nrtp;
 989   int        *swap_index,si;
 990   t_restp    *restp;
 991   t_hackblock *ah;
 992   t_symtab   symtab;
 993   gpp_atomtype_t atype;
 994   gmx_residuetype_t rt;
 995   const char *top_fn;
 996   char       fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
 997   char       molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
 998   char       *c,forcefield[STRLEN],ffdir[STRLEN];
 999   char       ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1000   char       *watermodel;
1001   const char *watres;
1002   int        nrtpf;
1003   char       **rtpf;
1004   char       rtp[STRLEN];
1005   int        nrrn;
1006   char       **rrn;
1007   int        nrtprename,naa;
1008   rtprename_t *rtprename=NULL;
1009   int        nah,nNtdb,nCtdb,ntdblist;
1010   t_hackblock *ntdb,*ctdb,**tdblist;
1011   int        nssbonds;
1012   t_ssbond   *ssbonds;
1013   rvec       *pdbx,*x;
1014   bool       bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bMerge;
1015   real       mHmult=0;
1016   t_hackblock *hb_chain;
1017   t_restp    *restp_chain;
1018   output_env_t oenv;
1019   const char *p_restype;
1020   int        rc;
1021   int           this_atomnum;
1022   int           prev_atomnum;
1023   const char *  prev_atomname;
1024   const char *  this_atomname;
1025   const char *  prev_resname;
1026   const char *  this_resname;
1027   int           prev_resnum;
1028   int           this_resnum;
1029   char          prev_chainid;
1030   char          this_chainid;
1031   int           prev_chainnumber;
1032   int           this_chainnumber;
1033   int           nid_used;
1034   int           this_chainstart;
1035   int           prev_chainstart;
1036
1037   gmx_atomprop_t aps;
1038
1039   t_filenm   fnm[] = {
1040     { efSTX, "-f", "eiwit.pdb", ffREAD  },
1041     { efSTO, "-o", "conf",      ffWRITE },
1042     { efTOP, NULL, NULL,        ffWRITE },
1043     { efITP, "-i", "posre",     ffWRITE },
1044     { efNDX, "-n", "clean",     ffOPTWR },
1045     { efSTO, "-q", "clean.pdb", ffOPTWR }
1046   };
1047 #define NFILE asize(fnm)
1048
1049
1050   /* Command line arguments must be static */
1051   static bool bNewRTP=FALSE;
1052   static bool bInter=FALSE, bCysMan=FALSE;
1053   static bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1054   static bool bGlnMan=FALSE, bArgMan=FALSE;
1055   static bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1056   static bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1057   static bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1058   static bool bRenumRes=FALSE,bRTPresname=FALSE;
1059   static real angle=135.0, distance=0.3,posre_fc=1000;
1060   static real long_bond_dist=0.25, short_bond_dist=0.05;
1061   static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1062   static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1063   static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1064   static const char *ff = "select";
1065
1066   t_pargs pa[] = {
1067     { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1068       "HIDDENWrite the residue database in new format to 'new.rtp'"},
1069     { "-lb",     FALSE, etREAL, {&long_bond_dist},
1070       "HIDDENLong bond warning distance" },
1071     { "-sb",     FALSE, etREAL, {&short_bond_dist},
1072       "HIDDENShort bond warning distance" },
1073     { "-chainsep", FALSE, etENUM, {chainsep},
1074       "Condition in PDB files when a new chain and molecule_type should be started" },
1075     { "-ff",     FALSE, etSTR,  {&ff},
1076       "Force field, interactive by default. Use -h for information." },
1077     { "-water",  FALSE, etENUM, {watstr},
1078       "Water model to use" },
1079     { "-inter",  FALSE, etBOOL, {&bInter},
1080       "Set the next 8 options to interactive"},
1081     { "-ss",     FALSE, etBOOL, {&bCysMan},
1082       "Interactive SS bridge selection" },
1083     { "-ter",    FALSE, etBOOL, {&bTerMan},
1084       "Interactive termini selection, iso charged" },
1085     { "-lys",    FALSE, etBOOL, {&bLysMan},
1086       "Interactive Lysine selection, iso charged" },
1087     { "-arg",    FALSE, etBOOL, {&bArgMan},
1088       "Interactive Arganine selection, iso charged" },
1089     { "-asp",    FALSE, etBOOL, {&bAspMan},
1090       "Interactive Aspartic Acid selection, iso charged" },
1091     { "-glu",    FALSE, etBOOL, {&bGluMan},
1092       "Interactive Glutamic Acid selection, iso charged" },
1093     { "-gln",    FALSE, etBOOL, {&bGlnMan},
1094       "Interactive Glutamine selection, iso neutral" },
1095     { "-his",    FALSE, etBOOL, {&bHisMan},
1096       "Interactive Histidine selection, iso checking H-bonds" },
1097     { "-angle",  FALSE, etREAL, {&angle},
1098       "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1099     { "-dist",   FALSE, etREAL, {&distance},
1100       "Maximum donor-acceptor distance for a H-bond (nm)" },
1101     { "-una",    FALSE, etBOOL, {&bUnA},
1102       "Select aromatic rings with united CH atoms on Phenylalanine, "
1103       "Tryptophane and Tyrosine" },
1104     { "-sort",   FALSE, etBOOL, {&bSort},
1105       "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1106     { "-ignh",   FALSE, etBOOL, {&bRemoveH},
1107       "Ignore hydrogen atoms that are in the pdb file" },
1108     { "-missing",FALSE, etBOOL, {&bAllowMissing},
1109       "Continue when atoms are missing, dangerous" },
1110     { "-v",      FALSE, etBOOL, {&bVerbose},
1111       "Be slightly more verbose in messages" },
1112     { "-posrefc",FALSE, etREAL, {&posre_fc},
1113       "Force constant for position restraints" },
1114     { "-vsite",  FALSE, etENUM, {vsitestr},
1115       "Convert atoms to virtual sites" },
1116     { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1117       "Make hydrogen atoms heavy" },
1118     { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1119       "Change the mass of hydrogens to 2 amu" },
1120     { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1121       "Use charge groups in the rtp file"  },
1122     { "-cmap", TRUE, etBOOL, {&bCmap},
1123       "Use cmap torsions (if enabled in the rtp file)"  },
1124     { "-renum", TRUE, etBOOL, {&bRenumRes},
1125       "Renumber the residues consecutively in the output"  },
1126     { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1127       "Use rtp entry names as residue names"  }
1128   };
1129 #define NPARGS asize(pa)
1130
1131   CopyRight(stderr,argv[0]);
1132   parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1133                     0,NULL,&oenv);
1134
1135   /* Force field selection, interactive or direct */
1136   choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1137             forcefield,sizeof(forcefield),
1138             ffdir,sizeof(ffdir));
1139
1140   if (strlen(forcefield) > 0) {
1141     strcpy(ffname,forcefield);
1142     ffname[0] = toupper(ffname[0]);
1143   } else {
1144     gmx_fatal(FARGS,"Empty forcefield string");
1145   }
1146
1147   printf("\nUsing the %s force field in directory %s\n\n",
1148          ffname,ffdir);
1149
1150   choose_watermodel(watstr[0],ffdir,&watermodel);
1151
1152   if (bInter) {
1153     /* if anything changes here, also change description of -inter */
1154     bCysMan = TRUE;
1155     bTerMan = TRUE;
1156     bLysMan = TRUE;
1157     bArgMan = TRUE;
1158     bAspMan = TRUE;
1159     bGluMan = TRUE;
1160     bGlnMan = TRUE;
1161     bHisMan = TRUE;
1162   }
1163
1164   if (bHeavyH)
1165     mHmult=4.0;
1166   else if (bDeuterate)
1167     mHmult=2.0;
1168   else
1169     mHmult=1.0;
1170
1171   switch(vsitestr[0][0]) {
1172   case 'n': /* none */
1173     bVsites=FALSE;
1174     bVsiteAromatics=FALSE;
1175     break;
1176   case 'h': /* hydrogens */
1177     bVsites=TRUE;
1178     bVsiteAromatics=FALSE;
1179     break;
1180   case 'a': /* aromatics */
1181     bVsites=TRUE;
1182     bVsiteAromatics=TRUE;
1183     break;
1184   default:
1185     gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1186                 __FILE__,__LINE__,vsitestr[0]);
1187   }/* end switch */
1188
1189   /* Open the symbol table */
1190   open_symtab(&symtab);
1191
1192   /* Residue type database */
1193   gmx_residuetype_init(&rt);
1194
1195   /* Read residue renaming database(s), if present */
1196   nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1197
1198   nrtprename = 0;
1199   rtprename  = NULL;
1200   for(i=0; i<nrrn; i++) {
1201     fp = fflib_open(rrn[i]);
1202     read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1203     fclose(fp);
1204     sfree(rrn[i]);
1205   }
1206   sfree(rrn);
1207
1208   /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1209   naa=0;
1210   for(i=0;i<nrtprename;i++)
1211   {
1212       rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1213
1214       /* Only add names if the 'standard' gromacs/iupac base name was found */
1215       if(rc==0)
1216       {
1217           gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1218           gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1219           gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1220           gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1221       }
1222   }
1223
1224   clear_mat(box);
1225   if (watermodel != NULL && (strstr(watermodel,"4p") ||
1226                              strstr(watermodel,"4P"))) {
1227     watres = "HO4";
1228   } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1229                                     strstr(watermodel,"5P"))) {
1230     watres = "HO5";
1231   } else {
1232     watres = "HOH";
1233   }
1234
1235   aps = gmx_atomprop_init();
1236   natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1237                       &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1238                       aps,bVerbose);
1239
1240   if (natom==0)
1241     gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1242
1243   printf("Analyzing pdb file\n");
1244   nch=0;
1245   maxch=0;
1246   nwaterchain=0;
1247
1248   modify_chain_numbers(&pdba_all,chainsep[0]);
1249
1250
1251   bMerge = !strncmp(chainsep[0],"int",3);
1252
1253   this_atomname       = NULL;
1254   this_atomnum        = -1;
1255   this_resname        = NULL;
1256   this_resnum         = -1;
1257   this_chainid        = '?';
1258   this_chainnumber    = -1;
1259   this_chainstart     = 0;
1260
1261   pdb_ch=NULL;
1262   for (i=0; (i<natom); i++)
1263   {
1264       ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1265
1266       prev_atomname      = this_atomname;
1267       prev_atomnum       = this_atomnum;
1268       prev_resname       = this_resname;
1269       prev_resnum        = this_resnum;
1270       prev_chainid       = this_chainid;
1271       prev_chainnumber   = this_chainnumber;
1272       prev_chainstart    = this_chainstart;
1273
1274       this_atomname      = *pdba_all.atomname[i];
1275       this_atomnum       = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1276       this_resname       = *ri->name;
1277       this_resnum        = ri->nr;
1278       this_chainid       = ri->chainid;
1279       this_chainnumber   = ri->chainnum;
1280
1281       bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1282       if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1283       {
1284           this_chainstart = pdba_all.atom[i].resind;
1285           if (bMerge && i>0 && !bWat)
1286           {
1287               printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
1288                      "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
1289                      prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1290                      this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1291
1292               if(NULL==fgets(select,STRLEN-1,stdin))
1293               {
1294                   gmx_fatal(FARGS,"Error reading from stdin");
1295               }
1296           }
1297           else
1298           {
1299               select[0] = 'n';
1300           }
1301
1302           if (select[0] == 'y')
1303           {
1304               pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1305               pdba_all.atom[i].resind - prev_chainstart;
1306               pdb_ch[nch-1].nterpairs++;
1307               srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1308           }
1309           else
1310           {
1311               /* set natom for previous chain */
1312               if (nch > 0)
1313               {
1314                   pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1315               }
1316               if (bWat)
1317               {
1318                   nwaterchain++;
1319                   ri->chainid = ' ';
1320               }
1321               /* check if chain identifier was used before */
1322               for (j=0; (j<nch); j++)
1323               {
1324                   if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1325                   {
1326                       printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1327                              "They will be treated as separate chains unless you reorder your file.\n",
1328                              ri->chainid);
1329                   }
1330               }
1331               if (nch == maxch)
1332               {
1333                   maxch += 16;
1334                   srenew(pdb_ch,maxch);
1335               }
1336               pdb_ch[nch].chainid = ri->chainid;
1337               pdb_ch[nch].chainnum = ri->chainnum;
1338               pdb_ch[nch].start=i;
1339               pdb_ch[nch].bAllWat=bWat;
1340               if (bWat)
1341                   pdb_ch[nch].nterpairs=0;
1342               else
1343                   pdb_ch[nch].nterpairs=1;
1344               snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1345               /* modified [nch] to [0] below */
1346               pdb_ch[nch].chainstart[0]=0;
1347               nch++;
1348           }
1349       }
1350       bPrevWat=bWat;
1351   }
1352   pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1353
1354   /* set all the water blocks at the end of the chain */
1355   snew(swap_index,nch);
1356   j=0;
1357   for(i=0; i<nch; i++)
1358     if (!pdb_ch[i].bAllWat) {
1359       swap_index[j]=i;
1360       j++;
1361     }
1362   for(i=0; i<nch; i++)
1363     if (pdb_ch[i].bAllWat) {
1364       swap_index[j]=i;
1365       j++;
1366     }
1367   if (nwaterchain>1)
1368     printf("Moved all the water blocks to the end\n");
1369
1370   snew(chains,nch);
1371   /* copy pdb data and x for all chains */
1372   for (i=0; (i<nch); i++) {
1373     si=swap_index[i];
1374     chains[i].chainid = pdb_ch[si].chainid;
1375     chains[i].chainnum = pdb_ch[si].chainnum;
1376     chains[i].bAllWat = pdb_ch[si].bAllWat;
1377     chains[i].nterpairs = pdb_ch[si].nterpairs;
1378     chains[i].chainstart = pdb_ch[si].chainstart;
1379     snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1380     snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1381     snew(chains[i].r_start,pdb_ch[si].nterpairs);
1382     snew(chains[i].r_end,pdb_ch[si].nterpairs);
1383
1384     snew(chains[i].pdba,1);
1385     init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1386     snew(chains[i].x,chains[i].pdba->nr);
1387     for (j=0; j<chains[i].pdba->nr; j++) {
1388       chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1389       snew(chains[i].pdba->atomname[j],1);
1390       *chains[i].pdba->atomname[j] =
1391         strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1392       chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1393       copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1394     }
1395     /* Re-index the residues assuming that the indices are continuous */
1396     k    = chains[i].pdba->atom[0].resind;
1397     nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1398     chains[i].pdba->nres = nres;
1399     for(j=0; j < chains[i].pdba->nr; j++) {
1400       chains[i].pdba->atom[j].resind -= k;
1401     }
1402     srenew(chains[i].pdba->resinfo,nres);
1403     for(j=0; j<nres; j++) {
1404       chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1405       snew(chains[i].pdba->resinfo[j].name,1);
1406       *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1407       /* make all chain identifiers equal to that of the chain */
1408       chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1409     }
1410   }
1411
1412   if (bMerge)
1413     printf("\nMerged %d chains into one molecule definition\n\n",
1414            pdb_ch[0].nterpairs);
1415
1416   printf("There are %d chains and %d blocks of water and "
1417          "%d residues with %d atoms\n",
1418          nch-nwaterchain,nwaterchain,
1419          pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1420
1421   printf("\n  %5s  %4s %6s\n","chain","#res","#atoms");
1422   for (i=0; (i<nch); i++)
1423     printf("  %d '%c' %5d %6d  %s\n",
1424            i+1, chains[i].chainid ? chains[i].chainid:'-',
1425            chains[i].pdba->nres, chains[i].pdba->nr,
1426            chains[i].bAllWat ? "(only water)":"");
1427   printf("\n");
1428
1429   check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1430
1431   /* Read atomtypes... */
1432   atype = read_atype(ffdir,&symtab);
1433
1434   /* read residue database */
1435   printf("Reading residue database... (%s)\n",forcefield);
1436   nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1437   nrtp  = 0;
1438   restp = NULL;
1439   for(i=0; i<nrtpf; i++) {
1440     read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1441     sfree(rtpf[i]);
1442   }
1443   sfree(rtpf);
1444   if (bNewRTP) {
1445     /* Not correct with multiple rtp input files with different bonded types */
1446     fp=gmx_fio_fopen("new.rtp","w");
1447     print_resall(fp,nrtp,restp,atype);
1448     gmx_fio_fclose(fp);
1449   }
1450
1451   /* read hydrogen database */
1452   nah = read_h_db(ffdir,&ah);
1453
1454   /* Read Termini database... */
1455   nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1456   nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1457
1458   top_fn=ftp2fn(efTOP,NFILE,fnm);
1459   top_file=gmx_fio_fopen(top_fn,"w");
1460
1461 #ifdef PACKAGE_VERSION
1462   sprintf(generator,"%s - version %s",ShortProgram(), PACKAGE_VERSION );
1463 #else
1464   sprintf(generator,"%s - version %s",ShortProgram(), "unknown" );
1465 #endif
1466   print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1467
1468   nincl=0;
1469   nmol=0;
1470   incls=NULL;
1471   mols=NULL;
1472   nres=0;
1473   for(chain=0; (chain<nch); chain++) {
1474     cc = &(chains[chain]);
1475
1476     /* set pdba, natom and nres to the current chain */
1477     pdba =cc->pdba;
1478     x    =cc->x;
1479     natom=cc->pdba->nr;
1480     nres =cc->pdba->nres;
1481
1482     if (cc->chainid && ( cc->chainid != ' ' ) )
1483       printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1484               chain+1,cc->chainid,natom,nres);
1485     else
1486       printf("Processing chain %d (%d atoms, %d residues)\n",
1487               chain+1,natom,nres);
1488
1489     process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1490                   bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1491                   nrtprename,rtprename);
1492
1493     for(i=0; i<cc->nterpairs; i++) {
1494
1495       cc->chainstart[cc->nterpairs] = pdba->nres;
1496
1497       find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1498                   &(cc->r_start[i]),&(cc->r_end[i]),rt);
1499
1500
1501       if ( (cc->r_start[i]<0) || (cc->r_end[i]<0) ) {
1502         printf("Problem with chain definition, or missing terminal residues.\n"
1503                "This chain does not appear to contain a recognized chain molecule.\n"
1504            "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1505
1506         cc->nterpairs = i;
1507         break;
1508       }
1509     }
1510
1511     /* Check for disulfides and other special bonds */
1512     nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1513
1514     if (nrtprename > 0) {
1515       rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1516                     &symtab,bVerbose);
1517     }
1518
1519     if (debug) {
1520       if (nch==1) {
1521         sprintf(fn,"chain.pdb");
1522       } else {
1523         sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1524       }
1525       write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1526     }
1527
1528
1529     for(i=0; i<cc->nterpairs; i++)
1530     {
1531
1532         /* Set termini.
1533          * We first apply a filter so we only have the
1534          * termini that can be applied to the residue in question
1535          * (or a generic terminus if no-residue specific is available).
1536          */
1537         /* First the N terminus */
1538         if (nNtdb > 0)
1539         {
1540             tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1541                                  *pdba->resinfo[cc->r_start[i]].name,
1542                                  *pdba->resinfo[cc->r_start[i]].rtp,
1543                                  &ntdblist);
1544             if(ntdblist==0)
1545             {
1546                 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1547                        "is already in a terminus-specific form and skipping terminus selection.\n");
1548                 cc->ntdb[i]=NULL;
1549             }
1550             else
1551             {
1552                 if(bTerMan && ntdblist>1)
1553                 {
1554                     cc->ntdb[i] = choose_ter(ntdblist,tdblist,"Select start terminus type");
1555                 }
1556                 else
1557                 {
1558                     cc->ntdb[i] = tdblist[0];
1559                 }
1560
1561                 printf("Start terminus: %s\n",(cc->ntdb[i])->name);
1562                 sfree(tdblist);
1563             }
1564         }
1565         else
1566         {
1567             cc->ntdb[i] = NULL;
1568         }
1569
1570         /* And the C terminus */
1571         if (nCtdb > 0)
1572         {
1573             tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1574                                  *pdba->resinfo[cc->r_start[i]].name,
1575                                  *pdba->resinfo[cc->r_end[i]].rtp,
1576                                  &ntdblist);
1577             if(ntdblist==0)
1578             {
1579                 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1580                        "is already in a terminus-specific form and skipping terminus selection.\n");
1581                 cc->ctdb[i] = NULL;
1582             }
1583             else
1584             {
1585                 if(bTerMan && ntdblist>1)
1586                 {
1587                     cc->ctdb[i] = choose_ter(ntdblist,tdblist,"Select end terminus type");
1588                 }
1589                 else
1590                 {
1591                     cc->ctdb[i] = tdblist[0];
1592                 }
1593                 printf("End terminus: %s\n",(cc->ctdb[i])->name);
1594                 sfree(tdblist);
1595             }
1596         }
1597         else
1598         {
1599             cc->ctdb[i] = NULL;
1600         }
1601     }
1602     /* lookup hackblocks and rtp for all residues */
1603     get_hackblocks_rtp(&hb_chain, &restp_chain,
1604                        nrtp, restp, pdba->nres, pdba->resinfo,
1605                        cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1606     /* ideally, now we would not need the rtp itself anymore, but do
1607      everything using the hb and restp arrays. Unfortunately, that
1608      requires some re-thinking of code in gen_vsite.c, which I won't
1609      do now :( AF 26-7-99 */
1610
1611     rename_atoms(NULL,ffdir,
1612                  pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1613
1614     match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1615
1616     if (bSort) {
1617       block = new_blocka();
1618       snew(gnames,1);
1619       sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1620                     natom,&pdba,&x,block,&gnames);
1621       natom = remove_duplicate_atoms(pdba,x,bVerbose);
1622       if (ftp2bSet(efNDX,NFILE,fnm)) {
1623         if (bRemoveH) {
1624           fprintf(stderr,"WARNING: with the -remh option the generated "
1625                   "index file (%s) might be useless\n"
1626                   "(the index file is generated before hydrogens are added)",
1627                   ftp2fn(efNDX,NFILE,fnm));
1628         }
1629         write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1630       }
1631       for(i=0; i < block->nr; i++)
1632         sfree(gnames[i]);
1633       sfree(gnames);
1634       done_blocka(block);
1635     } else {
1636       fprintf(stderr,"WARNING: "
1637               "without sorting no check for duplicate atoms can be done\n");
1638     }
1639
1640     /* Generate Hydrogen atoms (and termini) in the sequence */
1641     natom=add_h(&pdba,&x,nah,ah,
1642                 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1643                 NULL,NULL,TRUE,FALSE);
1644     printf("Now there are %d residues with %d atoms\n",
1645            pdba->nres,pdba->nr);
1646     if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1647
1648     if (debug)
1649       for(i=0; (i<natom); i++)
1650         fprintf(debug,"Res %s%d atom %d %s\n",
1651                 *(pdba->resinfo[pdba->atom[i].resind].name),
1652                 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1653
1654     strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1655
1656     /* make up molecule name(s) */
1657
1658       k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1659
1660     gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1661
1662     suffix[0]='\0';
1663
1664     if (cc->bAllWat)
1665     {
1666         sprintf(molname,"Water");
1667     }
1668     else
1669     {
1670         this_chainid = cc->chainid;
1671
1672         /* Add the chain id if we have one */
1673         if(this_chainid != ' ')
1674         {
1675             sprintf(buf,"_chain_%c",this_chainid);
1676             strcat(suffix,buf);
1677         }
1678
1679         /* Check if there have been previous chains with the same id */
1680         nid_used = 0;
1681         for(k=0;k<chain;k++)
1682         {
1683             if(cc->chainid == chains[k].chainid)
1684             {
1685                 nid_used++;
1686             }
1687         }
1688         /* Add the number for this chain identifier if there are multiple copies */
1689         if(nid_used>0)
1690         {
1691
1692             sprintf(buf,"%d",nid_used+1);
1693             strcat(suffix,buf);
1694         }
1695
1696         if(strlen(suffix)>0)
1697         {
1698             sprintf(molname,"%s%s",p_restype,suffix);
1699         }
1700         else
1701         {
1702             strcpy(molname,p_restype);
1703         }
1704     }
1705
1706     if ((nch-nwaterchain>1) && !cc->bAllWat) {
1707       bITP=TRUE;
1708       strcpy(itp_fn,top_fn);
1709       printf("Chain time...\n");
1710       c=strrchr(itp_fn,'.');
1711       sprintf(c,"_%s.itp",molname);
1712       c=strrchr(posre_fn,'.');
1713       sprintf(c,"_%s.itp",molname);
1714       if (strcmp(itp_fn,posre_fn) == 0) {
1715         strcpy(buf_fn,posre_fn);
1716         c  = strrchr(buf_fn,'.');
1717         *c = '\0';
1718         sprintf(posre_fn,"%s_pr.itp",buf_fn);
1719       }
1720
1721       nincl++;
1722       srenew(incls,nincl);
1723       incls[nincl-1]=strdup(itp_fn);
1724       itp_file=gmx_fio_fopen(itp_fn,"w");
1725     } else
1726       bITP=FALSE;
1727
1728     srenew(mols,nmol+1);
1729     if (cc->bAllWat) {
1730       mols[nmol].name = strdup("SOL");
1731       mols[nmol].nr   = pdba->nres;
1732     } else {
1733       mols[nmol].name = strdup(molname);
1734       mols[nmol].nr   = 1;
1735     }
1736     nmol++;
1737
1738     if (bITP)
1739       print_top_comment(itp_file,itp_fn,generator,TRUE);
1740
1741     if (cc->bAllWat)
1742       top_file2=NULL;
1743     else
1744       if (bITP)
1745         top_file2=itp_file;
1746       else
1747         top_file2=top_file;
1748
1749     pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1750             nrtp,restp,
1751             restp_chain,hb_chain,
1752             cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1753             bVsites,bVsiteAromatics,forcefield,ffdir,
1754             mHmult,nssbonds,ssbonds,
1755             long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1756             bRenumRes,bRTPresname);
1757
1758     if (!cc->bAllWat)
1759       write_posres(posre_fn,pdba,posre_fc);
1760
1761     if (bITP)
1762       gmx_fio_fclose(itp_file);
1763
1764     /* pdba and natom have been reassigned somewhere so: */
1765     cc->pdba = pdba;
1766     cc->x = x;
1767
1768     if (debug) {
1769       if (cc->chainid == ' ')
1770         sprintf(fn,"chain.pdb");
1771       else
1772         sprintf(fn,"chain_%c.pdb",cc->chainid);
1773       cool_quote(quote,255,NULL);
1774       write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1775     }
1776   }
1777
1778   if (watermodel == NULL) {
1779     for(chain=0; chain<nch; chain++) {
1780       if (chains[chain].bAllWat) {
1781         gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1782       }
1783     }
1784   } else {
1785     sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1786     if (!fflib_fexist(buf_fn)) {
1787       gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1788                 buf_fn,watermodel);
1789     }
1790   }
1791
1792   print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1793   gmx_fio_fclose(top_file);
1794
1795   gmx_residuetype_destroy(rt);
1796
1797   /* now merge all chains back together */
1798   natom=0;
1799   nres=0;
1800   for (i=0; (i<nch); i++) {
1801     natom+=chains[i].pdba->nr;
1802     nres+=chains[i].pdba->nres;
1803   }
1804   snew(atoms,1);
1805   init_t_atoms(atoms,natom,FALSE);
1806   for(i=0; i < atoms->nres; i++)
1807     sfree(atoms->resinfo[i].name);
1808   sfree(atoms->resinfo);
1809   atoms->nres=nres;
1810   snew(atoms->resinfo,nres);
1811   snew(x,natom);
1812   k=0;
1813   l=0;
1814   for (i=0; (i<nch); i++) {
1815     if (nch>1)
1816       printf("Including chain %d in system: %d atoms %d residues\n",
1817              i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1818     for (j=0; (j<chains[i].pdba->nr); j++) {
1819       atoms->atom[k]=chains[i].pdba->atom[j];
1820       atoms->atom[k].resind += l; /* l is processed nr of residues */
1821       atoms->atomname[k]=chains[i].pdba->atomname[j];
1822       atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1823       copy_rvec(chains[i].x[j],x[k]);
1824       k++;
1825     }
1826     for (j=0; (j<chains[i].pdba->nres); j++) {
1827       atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1828       if (bRTPresname) {
1829         atoms->resinfo[l].name = atoms->resinfo[l].rtp;
1830       }
1831       l++;
1832     }
1833   }
1834
1835   if (nch>1) {
1836     fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
1837     print_sums(atoms, TRUE);
1838   }
1839
1840   fprintf(stderr,"\nWriting coordinate file...\n");
1841   clear_rvec(box_space);
1842   if (box[0][0] == 0)
1843     gen_box(0,atoms->nr,x,box,box_space,FALSE);
1844   write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
1845
1846   printf("\t\t--------- PLEASE NOTE ------------\n");
1847   printf("You have successfully generated a topology from: %s.\n",
1848          opt2fn("-f",NFILE,fnm));
1849   if (watermodel != NULL) {
1850     printf("The %s force field and the %s water model are used.\n",
1851            ffname,watermodel);
1852   } else {
1853     printf("The %s force field is used.\n",
1854            ffname);
1855   }
1856   printf("\t\t--------- ETON ESAELP ------------\n");
1857
1858
1859   thanx(stdout);
1860
1861   return 0;
1862 }