src/kernel/pdb2gmx.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <time.h>
  41 #include <ctype.h>
  42 #include "sysstuff.h"
  43 #include "typedefs.h"
  44 #include "gmxfio.h"
  45 #include "smalloc.h"
  46 #include "copyrite.h"
  47 #include "string2.h"
  48 #include "confio.h"
  49 #include "symtab.h"
  50 #include "vec.h"
  51 #include "statutil.h"
  52 #include "futil.h"
  53 #include "gmx_fatal.h"
  54 #include "pdbio.h"
  55 #include "toputil.h"
  56 #include "h_db.h"
  57 #include "physics.h"
  58 #include "pgutil.h"
  59 #include "calch.h"
  60 #include "resall.h"
  61 #include "pdb2top.h"
  62 #include "ter_db.h"
  63 #include "strdb.h"
  64 #include "gbutil.h"
  65 #include "genhydro.h"
  66 #include "readinp.h"
  67 #include "atomprop.h"
  68 #include "xlate.h"
  69 #include "specbond.h"
  70 #include "index.h"
  71 #include "hizzie.h"
  72 #include "fflibutil.h"
  73
  74
  75 typedef struct {
  76   char gmx[6];
  77   char main[6];
  78   char nter[6];
  79   char cter[6];
  80   char bter[6];
  81 } rtprename_t;
  82
  83
  84 static const char *res2bb_notermini(const char *name,
  85                                     int nrr,const rtprename_t *rr)
  86 {
  87   /* NOTE: This function returns the main building block name,
  88    *       it does not take terminal renaming into account.
  89    */
  90   int i;
  91
  92   i = 0;
  93   while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
  94     i++;
  95   }
  96
  97   return (i < nrr ? rr[i].main : name);
  98 }
  99
 100 static const char *select_res(int nr,int resnr,
 101                               const char *name[],const char *expl[],
 102                               const char *title,
 103                               int nrr,const rtprename_t *rr)
 104 {
 105   int sel=0;
 106
 107   printf("Which %s type do you want for residue %d\n",title,resnr+1);
 108   for(sel=0; (sel < nr); sel++) {
 109     printf("%d. %s (%s)\n",
 110            sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
 111   }
 112   printf("\nType a number:"); fflush(stdout);
 113
 114   if (scanf("%d",&sel) != 1)
 115     gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
 116
 117   return name[sel];
 118 }
 119
 120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
 121 {
 122   enum { easp, easpH, easpNR };
 123   const char *lh[easpNR] = { "ASP", "ASPH" };
 124   const char *expl[easpNR] = {
 125     "Not protonated (charge -1)",
 126     "Protonated (charge 0)"
 127   };
 128
 129   return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
 130 }
 131
 132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
 133 {
 134   enum { eglu, egluH, egluNR };
 135   const char *lh[egluNR] = { "GLU", "GLUH" };
 136   const char *expl[egluNR] = {
 137     "Not protonated (charge -1)",
 138     "Protonated (charge 0)"
 139   };
 140
 141   return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
 142 }
 143
 144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
 145 {
 146   enum { egln, eglnH, eglnNR };
 147   const char *lh[eglnNR] = { "GLN", "QLN" };
 148   const char *expl[eglnNR] = {
 149     "Not protonated (charge 0)",
 150     "Protonated (charge +1)"
 151   };
 152
 153   return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
 154 }
 155
 156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
 157 {
 158   enum { elys, elysH, elysNR };
 159   const  char *lh[elysNR] = { "LYSN", "LYS" };
 160   const char *expl[elysNR] = {
 161     "Not protonated (charge 0)",
 162     "Protonated (charge +1)"
 163   };
 164
 165   return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
 166 }
 167
 168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
 169 {
 170   enum { earg, eargH, eargNR };
 171   const  char *lh[eargNR] = { "ARGN", "ARG" };
 172   const char *expl[eargNR] = {
 173     "Not protonated (charge 0)",
 174     "Protonated (charge +1)"
 175   };
 176
 177   return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
 178 }
 179
 180 static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
 181 {
 182   enum { ecys, ecysH, ecysNR };
 183   const char *lh[ecysNR] = { "CYS2", "CYS" };
 184   const char *expl[ecysNR] = {
 185     "Cysteine in disulfide bridge",
 186     "Protonated"
 187   };
 188
 189   return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
 190
 191 }
 192
 193 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
 194 {
 195   const char *expl[ehisNR] = {
 196     "H on ND1 only",
 197     "H on NE2 only",
 198     "H on ND1 and NE2",
 199     "Coupled to Heme"
 200   };
 201
 202   return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
 203 }
 204
 205 static void read_rtprename(const char *fname,FILE *fp,
 206                            int *nrtprename,rtprename_t **rtprename)
 207 {
 208   char line[STRLEN],buf[STRLEN];
 209   int  n;
 210   rtprename_t *rr;
 211   int  ncol,nc;
 212
 213   n  = *nrtprename;
 214   rr = *rtprename;
 215
 216   ncol = 0;
 217   while(get_a_line(fp,line,STRLEN)) {
 218     srenew(rr,n+1);
 219     nc = sscanf(line,"%s %s %s %s %s %s",
 220                 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
 221     if (ncol == 0) {
 222       if (nc != 2 && nc != 5) {
 223         gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
 224       }
 225       ncol = nc;
 226     } else if (nc != ncol) {
 227         gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
 228     }
 229
 230     if (nc == 2) {
 231       /* This file does not have special termini names, copy them from main */
 232       strcpy(rr[n].nter,rr[n].main);
 233       strcpy(rr[n].cter,rr[n].main);
 234       strcpy(rr[n].bter,rr[n].main);
 235     }
 236
 237     n++;
 238   }
 239
 240   *nrtprename = n;
 241   *rtprename  = rr;
 242 }
 243
 244 static char *search_resrename(int nrr,rtprename_t *rr,
 245                               const char *name,
 246                               gmx_bool bStart,gmx_bool bEnd,
 247                               gmx_bool bCompareFFRTPname)
 248 {
 249     char *nn;
 250     int i;
 251
 252     nn = NULL;
 253
 254     i = 0;
 255     while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx)  != 0) ||
 256                      ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
 257     {
 258         i++;
 259     }
 260
 261     /* If found in the database, rename this residue's rtp building block,
 262      * otherwise keep the old name.
 263      */
 264     if (i < nrr)
 265     {
 266         if (bStart && bEnd)
 267         {
 268             nn = rr[i].bter;
 269         }
 270         else if (bStart)
 271         {
 272             nn = rr[i].nter;
 273         }
 274         else if (bEnd)
 275         {
 276             nn = rr[i].cter;
 277         }
 278         else
 279         {
 280             nn = rr[i].main;
 281         }
 282         if (nn[0] == '-')
 283         {
 284             gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
 285         }
 286     }
 287
 288     return nn;
 289 }
 290
 291
 292 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
 293                           int nrr,rtprename_t *rr,t_symtab *symtab,
 294                           gmx_bool bVerbose)
 295 {
 296     int  r,i,j;
 297     gmx_bool bStart,bEnd;
 298     char *nn;
 299     gmx_bool bFFRTPTERRNM;
 300
 301     bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
 302
 303     for(r=0; r<pdba->nres; r++)
 304     {
 305         bStart = FALSE;
 306         bEnd   = FALSE;
 307         for(j=0; j<nterpairs; j++)
 308         {
 309             if (r == r_start[j])
 310             {
 311                 bStart = TRUE;
 312             }
 313         }
 314         for(j=0; j<nterpairs; j++)
 315         {
 316             if (r == r_end[j])
 317             {
 318                 bEnd = TRUE;
 319             }
 320         }
 321
 322         nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
 323
 324         if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
 325         {
 326             /* This is a terminal residue, but the residue name,
 327              * currently stored in .rtp, is not a standard residue name,
 328              * but probably a force field specific rtp name.
 329              * Check if we need to rename it because it is terminal.
 330              */
 331             nn = search_resrename(nrr,rr,
 332                                   *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
 333         }
 334
 335         if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
 336         {
 337             if (bVerbose)
 338             {
 339                 printf("Changing rtp entry of residue %d %s to '%s'\n",
 340                        pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
 341             }
 342             pdba->resinfo[r].rtp = put_symtab(symtab,nn);
 343         }
 344     }
 345 }
 346
 347 static void pdbres_to_gmxrtp(t_atoms *pdba)
 348 {
 349     int i;
 350
 351     for(i=0; (i<pdba->nres); i++)
 352     {
 353         if (pdba->resinfo[i].rtp == NULL)
 354         {
 355             pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 356         }
 357     }
 358 }
 359
 360 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
 361                           gmx_bool bFullCompare,t_symtab *symtab)
 362 {
 363     char *resnm;
 364     int i;
 365
 366     for(i=0; (i<pdba->nres); i++)
 367     {
 368         resnm = *pdba->resinfo[i].name;
 369         if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
 370             (!bFullCompare && strstr(resnm,oldnm) != NULL))
 371         {
 372             /* Rename the residue name (not the rtp name) */
 373             pdba->resinfo[i].name = put_symtab(symtab,newnm);
 374         }
 375     }
 376 }
 377
 378 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
 379                       gmx_bool bFullCompare,t_symtab *symtab)
 380 {
 381     char *bbnm;
 382     int i;
 383
 384     for(i=0; (i<pdba->nres); i++)
 385     {
 386         /* We have not set the rtp name yes, use the residue name */
 387         bbnm = *pdba->resinfo[i].name;
 388         if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
 389             (!bFullCompare && strstr(bbnm,oldnm) != NULL))
 390         {
 391             /* Change the rtp builing block name */
 392             pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
 393         }
 394     }
 395 }
 396
 397 static void rename_bbint(t_atoms *pdba,const char *oldnm,
 398                          const char *gettp(int,int,const rtprename_t *),
 399                          gmx_bool bFullCompare,
 400                          t_symtab *symtab,
 401                          int nrr,const rtprename_t *rr)
 402 {
 403     int  i;
 404     const char *ptr;
 405     char *bbnm;
 406
 407     for(i=0; i<pdba->nres; i++)
 408     {
 409         /* We have not set the rtp name yes, use the residue name */
 410         bbnm = *pdba->resinfo[i].name;
 411         if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
 412             (!bFullCompare && strstr(bbnm,oldnm) != NULL))
 413         {
 414             ptr = gettp(i,nrr,rr);
 415             pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
 416         }
 417     }
 418 }
 419
 420 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
 421 {
 422   int i,ftp;
 423   int nzero=0;
 424   int nnotone=0;
 425
 426   ftp = fn2ftp(filename);
 427   if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 428     fprintf(stderr,"No occupancies in %s\n",filename);
 429   else {
 430     for(i=0; (i<atoms->nr); i++) {
 431       if (atoms->pdbinfo[i].occup != 1) {
 432         if (bVerbose)
 433           fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
 434                   *atoms->resinfo[atoms->atom[i].resind].name,
 435                   atoms->resinfo[ atoms->atom[i].resind].nr,
 436                   *atoms->atomname[i],
 437                   atoms->pdbinfo[i].occup);
 438         if (atoms->pdbinfo[i].occup == 0)
 439           nzero++;
 440         else
 441           nnotone++;
 442       }
 443     }
 444     if (nzero == atoms->nr) {
 445       fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
 446     } else if ((nzero > 0) || (nnotone > 0)) {
 447       fprintf(stderr,
 448               "\n"
 449               "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
 450               "         occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
 451               "\n",
 452               nzero,nnotone,atoms->nr);
 453     } else {
 454       fprintf(stderr,"All occupancies are one\n");
 455     }
 456   }
 457 }
 458
 459 void write_posres(char *fn,t_atoms *pdba,real fc)
 460 {
 461   FILE *fp;
 462   int  i;
 463
 464   fp=gmx_fio_fopen(fn,"w");
 465   fprintf(fp,
 466           "; In this topology include file, you will find position restraint\n"
 467           "; entries for all the heavy atoms in your original pdb file.\n"
 468           "; This means that all the protons which were added by pdb2gmx are\n"
 469           "; not restrained.\n"
 470           "\n"
 471           "[ position_restraints ]\n"
 472           "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
 473           );
 474   for(i=0; (i<pdba->nr); i++) {
 475     if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 476       fprintf(fp,"%6d%6d  %g  %g  %g\n",i+1,1,fc,fc,fc);
 477   }
 478   gmx_fio_fclose(fp);
 479 }
 480
 481 static int read_pdball(const char *inf, const char *outf,char *title,
 482                        t_atoms *atoms, rvec **x,
 483                        int *ePBC,matrix box, gmx_bool bRemoveH,
 484                        t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
 485                        gmx_atomprop_t aps,gmx_bool bVerbose)
 486 /* Read a pdb file. (containing proteins) */
 487 {
 488   int  natom,new_natom,i;
 489
 490   /* READ IT */
 491   printf("Reading %s...\n",inf);
 492   get_stx_coordnum(inf,&natom);
 493   init_t_atoms(atoms,natom,TRUE);
 494   snew(*x,natom);
 495   read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
 496   if (fn2ftp(inf) == efPDB)
 497     get_pdb_atomnumber(atoms,aps);
 498   if (bRemoveH) {
 499     new_natom=0;
 500     for(i=0; i<atoms->nr; i++)
 501       if (!is_hydrogen(*atoms->atomname[i])) {
 502         atoms->atom[new_natom]=atoms->atom[i];
 503         atoms->atomname[new_natom]=atoms->atomname[i];
 504         atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
 505         copy_rvec((*x)[i],(*x)[new_natom]);
 506         new_natom++;
 507       }
 508     atoms->nr=new_natom;
 509     natom=new_natom;
 510   }
 511
 512   printf("Read");
 513   if (title && title[0])
 514     printf(" '%s',",title);
 515   printf(" %d atoms\n",natom);
 516
 517   /* Rename residues */
 518   rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
 519   rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
 520   rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
 521
 522   rename_atoms("xlateat.dat",NULL,
 523                atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
 524
 525   if (natom == 0)
 526     return 0;
 527
 528   if (outf)
 529     write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
 530
 531   return natom;
 532 }
 533
 534 void process_chain(t_atoms *pdba, rvec *x,
 535                    gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
 536                    gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
 537                    gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
 538                    real angle,real distance,t_symtab *symtab,
 539                    int nrr,const rtprename_t *rr)
 540 {
 541   /* Rename aromatics, lys, asp and histidine */
 542   if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
 543   if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
 544   if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
 545   if (bLysMan)
 546     rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
 547   if (bArgMan)
 548     rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
 549   if (bGlnMan)
 550     rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
 551   if (bAspMan)
 552     rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
 553   else
 554     rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
 555   if (bGluMan)
 556     rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
 557   else
 558     rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
 559
 560   if (!bHisMan)
 561     set_histp(pdba,x,angle,distance);
 562   else
 563     rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
 564
 565   /* Initialize the rtp builing block names with the residue names
 566    * for the residues that have not been processed above.
 567    */
 568   pdbres_to_gmxrtp(pdba);
 569
 570   /* Now we have all rtp names set.
 571    * The rtp names will conform to Gromacs naming,
 572    * unless the input pdb file contained one or more force field specific
 573    * rtp names as residue names.
 574    */
 575 }
 576
 577 /* struct for sorting the atoms from the pdb file */
 578 typedef struct {
 579   int  resnr;  /* residue number               */
 580   int  j;      /* database order index         */
 581   int  index;  /* original atom number         */
 582   char anm1;   /* second letter of atom name   */
 583   char altloc; /* alternate location indicator */
 584 } t_pdbindex;
 585
 586 int pdbicomp(const void *a,const void *b)
 587 {
 588   t_pdbindex *pa,*pb;
 589   int d;
 590
 591   pa=(t_pdbindex *)a;
 592   pb=(t_pdbindex *)b;
 593
 594   d = (pa->resnr - pb->resnr);
 595   if (d==0) {
 596     d = (pa->j - pb->j);
 597     if (d==0) {
 598       d = (pa->anm1 - pb->anm1);
 599       if (d==0)
 600         d = (pa->altloc - pb->altloc);
 601     }
 602   }
 603
 604   return d;
 605 }
 606
 607 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
 608                           int natoms,t_atoms **pdbaptr,rvec **x,
 609                           t_blocka *block,char ***gnames)
 610 {
 611   t_atoms *pdba,*pdbnew;
 612   rvec **xnew;
 613   int     i,j;
 614   t_restp *rptr;
 615   t_hackblock *hbr;
 616   t_pdbindex *pdbi;
 617   atom_id *a;
 618   char *atomnm;
 619
 620   pdba=*pdbaptr;
 621   natoms=pdba->nr;
 622   pdbnew=NULL;
 623   snew(xnew,1);
 624   snew(pdbi, natoms);
 625
 626   for(i=0; i<natoms; i++)
 627   {
 628       atomnm = *pdba->atomname[i];
 629       rptr = &restp[pdba->atom[i].resind];
 630       for(j=0; (j<rptr->natom); j++)
 631       {
 632           if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
 633           {
 634               break;
 635           }
 636       }
 637       if (j==rptr->natom)
 638       {
 639           char buf[STRLEN];
 640
 641           sprintf(buf,
 642                   "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
 643                   "while sorting atoms.\n%s",atomnm,
 644                   *pdba->resinfo[pdba->atom[i].resind].name,
 645                   pdba->resinfo[pdba->atom[i].resind].nr,
 646                   rptr->resname,
 647                   rptr->natom,
 648                   is_hydrogen(atomnm) ?
 649                   "\nFor a hydrogen, this can be a different protonation state, or it\n"
 650                   "might have had a different number in the PDB file and was rebuilt\n"
 651                   "(it might for instance have been H3, and we only expected H1 & H2).\n"
 652                   "Note that hydrogens might have been added to the entry for the N-terminus.\n"
 653                   "Remove this hydrogen or choose a different protonation state to solve it.\n"
 654                   "Option -ignh will ignore all hydrogens in the input." : ".");
 655           gmx_fatal(FARGS,buf);
 656       }
 657       /* make shadow array to be sorted into indexgroup */
 658       pdbi[i].resnr  = pdba->atom[i].resind;
 659       pdbi[i].j      = j;
 660       pdbi[i].index  = i;
 661       pdbi[i].anm1   = atomnm[1];
 662       pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 663   }
 664   qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
 665
 666   /* pdba is sorted in pdbnew using the pdbi index */
 667   snew(a,natoms);
 668   snew(pdbnew,1);
 669   init_t_atoms(pdbnew,natoms,TRUE);
 670   snew(*xnew,natoms);
 671   pdbnew->nr=pdba->nr;
 672   pdbnew->nres=pdba->nres;
 673   sfree(pdbnew->resinfo);
 674   pdbnew->resinfo=pdba->resinfo;
 675   for (i=0; i<natoms; i++) {
 676     pdbnew->atom[i]     = pdba->atom[pdbi[i].index];
 677     pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
 678     pdbnew->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 679     copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
 680      /* make indexgroup in block */
 681     a[i]=pdbi[i].index;
 682   }
 683   /* clean up */
 684   sfree(pdba->atomname);
 685   sfree(pdba->atom);
 686   sfree(pdba->pdbinfo);
 687   sfree(pdba);
 688   sfree(*x);
 689   /* copy the sorted pdbnew back to pdba */
 690   *pdbaptr=pdbnew;
 691   *x=*xnew;
 692   add_grp(block, gnames, natoms, a, "prot_sort");
 693   sfree(xnew);
 694   sfree(a);
 695   sfree(pdbi);
 696 }
 697
 698 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
 699 {
 700   int     i,j,oldnatoms,ndel;
 701   t_resinfo *ri;
 702
 703   printf("Checking for duplicate atoms....\n");
 704   oldnatoms    = pdba->nr;
 705   ndel = 0;
 706   /* NOTE: pdba->nr is modified inside the loop */
 707   for(i=1; (i < pdba->nr); i++) {
 708     /* compare 'i' and 'i-1', throw away 'i' if they are identical
 709        this is a 'while' because multiple alternate locations can be present */
 710     while ( (i < pdba->nr) &&
 711             (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
 712             (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
 713       ndel++;
 714       if (bVerbose) {
 715         ri = &pdba->resinfo[pdba->atom[i].resind];
 716         printf("deleting duplicate atom %4s  %s%4d%c",
 717                *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
 718         if (ri->chainid && (ri->chainid != ' '))
 719           printf(" ch %c", ri->chainid);
 720         if (pdba->pdbinfo) {
 721           if (pdba->pdbinfo[i].atomnr)
 722             printf("  pdb nr %4d",pdba->pdbinfo[i].atomnr);
 723           if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
 724             printf("  altloc %c",pdba->pdbinfo[i].altloc);
 725         }
 726         printf("\n");
 727       }
 728       pdba->nr--;
 729       /* We can not free, since it might be in the symtab */
 730       /* sfree(pdba->atomname[i]); */
 731       for (j=i; j < pdba->nr; j++) {
 732         pdba->atom[j]     = pdba->atom[j+1];
 733         pdba->atomname[j] = pdba->atomname[j+1];
 734         pdba->pdbinfo[j]  = pdba->pdbinfo[j+1];
 735         copy_rvec(x[j+1],x[j]);
 736       }
 737       srenew(pdba->atom,     pdba->nr);
 738       /* srenew(pdba->atomname, pdba->nr); */
 739       srenew(pdba->pdbinfo,  pdba->nr);
 740     }
 741   }
 742   if (pdba->nr != oldnatoms)
 743     printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
 744
 745   return pdba->nr;
 746 }
 747
 748 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
 749 {
 750     int i;
 751     const char *p_startrestype;
 752     const char *p_restype;
 753     int         nstartwarn,nendwarn;
 754
 755     *r_start = -1;
 756     *r_end   = -1;
 757
 758     nstartwarn = 0;
 759     nendwarn   = 0;
 760
 761     /* Find the starting terminus (typially N or 5') */
 762     for(i=r0;i<r1 && *r_start==-1;i++)
 763     {
 764         gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
 765         if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
 766         {
 767             printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 768             *r_start=i;
 769         }
 770         else
 771         {
 772             if(nstartwarn < 5)
 773             {
 774                 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 775             }
 776             if(nstartwarn == 5)
 777             {
 778                 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
 779             }
 780             nstartwarn++;
 781         }
 782     }
 783
 784     if(*r_start>=0)
 785     {
 786         /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
 787         for(i=*r_start;i<r1;i++)
 788         {
 789             gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
 790             if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
 791             {
 792                 *r_end=i;
 793             }
 794             else
 795             {
 796                 if(nendwarn < 5)
 797                 {
 798                     printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
 799                            *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
 800                            *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
 801                 }
 802                 if(nendwarn == 5)
 803                 {
 804                     printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
 805                 }
 806                 nendwarn++;
 807             }
 808         }
 809     }
 810
 811     if(*r_end>=0)
 812     {
 813         printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
 814     }
 815 }
 816
 817
 818 static void
 819 modify_chain_numbers(t_atoms *       pdba,
 820                      const char *    chainsep)
 821 {
 822     int   i;
 823     char  old_prev_chainid;
 824     char  old_this_chainid;
 825     int   old_prev_chainnum;
 826     int   old_this_chainnum;
 827     t_resinfo *ri;
 828     int   new_chainnum;
 829
 830     enum
 831     {
 832         SPLIT_ID_OR_TER,
 833         SPLIT_ID_AND_TER,
 834         SPLIT_ID_ONLY,
 835         SPLIT_TER_ONLY
 836     }
 837     splitting;
 838
 839     printf("Splitting PDB chains based on ");
 840     splitting = SPLIT_TER_ONLY; /* keep compiler happy */
 841
 842     /* Be a bit flexible to catch typos */
 843     if (!strncmp(chainsep,"id_o",4) || !strncmp(chainsep,"int",3))
 844     {
 845         /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 846         splitting = SPLIT_ID_OR_TER;
 847         printf("TER records or changing chain id.\n");
 848     }
 849     else if (!strncmp(chainsep,"id_a",4))
 850     {
 851         splitting = SPLIT_ID_AND_TER;
 852         printf("TER records and chain id.\n");
 853     }
 854     else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
 855     {
 856         splitting = SPLIT_ID_ONLY;
 857         printf("changing chain id only (ignoring TER records).\n");
 858     }
 859     else if (chainsep[0]=='t')
 860     {
 861         splitting = SPLIT_TER_ONLY;
 862         printf("TER records only (ignoring chain id).\n");
 863     }
 864     else
 865     {
 866         gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
 867     }
 868
 869     /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
 870
 871     old_prev_chainid  = '?';
 872     old_prev_chainnum = -1;
 873     new_chainnum  = -1;
 874
 875     for(i=0;i<pdba->nres;i++)
 876     {
 877         ri = &pdba->resinfo[i];
 878         old_this_chainid  = ri->chainid;
 879         old_this_chainnum = ri->chainnum;
 880
 881         switch (splitting)
 882         {
 883             case SPLIT_ID_OR_TER:
 884                 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
 885                 {
 886                     new_chainnum++;
 887                 }
 888                 break;
 889
 890             case SPLIT_ID_AND_TER:
 891                 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
 892                 {
 893                     new_chainnum++;
 894                 }
 895                 break;
 896
 897             case SPLIT_ID_ONLY:
 898                 if(old_this_chainid != old_prev_chainid)
 899                 {
 900                     new_chainnum++;
 901                 }
 902                 break;
 903
 904             case SPLIT_TER_ONLY:
 905                 if(old_this_chainnum != old_prev_chainnum)
 906                 {
 907                     new_chainnum++;
 908                 }
 909                 break;
 910             default:
 911                 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
 912                 break;
 913         }
 914         old_prev_chainid  = old_this_chainid;
 915         old_prev_chainnum = old_this_chainnum;
 916
 917         ri->chainnum = new_chainnum;
 918     }
 919 }
 920
 921
 922 typedef struct {
 923   char chainid;
 924   char chainnum;
 925   int  start;
 926   int  natom;
 927   gmx_bool bAllWat;
 928   int  nterpairs;
 929   int  *chainstart;
 930 } t_pdbchain;
 931
 932 typedef struct {
 933   char chainid;
 934   int  chainnum;
 935   gmx_bool bAllWat;
 936   int nterpairs;
 937   int *chainstart;
 938   t_hackblock **ntdb;
 939   t_hackblock **ctdb;
 940   int *r_start;
 941   int *r_end;
 942   t_atoms *pdba;
 943   rvec *x;
 944 } t_chain;
 945
 946 int main(int argc, char *argv[])
 947 {
 948   const char *desc[] = {
 949     "This program reads a pdb (or gro) file, reads",
 950     "some database files, adds hydrogens to the molecules and generates",
 951     "coordinates in Gromacs (Gromos), or optionally pdb, format",
 952     "and a topology in Gromacs format.",
 953     "These files can subsequently be processed to generate a run input file.",
 954     "[PAR]",
 955     "pdb2gmx will search for force fields by looking for",
 956     "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
 957     "of the current working directory and of the Gromacs library directory",
 958     "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
 959     "variable.",
 960     "By default the forcefield selection is interactive,",
 961     "but you can use the [TT]-ff[tt] option to specify one of the short names",
 962     "in the list on the command line instead. In that case pdb2gmx just looks",
 963     "for the corresponding [TT]<forcefield>.ff[tt] directory.",
 964     "[PAR]",
 965     "After choosing a force field, all files will be read only from",
 966     "the corresponding force field directory.",
 967     "If you want to modify or add a residue types, you can copy the force",
 968     "field directory from the Gromacs library directory to your current",
 969     "working directory. If you want to add new protein residue types,",
 970     "you will need to modify residuetypes.dat in the library directory",
 971     "or copy the whole library directory to a local directory and set",
 972     "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
 973     "Check chapter 5 of the manual for more information about file formats.",
 974     "[PAR]",
 975
 976     "Note that a pdb file is nothing more than a file format, and it",
 977     "need not necessarily contain a protein structure. Every kind of",
 978     "molecule for which there is support in the database can be converted.",
 979     "If there is no support in the database, you can add it yourself.[PAR]",
 980
 981     "The program has limited intelligence, it reads a number of database",
 982     "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
 983     "if necessary this can be done manually. The program can prompt the",
 984     "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
 985     "wants. For LYS the choice is between neutral (two protons on NZ) or",
 986     "protonated (three protons, default), for ASP and GLU unprotonated",
 987     "(default) or protonated, for HIS the proton can be either on ND1,",
 988     "on NE2 or on both. By default these selections are done automatically.",
 989     "For His, this is based on an optimal hydrogen bonding",
 990     "conformation. Hydrogen bonds are defined based on a simple geometric",
 991     "criterion, specified by the maximum hydrogen-donor-acceptor angle",
 992     "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
 993     "[TT]-dist[tt] respectively.[PAR]",
 994
 995     "The separation of chains is not entirely trivial since the markup",
 996     "in user-generated PDB files frequently varies and sometimes it",
 997     "is desirable to merge entries across a TER record, for instance",
 998     "if you want a disulfide bridge or distance restraints between",
 999     "two protein chains or if you have a HEME group bound to a protein.",
1000     "In such cases multiple chains should be contained in a single",
1001     "[TT]molecule_type[tt] definition.",
1002     "To handle this, pdb2gmx has an option [TT]-chainsep[tt] so you can",
1003     "choose whether a new chain should start when we find a TER record,",
1004     "when the chain id changes, combinations of either or both of these",
1005     "or fully interactively.[PAR]",
1006
1007     "pdb2gmx will also check the occupancy field of the pdb file.",
1008     "If any of the occupancies are not one, indicating that the atom is",
1009     "not resolved well in the structure, a warning message is issued.",
1010     "When a pdb file does not originate from an X-Ray structure determination",
1011     "all occupancy fields may be zero. Either way, it is up to the user",
1012     "to verify the correctness of the input data (read the article!).[PAR]",
1013
1014     "During processing the atoms will be reordered according to Gromacs",
1015     "conventions. With [TT]-n[tt] an index file can be generated that",
1016     "contains one group reordered in the same way. This allows you to",
1017     "convert a Gromos trajectory and coordinate file to Gromos. There is",
1018     "one limitation: reordering is done after the hydrogens are stripped",
1019     "from the input and before new hydrogens are added. This means that",
1020     "you should not use [TT]-ignh[tt].[PAR]",
1021
1022     "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1023     "identifiers. Therefore it is useful to enter a pdb file name at",
1024     "the [TT]-o[tt] option when you want to convert a multi-chain pdb file.",
1025     "[PAR]",
1026
1027     "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1028     "motions. Angular and out-of-plane motions can be removed by changing",
1029     "hydrogens into virtual sites and fixing angles, which fixes their",
1030     "position relative to neighboring atoms. Additionally, all atoms in the",
1031     "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1032     "can be converted into virtual sites, eliminating the fast improper dihedral",
1033     "fluctuations in these rings. Note that in this case all other hydrogen",
1034     "atoms are also converted to virtual sites. The mass of all atoms that are",
1035     "converted into virtual sites, is added to the heavy atoms.[PAR]",
1036     "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1037     "done by increasing the hydrogen-mass by a factor of 4. This is also",
1038     "done for water hydrogens to slow down the rotational motion of water.",
1039     "The increase in mass of the hydrogens is subtracted from the bonded",
1040     "(heavy) atom so that the total mass of the system remains the same."
1041   };
1042
1043
1044   FILE       *fp,*top_file,*top_file2,*itp_file=NULL;
1045   int        natom,nres;
1046   t_atoms    pdba_all,*pdba;
1047   t_atoms    *atoms;
1048   t_resinfo  *ri;
1049   t_blocka   *block;
1050   int        chain,nch,maxch,nwaterchain;
1051   t_pdbchain *pdb_ch;
1052   t_chain    *chains,*cc;
1053   char       select[STRLEN];
1054   int        nincl,nmol;
1055   char       **incls;
1056   t_mols     *mols;
1057   char       **gnames;
1058   int        ePBC;
1059   matrix     box;
1060   rvec       box_space;
1061   int        i,j,k,l,nrtp;
1062   int        *swap_index,si;
1063   t_restp    *restp;
1064   t_hackblock *ah;
1065   t_symtab   symtab;
1066   gpp_atomtype_t atype;
1067   gmx_residuetype_t rt;
1068   const char *top_fn;
1069   char       fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1070   char       molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1071   char       *c,forcefield[STRLEN],ffdir[STRLEN];
1072   char       ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1073   char       *watermodel;
1074   const char *watres;
1075   int        nrtpf;
1076   char       **rtpf;
1077   char       rtp[STRLEN];
1078   int        nrrn;
1079   char       **rrn;
1080   int        nrtprename,naa;
1081   rtprename_t *rtprename=NULL;
1082   int        nah,nNtdb,nCtdb,ntdblist;
1083   t_hackblock *ntdb,*ctdb,**tdblist;
1084   int        nssbonds;
1085   t_ssbond   *ssbonds;
1086   rvec       *pdbx,*x;
1087   gmx_bool       bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bMerge;
1088   real       mHmult=0;
1089   t_hackblock *hb_chain;
1090   t_restp    *restp_chain;
1091   output_env_t oenv;
1092   const char *p_restype;
1093   int        rc;
1094   int           this_atomnum;
1095   int           prev_atomnum;
1096   const char *  prev_atomname;
1097   const char *  this_atomname;
1098   const char *  prev_resname;
1099   const char *  this_resname;
1100   int           prev_resnum;
1101   int           this_resnum;
1102   char          prev_chainid;
1103   char          this_chainid;
1104   int           prev_chainnumber;
1105   int           this_chainnumber;
1106   int           nid_used;
1107   int           this_chainstart;
1108   int           prev_chainstart;
1109   gmx_bool      bMerged;
1110
1111   gmx_atomprop_t aps;
1112
1113   t_filenm   fnm[] = {
1114     { efSTX, "-f", "eiwit.pdb", ffREAD  },
1115     { efSTO, "-o", "conf",      ffWRITE },
1116     { efTOP, NULL, NULL,        ffWRITE },
1117     { efITP, "-i", "posre",     ffWRITE },
1118     { efNDX, "-n", "clean",     ffOPTWR },
1119     { efSTO, "-q", "clean.pdb", ffOPTWR }
1120   };
1121 #define NFILE asize(fnm)
1122
1123
1124   /* Command line arguments must be static */
1125   static gmx_bool bNewRTP=FALSE;
1126   static gmx_bool bInter=FALSE, bCysMan=FALSE;
1127   static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1128   static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1129   static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1130   static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1131   static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1132   static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1133   static real angle=135.0, distance=0.3,posre_fc=1000;
1134   static real long_bond_dist=0.25, short_bond_dist=0.05;
1135   static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1136   static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1137   static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1138   static const char *ff = "select";
1139
1140   t_pargs pa[] = {
1141     { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1142       "HIDDENWrite the residue database in new format to 'new.rtp'"},
1143     { "-lb",     FALSE, etREAL, {&long_bond_dist},
1144       "HIDDENLong bond warning distance" },
1145     { "-sb",     FALSE, etREAL, {&short_bond_dist},
1146       "HIDDENShort bond warning distance" },
1147     { "-chainsep", FALSE, etENUM, {chainsep},
1148       "Condition in PDB files when a new chain and molecule_type should be started" },
1149     { "-ff",     FALSE, etSTR,  {&ff},
1150       "Force field, interactive by default. Use -h for information." },
1151     { "-water",  FALSE, etENUM, {watstr},
1152       "Water model to use" },
1153     { "-inter",  FALSE, etBOOL, {&bInter},
1154       "Set the next 8 options to interactive"},
1155     { "-ss",     FALSE, etBOOL, {&bCysMan},
1156       "Interactive SS bridge selection" },
1157     { "-ter",    FALSE, etBOOL, {&bTerMan},
1158       "Interactive termini selection, iso charged" },
1159     { "-lys",    FALSE, etBOOL, {&bLysMan},
1160       "Interactive Lysine selection, iso charged" },
1161     { "-arg",    FALSE, etBOOL, {&bArgMan},
1162       "Interactive Arganine selection, iso charged" },
1163     { "-asp",    FALSE, etBOOL, {&bAspMan},
1164       "Interactive Aspartic Acid selection, iso charged" },
1165     { "-glu",    FALSE, etBOOL, {&bGluMan},
1166       "Interactive Glutamic Acid selection, iso charged" },
1167     { "-gln",    FALSE, etBOOL, {&bGlnMan},
1168       "Interactive Glutamine selection, iso neutral" },
1169     { "-his",    FALSE, etBOOL, {&bHisMan},
1170       "Interactive Histidine selection, iso checking H-bonds" },
1171     { "-angle",  FALSE, etREAL, {&angle},
1172       "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1173     { "-dist",   FALSE, etREAL, {&distance},
1174       "Maximum donor-acceptor distance for a H-bond (nm)" },
1175     { "-una",    FALSE, etBOOL, {&bUnA},
1176       "Select aromatic rings with united CH atoms on Phenylalanine, "
1177       "Tryptophane and Tyrosine" },
1178     { "-sort",   FALSE, etBOOL, {&bSort},
1179       "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1180     { "-ignh",   FALSE, etBOOL, {&bRemoveH},
1181       "Ignore hydrogen atoms that are in the pdb file" },
1182     { "-missing",FALSE, etBOOL, {&bAllowMissing},
1183       "Continue when atoms are missing, dangerous" },
1184     { "-v",      FALSE, etBOOL, {&bVerbose},
1185       "Be slightly more verbose in messages" },
1186     { "-posrefc",FALSE, etREAL, {&posre_fc},
1187       "Force constant for position restraints" },
1188     { "-vsite",  FALSE, etENUM, {vsitestr},
1189       "Convert atoms to virtual sites" },
1190     { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1191       "Make hydrogen atoms heavy" },
1192     { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1193       "Change the mass of hydrogens to 2 amu" },
1194     { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1195       "Use charge groups in the rtp file"  },
1196     { "-cmap", TRUE, etBOOL, {&bCmap},
1197       "Use cmap torsions (if enabled in the rtp file)"  },
1198     { "-renum", TRUE, etBOOL, {&bRenumRes},
1199       "Renumber the residues consecutively in the output"  },
1200     { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1201       "Use rtp entry names as residue names"  }
1202   };
1203 #define NPARGS asize(pa)
1204
1205   CopyRight(stderr,argv[0]);
1206   parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1207                     0,NULL,&oenv);
1208
1209   /* Force field selection, interactive or direct */
1210   choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1211             forcefield,sizeof(forcefield),
1212             ffdir,sizeof(ffdir));
1213
1214   if (strlen(forcefield) > 0) {
1215     strcpy(ffname,forcefield);
1216     ffname[0] = toupper(ffname[0]);
1217   } else {
1218     gmx_fatal(FARGS,"Empty forcefield string");
1219   }
1220
1221   printf("\nUsing the %s force field in directory %s\n\n",
1222          ffname,ffdir);
1223
1224   choose_watermodel(watstr[0],ffdir,&watermodel);
1225
1226   if (bInter) {
1227     /* if anything changes here, also change description of -inter */
1228     bCysMan = TRUE;
1229     bTerMan = TRUE;
1230     bLysMan = TRUE;
1231     bArgMan = TRUE;
1232     bAspMan = TRUE;
1233     bGluMan = TRUE;
1234     bGlnMan = TRUE;
1235     bHisMan = TRUE;
1236   }
1237
1238   if (bHeavyH)
1239     mHmult=4.0;
1240   else if (bDeuterate)
1241     mHmult=2.0;
1242   else
1243     mHmult=1.0;
1244
1245   switch(vsitestr[0][0]) {
1246   case 'n': /* none */
1247     bVsites=FALSE;
1248     bVsiteAromatics=FALSE;
1249     break;
1250   case 'h': /* hydrogens */
1251     bVsites=TRUE;
1252     bVsiteAromatics=FALSE;
1253     break;
1254   case 'a': /* aromatics */
1255     bVsites=TRUE;
1256     bVsiteAromatics=TRUE;
1257     break;
1258   default:
1259     gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1260                 __FILE__,__LINE__,vsitestr[0]);
1261   }/* end switch */
1262
1263   /* Open the symbol table */
1264   open_symtab(&symtab);
1265
1266   /* Residue type database */
1267   gmx_residuetype_init(&rt);
1268
1269   /* Read residue renaming database(s), if present */
1270   nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1271
1272   nrtprename = 0;
1273   rtprename  = NULL;
1274   for(i=0; i<nrrn; i++) {
1275     fp = fflib_open(rrn[i]);
1276     read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1277     fclose(fp);
1278     sfree(rrn[i]);
1279   }
1280   sfree(rrn);
1281
1282   /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1283   naa=0;
1284   for(i=0;i<nrtprename;i++)
1285   {
1286       rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1287
1288       /* Only add names if the 'standard' gromacs/iupac base name was found */
1289       if(rc==0)
1290       {
1291           gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1292           gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1293           gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1294           gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1295       }
1296   }
1297
1298   clear_mat(box);
1299   if (watermodel != NULL && (strstr(watermodel,"4p") ||
1300                              strstr(watermodel,"4P"))) {
1301     watres = "HO4";
1302   } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1303                                     strstr(watermodel,"5P"))) {
1304     watres = "HO5";
1305   } else {
1306     watres = "HOH";
1307   }
1308
1309   aps = gmx_atomprop_init();
1310   natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1311                       &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1312                       aps,bVerbose);
1313
1314   if (natom==0)
1315     gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1316
1317   printf("Analyzing pdb file\n");
1318   nch=0;
1319   maxch=0;
1320   nwaterchain=0;
1321
1322   modify_chain_numbers(&pdba_all,chainsep[0]);
1323
1324
1325   bMerge = !strncmp(chainsep[0],"int",3);
1326
1327   this_atomname       = NULL;
1328   this_atomnum        = -1;
1329   this_resname        = NULL;
1330   this_resnum         = -1;
1331   this_chainid        = '?';
1332   this_chainnumber    = -1;
1333   this_chainstart     = 0;
1334
1335   pdb_ch=NULL;
1336   bMerged = FALSE;
1337   for (i=0; (i<natom); i++)
1338   {
1339       ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1340
1341       prev_atomname      = this_atomname;
1342       prev_atomnum       = this_atomnum;
1343       prev_resname       = this_resname;
1344       prev_resnum        = this_resnum;
1345       prev_chainid       = this_chainid;
1346       prev_chainnumber   = this_chainnumber;
1347       if (!bMerged)
1348       {
1349           prev_chainstart    = this_chainstart;
1350       }
1351
1352       this_atomname      = *pdba_all.atomname[i];
1353       this_atomnum       = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1354       this_resname       = *ri->name;
1355       this_resnum        = ri->nr;
1356       this_chainid       = ri->chainid;
1357       this_chainnumber   = ri->chainnum;
1358
1359       bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1360       if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1361       {
1362           this_chainstart = pdba_all.atom[i].resind;
1363           if (bMerge && i>0 && !bWat)
1364           {
1365               printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
1366                      "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
1367                      prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1368                      this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1369
1370               if(NULL==fgets(select,STRLEN-1,stdin))
1371               {
1372                   gmx_fatal(FARGS,"Error reading from stdin");
1373               }
1374           }
1375           else
1376           {
1377               select[0] = 'n';
1378           }
1379
1380           bMerged = (select[0] == 'y');
1381           if (bMerged)
1382           {
1383               pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1384                   pdba_all.atom[i].resind - prev_chainstart;
1385               pdb_ch[nch-1].nterpairs++;
1386               srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1387           }
1388           else
1389           {
1390               /* set natom for previous chain */
1391               if (nch > 0)
1392               {
1393                   pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1394               }
1395               if (bWat)
1396               {
1397                   nwaterchain++;
1398                   ri->chainid = ' ';
1399               }
1400               /* check if chain identifier was used before */
1401               for (j=0; (j<nch); j++)
1402               {
1403                   if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1404                   {
1405                       printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1406                              "They will be treated as separate chains unless you reorder your file.\n",
1407                              ri->chainid);
1408                   }
1409               }
1410               if (nch == maxch)
1411               {
1412                   maxch += 16;
1413                   srenew(pdb_ch,maxch);
1414               }
1415               pdb_ch[nch].chainid = ri->chainid;
1416               pdb_ch[nch].chainnum = ri->chainnum;
1417               pdb_ch[nch].start=i;
1418               pdb_ch[nch].bAllWat=bWat;
1419               if (bWat)
1420                   pdb_ch[nch].nterpairs=0;
1421               else
1422                   pdb_ch[nch].nterpairs=1;
1423               snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1424               /* modified [nch] to [0] below */
1425               pdb_ch[nch].chainstart[0]=0;
1426               nch++;
1427           }
1428       }
1429       bPrevWat=bWat;
1430   }
1431   pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1432
1433   /* set all the water blocks at the end of the chain */
1434   snew(swap_index,nch);
1435   j=0;
1436   for(i=0; i<nch; i++)
1437     if (!pdb_ch[i].bAllWat) {
1438       swap_index[j]=i;
1439       j++;
1440     }
1441   for(i=0; i<nch; i++)
1442     if (pdb_ch[i].bAllWat) {
1443       swap_index[j]=i;
1444       j++;
1445     }
1446   if (nwaterchain>1)
1447     printf("Moved all the water blocks to the end\n");
1448
1449   snew(chains,nch);
1450   /* copy pdb data and x for all chains */
1451   for (i=0; (i<nch); i++) {
1452     si=swap_index[i];
1453     chains[i].chainid = pdb_ch[si].chainid;
1454     chains[i].chainnum = pdb_ch[si].chainnum;
1455     chains[i].bAllWat = pdb_ch[si].bAllWat;
1456     chains[i].nterpairs = pdb_ch[si].nterpairs;
1457     chains[i].chainstart = pdb_ch[si].chainstart;
1458     snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1459     snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1460     snew(chains[i].r_start,pdb_ch[si].nterpairs);
1461     snew(chains[i].r_end,pdb_ch[si].nterpairs);
1462
1463     snew(chains[i].pdba,1);
1464     init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1465     snew(chains[i].x,chains[i].pdba->nr);
1466     for (j=0; j<chains[i].pdba->nr; j++) {
1467       chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1468       snew(chains[i].pdba->atomname[j],1);
1469       *chains[i].pdba->atomname[j] =
1470         strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1471       chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1472       copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1473     }
1474     /* Re-index the residues assuming that the indices are continuous */
1475     k    = chains[i].pdba->atom[0].resind;
1476     nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1477     chains[i].pdba->nres = nres;
1478     for(j=0; j < chains[i].pdba->nr; j++) {
1479       chains[i].pdba->atom[j].resind -= k;
1480     }
1481     srenew(chains[i].pdba->resinfo,nres);
1482     for(j=0; j<nres; j++) {
1483       chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1484       snew(chains[i].pdba->resinfo[j].name,1);
1485       *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1486       /* make all chain identifiers equal to that of the chain */
1487       chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1488     }
1489   }
1490
1491   if (bMerge)
1492     printf("\nMerged %d chains into one molecule definition\n\n",
1493            pdb_ch[0].nterpairs);
1494
1495   printf("There are %d chains and %d blocks of water and "
1496          "%d residues with %d atoms\n",
1497          nch-nwaterchain,nwaterchain,
1498          pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1499
1500   printf("\n  %5s  %4s %6s\n","chain","#res","#atoms");
1501   for (i=0; (i<nch); i++)
1502     printf("  %d '%c' %5d %6d  %s\n",
1503            i+1, chains[i].chainid ? chains[i].chainid:'-',
1504            chains[i].pdba->nres, chains[i].pdba->nr,
1505            chains[i].bAllWat ? "(only water)":"");
1506   printf("\n");
1507
1508   check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1509
1510   /* Read atomtypes... */
1511   atype = read_atype(ffdir,&symtab);
1512
1513   /* read residue database */
1514   printf("Reading residue database... (%s)\n",forcefield);
1515   nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1516   nrtp  = 0;
1517   restp = NULL;
1518   for(i=0; i<nrtpf; i++) {
1519     read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1520     sfree(rtpf[i]);
1521   }
1522   sfree(rtpf);
1523   if (bNewRTP) {
1524     /* Not correct with multiple rtp input files with different bonded types */
1525     fp=gmx_fio_fopen("new.rtp","w");
1526     print_resall(fp,nrtp,restp,atype);
1527     gmx_fio_fclose(fp);
1528   }
1529
1530   /* read hydrogen database */
1531   nah = read_h_db(ffdir,&ah);
1532
1533   /* Read Termini database... */
1534   nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1535   nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1536
1537   top_fn=ftp2fn(efTOP,NFILE,fnm);
1538   top_file=gmx_fio_fopen(top_fn,"w");
1539
1540   sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1541
1542   print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1543
1544   nincl=0;
1545   nmol=0;
1546   incls=NULL;
1547   mols=NULL;
1548   nres=0;
1549   for(chain=0; (chain<nch); chain++) {
1550     cc = &(chains[chain]);
1551
1552     /* set pdba, natom and nres to the current chain */
1553     pdba =cc->pdba;
1554     x    =cc->x;
1555     natom=cc->pdba->nr;
1556     nres =cc->pdba->nres;
1557
1558     if (cc->chainid && ( cc->chainid != ' ' ) )
1559       printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1560               chain+1,cc->chainid,natom,nres);
1561     else
1562       printf("Processing chain %d (%d atoms, %d residues)\n",
1563               chain+1,natom,nres);
1564
1565     process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1566                   bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1567                   nrtprename,rtprename);
1568
1569         cc->chainstart[cc->nterpairs] = pdba->nres;
1570         j = 0;
1571         for(i=0; i<cc->nterpairs; i++)
1572         {
1573             find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1574                         &(cc->r_start[j]),&(cc->r_end[j]),rt);
1575
1576             if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1577             {
1578                 j++;
1579             }
1580         }
1581         cc->nterpairs = j;
1582         if (cc->nterpairs == 0)
1583         {
1584             printf("Problem with chain definition, or missing terminal residues.\n"
1585                    "This chain does not appear to contain a recognized chain molecule.\n"
1586                    "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1587         }
1588
1589     /* Check for disulfides and other special bonds */
1590     nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1591
1592     if (nrtprename > 0) {
1593       rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1594                     &symtab,bVerbose);
1595     }
1596
1597     if (debug) {
1598       if (nch==1) {
1599         sprintf(fn,"chain.pdb");
1600       } else {
1601         sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1602       }
1603       write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1604     }
1605
1606
1607     for(i=0; i<cc->nterpairs; i++)
1608     {
1609
1610         /* Set termini.
1611          * We first apply a filter so we only have the
1612          * termini that can be applied to the residue in question
1613          * (or a generic terminus if no-residue specific is available).
1614          */
1615         /* First the N terminus */
1616         if (nNtdb > 0)
1617         {
1618             tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1619                                  *pdba->resinfo[cc->r_start[i]].name,
1620                                  *pdba->resinfo[cc->r_start[i]].rtp,
1621                                  &ntdblist);
1622             if(ntdblist==0)
1623             {
1624                 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1625                        "is already in a terminus-specific form and skipping terminus selection.\n");
1626                 cc->ntdb[i]=NULL;
1627             }
1628             else
1629             {
1630                 if(bTerMan && ntdblist>1)
1631                 {
1632                     sprintf(select,"Select start terminus type for %s-%d",
1633                             *pdba->resinfo[cc->r_start[i]].name,
1634                             pdba->resinfo[cc->r_start[i]].nr);
1635                     cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1636                 }
1637                 else
1638                 {
1639                     cc->ntdb[i] = tdblist[0];
1640                 }
1641
1642                 printf("Start terminus %s-%d: %s\n",
1643                        *pdba->resinfo[cc->r_start[i]].name,
1644                        pdba->resinfo[cc->r_start[i]].nr,
1645                        (cc->ntdb[i])->name);
1646                 sfree(tdblist);
1647             }
1648         }
1649         else
1650         {
1651             cc->ntdb[i] = NULL;
1652         }
1653
1654         /* And the C terminus */
1655         if (nCtdb > 0)
1656         {
1657             tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1658                                  *pdba->resinfo[cc->r_end[i]].name,
1659                                  *pdba->resinfo[cc->r_end[i]].rtp,
1660                                  &ntdblist);
1661             if(ntdblist==0)
1662             {
1663                 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1664                        "is already in a terminus-specific form and skipping terminus selection.\n");
1665                 cc->ctdb[i] = NULL;
1666             }
1667             else
1668             {
1669                 if(bTerMan && ntdblist>1)
1670                 {
1671                     sprintf(select,"Select end terminus type for %s-%d",
1672                             *pdba->resinfo[cc->r_end[i]].name,
1673                             pdba->resinfo[cc->r_end[i]].nr);
1674                     cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1675                 }
1676                 else
1677                 {
1678                     cc->ctdb[i] = tdblist[0];
1679                 }
1680                 printf("End terminus %s-%d: %s\n",
1681                        *pdba->resinfo[cc->r_end[i]].name,
1682                        pdba->resinfo[cc->r_end[i]].nr,
1683                        (cc->ctdb[i])->name);
1684                 sfree(tdblist);
1685             }
1686         }
1687         else
1688         {
1689             cc->ctdb[i] = NULL;
1690         }
1691     }
1692     /* lookup hackblocks and rtp for all residues */
1693     get_hackblocks_rtp(&hb_chain, &restp_chain,
1694                        nrtp, restp, pdba->nres, pdba->resinfo,
1695                        cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1696     /* ideally, now we would not need the rtp itself anymore, but do
1697      everything using the hb and restp arrays. Unfortunately, that
1698      requires some re-thinking of code in gen_vsite.c, which I won't
1699      do now :( AF 26-7-99 */
1700
1701     rename_atoms(NULL,ffdir,
1702                  pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1703
1704     match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1705
1706     if (bSort) {
1707       block = new_blocka();
1708       snew(gnames,1);
1709       sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1710                     natom,&pdba,&x,block,&gnames);
1711       natom = remove_duplicate_atoms(pdba,x,bVerbose);
1712       if (ftp2bSet(efNDX,NFILE,fnm)) {
1713         if (bRemoveH) {
1714           fprintf(stderr,"WARNING: with the -remh option the generated "
1715                   "index file (%s) might be useless\n"
1716                   "(the index file is generated before hydrogens are added)",
1717                   ftp2fn(efNDX,NFILE,fnm));
1718         }
1719         write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1720       }
1721       for(i=0; i < block->nr; i++)
1722         sfree(gnames[i]);
1723       sfree(gnames);
1724       done_blocka(block);
1725     } else {
1726       fprintf(stderr,"WARNING: "
1727               "without sorting no check for duplicate atoms can be done\n");
1728     }
1729
1730     /* Generate Hydrogen atoms (and termini) in the sequence */
1731     natom=add_h(&pdba,&x,nah,ah,
1732                 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1733                 NULL,NULL,TRUE,FALSE);
1734     printf("Now there are %d residues with %d atoms\n",
1735            pdba->nres,pdba->nr);
1736     if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1737
1738     if (debug)
1739       for(i=0; (i<natom); i++)
1740         fprintf(debug,"Res %s%d atom %d %s\n",
1741                 *(pdba->resinfo[pdba->atom[i].resind].name),
1742                 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1743
1744     strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1745
1746     /* make up molecule name(s) */
1747
1748       k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1749
1750     gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1751
1752     suffix[0]='\0';
1753
1754     if (cc->bAllWat)
1755     {
1756         sprintf(molname,"Water");
1757     }
1758     else
1759     {
1760         this_chainid = cc->chainid;
1761
1762         /* Add the chain id if we have one */
1763         if(this_chainid != ' ')
1764         {
1765             sprintf(buf,"_chain_%c",this_chainid);
1766             strcat(suffix,buf);
1767         }
1768
1769         /* Check if there have been previous chains with the same id */
1770         nid_used = 0;
1771         for(k=0;k<chain;k++)
1772         {
1773             if(cc->chainid == chains[k].chainid)
1774             {
1775                 nid_used++;
1776             }
1777         }
1778         /* Add the number for this chain identifier if there are multiple copies */
1779         if(nid_used>0)
1780         {
1781
1782             sprintf(buf,"%d",nid_used+1);
1783             strcat(suffix,buf);
1784         }
1785
1786         if(strlen(suffix)>0)
1787         {
1788             sprintf(molname,"%s%s",p_restype,suffix);
1789         }
1790         else
1791         {
1792             strcpy(molname,p_restype);
1793         }
1794     }
1795
1796     if ((nch-nwaterchain>1) && !cc->bAllWat) {
1797       bITP=TRUE;
1798       strcpy(itp_fn,top_fn);
1799       printf("Chain time...\n");
1800       c=strrchr(itp_fn,'.');
1801       sprintf(c,"_%s.itp",molname);
1802       c=strrchr(posre_fn,'.');
1803       sprintf(c,"_%s.itp",molname);
1804       if (strcmp(itp_fn,posre_fn) == 0) {
1805         strcpy(buf_fn,posre_fn);
1806         c  = strrchr(buf_fn,'.');
1807         *c = '\0';
1808         sprintf(posre_fn,"%s_pr.itp",buf_fn);
1809       }
1810
1811       nincl++;
1812       srenew(incls,nincl);
1813       incls[nincl-1]=strdup(itp_fn);
1814       itp_file=gmx_fio_fopen(itp_fn,"w");
1815     } else
1816       bITP=FALSE;
1817
1818     srenew(mols,nmol+1);
1819     if (cc->bAllWat) {
1820       mols[nmol].name = strdup("SOL");
1821       mols[nmol].nr   = pdba->nres;
1822     } else {
1823       mols[nmol].name = strdup(molname);
1824       mols[nmol].nr   = 1;
1825     }
1826     nmol++;
1827
1828     if (bITP)
1829       print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1830
1831     if (cc->bAllWat)
1832       top_file2=NULL;
1833     else
1834       if (bITP)
1835         top_file2=itp_file;
1836       else
1837         top_file2=top_file;
1838
1839     pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1840             nrtp,restp,
1841             restp_chain,hb_chain,
1842             cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1843             bVsites,bVsiteAromatics,forcefield,ffdir,
1844             mHmult,nssbonds,ssbonds,
1845             long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1846             bRenumRes,bRTPresname);
1847
1848     if (!cc->bAllWat)
1849       write_posres(posre_fn,pdba,posre_fc);
1850
1851     if (bITP)
1852       gmx_fio_fclose(itp_file);
1853
1854     /* pdba and natom have been reassigned somewhere so: */
1855     cc->pdba = pdba;
1856     cc->x = x;
1857
1858     if (debug) {
1859       if (cc->chainid == ' ')
1860         sprintf(fn,"chain.pdb");
1861       else
1862         sprintf(fn,"chain_%c.pdb",cc->chainid);
1863       cool_quote(quote,255,NULL);
1864       write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1865     }
1866   }
1867
1868   if (watermodel == NULL) {
1869     for(chain=0; chain<nch; chain++) {
1870       if (chains[chain].bAllWat) {
1871         gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1872       }
1873     }
1874   } else {
1875     sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1876     if (!fflib_fexist(buf_fn)) {
1877       gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1878                 buf_fn,watermodel);
1879     }
1880   }
1881
1882   print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1883   gmx_fio_fclose(top_file);
1884
1885   gmx_residuetype_destroy(rt);
1886
1887   /* now merge all chains back together */
1888   natom=0;
1889   nres=0;
1890   for (i=0; (i<nch); i++) {
1891     natom+=chains[i].pdba->nr;
1892     nres+=chains[i].pdba->nres;
1893   }
1894   snew(atoms,1);
1895   init_t_atoms(atoms,natom,FALSE);
1896   for(i=0; i < atoms->nres; i++)
1897     sfree(atoms->resinfo[i].name);
1898   sfree(atoms->resinfo);
1899   atoms->nres=nres;
1900   snew(atoms->resinfo,nres);
1901   snew(x,natom);
1902   k=0;
1903   l=0;
1904   for (i=0; (i<nch); i++) {
1905     if (nch>1)
1906       printf("Including chain %d in system: %d atoms %d residues\n",
1907              i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1908     for (j=0; (j<chains[i].pdba->nr); j++) {
1909       atoms->atom[k]=chains[i].pdba->atom[j];
1910       atoms->atom[k].resind += l; /* l is processed nr of residues */
1911       atoms->atomname[k]=chains[i].pdba->atomname[j];
1912       atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1913       copy_rvec(chains[i].x[j],x[k]);
1914       k++;
1915     }
1916     for (j=0; (j<chains[i].pdba->nres); j++) {
1917       atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1918       if (bRTPresname) {
1919         atoms->resinfo[l].name = atoms->resinfo[l].rtp;
1920       }
1921       l++;
1922     }
1923   }
1924
1925   if (nch>1) {
1926     fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
1927     print_sums(atoms, TRUE);
1928   }
1929
1930   fprintf(stderr,"\nWriting coordinate file...\n");
1931   clear_rvec(box_space);
1932   if (box[0][0] == 0)
1933     gen_box(0,atoms->nr,x,box,box_space,FALSE);
1934   write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
1935
1936   printf("\t\t--------- PLEASE NOTE ------------\n");
1937   printf("You have successfully generated a topology from: %s.\n",
1938          opt2fn("-f",NFILE,fnm));
1939   if (watermodel != NULL) {
1940     printf("The %s force field and the %s water model are used.\n",
1941            ffname,watermodel);
1942   } else {
1943     printf("The %s force field is used.\n",
1944            ffname);
1945   }
1946   printf("\t\t--------- ETON ESAELP ------------\n");
1947
1948
1949   thanx(stdout);
1950
1951   return 0;
1952 }