fixed pdb2gmx behavior for nearly matching residue names and made the matching much...
[gromacs.git] / src / kernel / ter_db.c
blob1e9474b1b632de4c11524440db341c172ddd0de6
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include "sysstuff.h"
40 #include "smalloc.h"
41 #include "typedefs.h"
42 #include "symtab.h"
43 #include "futil.h"
44 #include "resall.h"
45 #include "h_db.h"
46 #include "string2.h"
47 #include "strdb.h"
48 #include "gmx_fatal.h"
49 #include "ter_db.h"
50 #include "toputil.h"
51 #include "gmxfio.h"
52 #include "fflibutil.h"
55 /* use bonded types definitions in hackblock.h */
56 #define ekwRepl ebtsNR+1
57 #define ekwAdd ebtsNR+2
58 #define ekwDel ebtsNR+3
59 #define ekwNR 3
60 const char *kw_names[ekwNR] = {
61 "replace", "add", "delete"
64 int find_kw(char *keyw)
66 int i;
68 for(i=0; i<ebtsNR; i++)
69 if (gmx_strcasecmp(btsNames[i],keyw) == 0)
70 return i;
71 for(i=0; i<ekwNR; i++)
72 if (gmx_strcasecmp(kw_names[i],keyw) == 0)
73 return ebtsNR + 1 + i;
75 return NOTSET;
78 #define FATAL() gmx_fatal(FARGS,"Reading Termini Database: not enough items on line\n%s",line)
80 static void read_atom(char *line, bool bAdd,
81 char **nname, t_atom *a, gpp_atomtype_t atype, int *cgnr)
83 int nr, i;
84 char buf[5][30],type[12];
85 double m, q;
87 /* This code is messy, because of support for different formats:
88 * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
89 * for add: <atom type> <m> <q> [cgnr]
91 nr = sscanf(line,"%s %s %s %s %s",buf[0],buf[1],buf[2],buf[3],buf[4]);
93 /* Here there an ambiguity due to the old replace format with cgnr,
94 * which was read for years, but ignored in the rest of the code.
95 * We have to assume that the atom type does not start with a digit
96 * to make a line with 4 entries uniquely interpretable.
98 if (!bAdd && nr == 4 && isdigit(buf[1][0])) {
99 nr = 3;
102 if (nr < 3 || nr > 4) {
103 gmx_fatal(FARGS,"Reading Termini Database: expected %d or %d items of atom data in stead of %d on line\n%s", 3, 4, nr, line);
105 i = 0;
106 if (!bAdd) {
107 if (nr == 4) {
108 *nname = strdup(buf[i++]);
109 } else {
110 *nname = NULL;
113 a->type = get_atomtype_type(buf[i++],atype);
114 sscanf(buf[i++],"%lf",&m);
115 a->m = m;
116 sscanf(buf[i++],"%lf",&q);
117 a->q = q;
118 if (bAdd && nr == 4) {
119 sscanf(buf[i++],"%d", cgnr);
120 } else {
121 *cgnr = NOTSET;
125 static void print_atom(FILE *out,t_atom *a,gpp_atomtype_t atype,char *newnm)
127 fprintf(out,"\t%s\t%g\t%g\n",
128 get_atomtype_name(a->type,atype),a->m,a->q);
131 static void print_ter_db(const char *ff,char C,int nb,t_hackblock tb[],
132 gpp_atomtype_t atype)
134 FILE *out;
135 int i,j,k,bt,nrepl,nadd,ndel;
136 char buf[STRLEN],nname[STRLEN];
138 sprintf(buf,"%s-%c.tdb",ff,C);
139 out = gmx_fio_fopen(buf,"w");
141 for(i=0; (i<nb); i++) {
142 fprintf(out,"[ %s ]\n",tb[i].name);
144 /* first count: */
145 nrepl=0;
146 nadd=0;
147 ndel=0;
148 for(j=0; j<tb[i].nhack; j++)
149 if ( tb[i].hack[j].oname!=NULL && tb[i].hack[j].nname!=NULL )
150 nrepl++;
151 else if ( tb[i].hack[j].oname==NULL && tb[i].hack[j].nname!=NULL )
152 nadd++;
153 else if ( tb[i].hack[j].oname!=NULL && tb[i].hack[j].nname==NULL )
154 ndel++;
155 else if ( tb[i].hack[j].oname==NULL && tb[i].hack[j].nname==NULL )
156 gmx_fatal(FARGS,"invalid hack (%s) in termini database",tb[i].name);
157 if (nrepl) {
158 fprintf(out,"[ %s ]\n",kw_names[ekwRepl-ebtsNR-1]);
159 for(j=0; j<tb[i].nhack; j++)
160 if ( tb[i].hack[j].oname!=NULL && tb[i].hack[j].nname!=NULL ) {
161 fprintf(out,"%s\t",tb[i].hack[j].oname);
162 print_atom(out,tb[i].hack[j].atom,atype,tb[i].hack[j].nname);
165 if (nadd) {
166 fprintf(out,"[ %s ]\n",kw_names[ekwAdd-ebtsNR-1]);
167 for(j=0; j<tb[i].nhack; j++)
168 if ( tb[i].hack[j].oname==NULL && tb[i].hack[j].nname!=NULL ) {
169 print_ab(out,&(tb[i].hack[j]),tb[i].hack[j].nname);
170 print_atom(out,tb[i].hack[j].atom,atype,tb[i].hack[j].nname);
173 if (ndel) {
174 fprintf(out,"[ %s ]\n",kw_names[ekwDel-ebtsNR-1]);
175 for(j=0; j<tb[i].nhack; j++)
176 if ( tb[i].hack[j].oname!=NULL && tb[i].hack[j].nname==NULL )
177 fprintf(out,"%s\n",tb[i].hack[j].oname);
179 for(bt=0; bt<ebtsNR; bt++)
180 if (tb[i].rb[bt].nb) {
181 fprintf(out,"[ %s ]\n", btsNames[bt]);
182 for(j=0; j<tb[i].rb[bt].nb; j++) {
183 for(k=0; k<btsNiatoms[bt]; k++)
184 fprintf(out,"%s%s",k?"\t":"",tb[i].rb[bt].b[j].a[k]);
185 if ( tb[i].rb[bt].b[j].s )
186 fprintf(out,"\t%s",tb[i].rb[bt].b[j].s);
187 fprintf(out,"\n");
190 fprintf(out,"\n");
192 gmx_fio_fclose(out);
195 static void read_ter_db_file(char *fn,
196 int *ntbptr,t_hackblock **tbptr,
197 gpp_atomtype_t atype)
199 char filebase[STRLEN],*ptr;
200 FILE *in;
201 char header[STRLEN],buf[STRLEN],line[STRLEN];
202 t_hackblock *tb;
203 int i,j,n,ni,kwnr,nb,maxnb,nh;
205 fflib_filename_base(fn,filebase,STRLEN);
206 /* Remove the C/N termini extension */
207 ptr = strrchr(filebase,'.');
208 if (ptr != NULL) {
209 ptr[0] = '\0';
212 in = fflib_open(fn);
213 if (debug)
214 fprintf(debug,"Opened %s\n",fn);
216 tb = *tbptr;
217 nb = *ntbptr - 1;
218 maxnb=0;
219 kwnr=NOTSET;
220 get_a_line(in,line,STRLEN);
221 while (!feof(in)) {
222 if (get_header(line,header)) {
223 /* this is a new block, or a new keyword */
224 kwnr=find_kw(header);
226 if (kwnr == NOTSET) {
227 nb++;
228 /* here starts a new block */
229 if ( nb >= maxnb ) {
230 maxnb = nb + 100;
231 srenew(tb,maxnb);
233 clear_t_hackblock(&tb[nb]);
234 tb[nb].name = strdup(header);
235 tb[nb].filebase = strdup(filebase);
237 } else {
238 if (nb < 0)
239 gmx_fatal(FARGS,"reading termini database: "
240 "directive expected before line:\n%s\n"
241 "This might be a file in an old format.",line);
242 /* this is not a header, so it must be data */
243 if (kwnr >= ebtsNR) {
244 /* this is a hack: add/rename/delete atoms */
245 /* make space for hacks */
246 if (tb[nb].nhack >= tb[nb].maxhack) {
247 tb[nb].maxhack+=10;
248 srenew(tb[nb].hack, tb[nb].maxhack);
250 nh=tb[nb].nhack;
251 clear_t_hack(&(tb[nb].hack[nh]));
252 for(i=0; i<4; i++)
253 tb[nb].hack[nh].a[i]=NULL;
254 tb[nb].nhack++;
256 /* get data */
257 n=0;
258 if ( kwnr==ekwRepl || kwnr==ekwDel ) {
259 if (sscanf(line, "%s%n", buf, &n) != 1)
260 gmx_fatal(FARGS,"Reading Termini Database '%s': "
261 "expected atom name on line\n%s",fn,line);
262 tb[nb].hack[nh].oname = strdup(buf);
263 /* we only replace or delete one atom at a time */
264 tb[nb].hack[nh].nr = 1;
265 } else if ( kwnr==ekwAdd ) {
266 read_ab(line, fn, &(tb[nb].hack[nh]));
267 get_a_line(in, line, STRLEN);
268 } else
269 gmx_fatal(FARGS,"unimplemented keyword number %d (%s:%d)",
270 kwnr,__FILE__,__LINE__);
271 if ( kwnr==ekwRepl || kwnr==ekwAdd ) {
272 snew(tb[nb].hack[nh].atom, 1);
273 read_atom(line+n,kwnr==ekwAdd,
274 &tb[nb].hack[nh].nname, tb[nb].hack[nh].atom, atype,
275 &tb[nb].hack[nh].cgnr);
276 if (tb[nb].hack[nh].nname == NULL) {
277 if (tb[nb].hack[nh].oname != NULL) {
278 tb[nb].hack[nh].nname = strdup(tb[nb].hack[nh].oname);
279 } else {
280 gmx_fatal(FARGS,"Reading Termini Database '%s': don't know which name the new atom should have on line\n%s",fn,line);
284 } else if (kwnr >= 0 && kwnr < ebtsNR) {
285 /* this is bonded data: bonds, angles, dihedrals or impropers */
286 srenew(tb[nb].rb[kwnr].b,tb[nb].rb[kwnr].nb+1);
287 n=0;
288 for(j=0; j<btsNiatoms[kwnr]; j++) {
289 if ( sscanf(line+n, "%s%n", buf, &ni) == 1 )
290 tb[nb].rb[kwnr].b[tb[nb].rb[kwnr].nb].a[j] = strdup(buf);
291 else
292 gmx_fatal(FARGS,"Reading Termini Database '%s': expected %d atom names (found %d) on line\n%s", fn, btsNiatoms[kwnr], j-1, line);
293 n+=ni;
295 for( ; j<MAXATOMLIST; j++)
296 tb[nb].rb[kwnr].b[tb[nb].rb[kwnr].nb].a[j] = NULL;
297 strcpy(buf, "");
298 sscanf(line+n, "%s", buf);
299 tb[nb].rb[kwnr].b[tb[nb].rb[kwnr].nb].s = strdup(buf);
300 tb[nb].rb[kwnr].nb++;
301 } else
302 gmx_fatal(FARGS,"Reading Termini Database: Expecting a header at line\n"
303 "%s",line);
305 get_a_line(in,line,STRLEN);
307 nb++;
308 srenew(tb,nb);
310 ffclose(in);
312 *ntbptr = nb;
313 *tbptr = tb;
316 int read_ter_db(const char *ffdir,char ter,
317 t_hackblock **tbptr,gpp_atomtype_t atype)
319 char ext[STRLEN];
320 int ntdbf,f;
321 char **tdbf;
322 int ntb;
324 sprintf(ext,".%c.tdb",ter);
326 /* Search for termini database files.
327 * Do not generate an error when none are found.
329 ntdbf = fflib_search_file_end(ffdir,ext,FALSE,&tdbf);
330 ntb = 0;
331 *tbptr = NULL;
332 for(f=0; f<ntdbf; f++) {
333 read_ter_db_file(tdbf[f],&ntb,tbptr,atype);
334 sfree(tdbf[f]);
336 sfree(tdbf);
338 if (debug) {
339 print_ter_db("new",ter,ntb,*tbptr,atype);
342 return ntb;
345 t_hackblock **filter_ter(int nrtp,t_restp rtp[],
346 int nb,t_hackblock tb[],
347 const char *resname,
348 const char *rtpname,
349 int *nret)
351 /* Since some force fields (e.g. OPLS) needs different
352 * atomtypes for different residues there could be a lot
353 * of entries in the databases for specific residues
354 * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
356 * To reduce the database size, we assume that a terminus specifier liker
358 * [ GLY|SER|ALA-NH3+ ]
360 * would cover all of the three residue types above.
361 * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
362 * pdb2gmx only uses the first 3 letters when calling this routine.
364 * To automate this, this routines scans a list of termini
365 * for the residue name "resname" and returns an allocated list of
366 * pointers to the termini that could be applied to the
367 * residue in question. The variable pointed to by nret will
368 * contain the number of valid pointers in the list.
369 * Remember to free the list when you are done with it...
372 t_restp * restp;
373 int i,j,n,len,none_idx;
374 bool found;
375 char *rtpname_match,*s,*s2,*c;
376 t_hackblock **list;
378 rtpname_match = search_rtp(rtpname,nrtp,rtp);
379 restp = get_restp(rtpname_match,nrtp,rtp);
381 n=0;
382 list=NULL;
384 for(i=0;i<nb;i++)
386 s=tb[i].name;
387 found=FALSE;
390 /* The residue name should appear in a tdb file with the same base name
391 * as the file containing the rtp entry.
392 * This makes termini selection for different molecule types
393 * much cleaner.
395 if (gmx_strcasecmp(restp->filebase,tb[i].filebase) == 0 &&
396 gmx_strncasecmp(resname,s,3) == 0)
398 found=TRUE;
399 srenew(list,n+1);
400 list[n]=&(tb[i]);
401 n++;
403 else
405 /* advance to next |-separated field */
406 s=strchr(s,'|');
407 if(s!=NULL)
409 s++;
413 while(!found && s!=NULL);
416 /* All residue-specific termini have been added. See if there
417 * are some generic ones by searching for the occurence of
418 * '-' in the name prior to the last position (which indicates charge).
419 * The [ None ] alternative is special since we don't want that
420 * to be the default, so we put it last in the list we return.
422 none_idx=-1;
423 for(i=0;i<nb;i++)
425 s=tb[i].name;
426 /* The residue name should appear in a tdb file with the same base name
427 * as the file containing the rtp entry.
428 * This makes termini selection for different molecule types
429 * much cleaner.
431 if(gmx_strcasecmp(restp->filebase,tb[i].filebase) == 0)
433 if(!gmx_strcasecmp("None",s))
435 none_idx=i;
437 else
439 c=strchr(s,'-');
440 if(c==NULL || ((c-s+1)==strlen(s)))
442 /* Check that we haven't already added a residue-specific version
443 * of this terminus.
445 for(j=0;j<n && strstr((*list[j]).name,s)==NULL;j++);
446 if(j==n)
448 srenew(list,n+1);
449 list[n]=&(tb[i]);
450 n++;
456 if(none_idx>=0)
458 srenew(list,n+1);
459 list[n]=&(tb[none_idx]);
460 n++;
463 *nret=n;
464 return list;
468 t_hackblock *choose_ter(int nb,t_hackblock **tb,const char *title)
470 int i,sel,ret;
472 printf("%s\n",title);
473 for(i=0; (i<nb); i++)
474 printf("%2d: %s\n",i,(*tb[i]).name);
475 do {
476 ret=fscanf(stdin,"%d",&sel);
477 } while ((ret != 1) || (sel < 0) || (sel >= nb));
479 return tb[sel];