Changed the pV term to the correct one, with the reference pressure. Also made it
[gromacs/rigid-bodies.git] / src / tools / do_dssp.c
blob01757bf78843db6930164a2d65bd5fa958145133
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Green Red Orange Magenta Azure Cyan Skyblue
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include "sysstuff.h"
40 #include "typedefs.h"
41 #include "string2.h"
42 #include "strdb.h"
43 #include "macros.h"
44 #include "smalloc.h"
45 #include "mshift.h"
46 #include "statutil.h"
47 #include "copyrite.h"
48 #include "pdbio.h"
49 #include "gmx_fatal.h"
50 #include "xvgr.h"
51 #include "matio.h"
52 #include "index.h"
53 #include "gstat.h"
54 #include "tpxio.h"
55 #include "viewit.h"
57 static int strip_dssp(char *dsspfile,int nres,
58 bool bPhobres[],real t,
59 real *acc,FILE *fTArea,
60 t_matrix *mat,int average_area[],
61 const output_env_t oenv)
63 static bool bFirst=TRUE;
64 static char *ssbuf;
65 FILE *tapeout;
66 static int xsize,frame;
67 char buf[STRLEN+1];
68 char SSTP;
69 int i,nr,iacc,nresidues;
70 int naccf,naccb; /* Count hydrophobic and hydrophilic residues */
71 real iaccf,iaccb;
72 t_xpmelmt c;
74 tapeout=ffopen(dsspfile,"r");
76 /* Skip header */
77 do {
78 fgets2(buf,STRLEN,tapeout);
79 } while (strstr(buf,"KAPPA") == NULL);
80 if (bFirst) {
81 /* Since we also have empty lines in the dssp output (temp) file,
82 * and some line content is saved to the ssbuf variable,
83 * we need more memory than just nres elements. To be shure,
84 * we allocate 2*nres-1, since for each chain there is a
85 * separating line in the temp file. (At most each residue
86 * could have been defined as a separate chain.) */
87 snew(ssbuf,2*nres-1);
90 iaccb=iaccf=0;
91 nresidues = 0;
92 naccf = 0;
93 naccb = 0;
94 for(nr=0; (fgets2(buf,STRLEN,tapeout) != NULL); nr++) {
95 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
96 SSTP='='; /* Chain separator sign '=' */
97 else
98 SSTP=buf[16]==' ' ? '~' : buf[16];
99 ssbuf[nr]=SSTP;
101 buf[39]='\0';
103 /* Only calculate solvent accessible area if needed */
104 if ((NULL != acc) && (buf[13] != '!'))
106 sscanf(&(buf[34]),"%d",&iacc);
107 acc[nr]=iacc;
108 /* average_area and bPhobres are counted from 0...nres-1 */
109 average_area[nresidues]+=iacc;
110 if (bPhobres[nresidues])
112 naccb++;
113 iaccb+=iacc;
115 else
117 naccf++;
118 iaccf+=iacc;
120 /* Keep track of the residue number (do not count chain separator lines '!') */
121 nresidues++;
124 ssbuf[nr]='\0';
126 if (bFirst) {
127 if (0 != acc)
128 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
130 sprintf(mat->title,"Secondary structure");
131 mat->legend[0]=0;
132 sprintf(mat->label_x,"%s",get_time_label(oenv));
133 sprintf(mat->label_y,"Residue");
134 mat->bDiscrete=TRUE;
135 mat->ny=nr;
136 snew(mat->axis_y,nr);
137 for(i=0; i<nr; i++)
138 mat->axis_y[i]=i+1;
139 mat->axis_x=NULL;
140 mat->matrix=NULL;
141 xsize=0;
142 frame=0;
143 bFirst=FALSE;
145 if (frame>=xsize) {
146 xsize+=10;
147 srenew(mat->axis_x,xsize);
148 srenew(mat->matrix,xsize);
150 mat->axis_x[frame]=t;
151 snew(mat->matrix[frame],nr);
152 c.c2=0;
153 for(i=0; i<nr; i++) {
154 c.c1=ssbuf[i];
155 mat->matrix[frame][i] = max(0,searchcmap(mat->nmap,mat->map,c));
157 frame++;
158 mat->nx=frame;
160 if (fTArea)
161 fprintf(fTArea,"%10g %10g %10g\n",t,0.01*iaccb,0.01*iaccf);
162 fclose(tapeout);
164 /* Return the number of lines found in the dssp file (i.e. number
165 * of redidues plus chain separator lines).
166 * This is the number of y elements needed for the area xpm file */
167 return nr;
170 bool *bPhobics(t_atoms *atoms)
172 int i,nb;
173 char **cb;
174 bool *bb;
176 nb=get_strings("phbres.dat",&cb);
177 snew(bb,atoms->nres);
179 for(i=0; (i<atoms->nres); i++) {
180 if (search_str(nb,cb,*atoms->resinfo[i].name) != -1)
181 bb[i]=TRUE;
183 return bb;
186 static void check_oo(t_atoms *atoms)
188 char *OOO;
190 int i;
192 OOO=strdup("O");
194 for(i=0; (i<atoms->nr); i++) {
195 if (strcmp(*(atoms->atomname[i]),"OXT")==0)
196 *atoms->atomname[i]=OOO;
197 else if (strcmp(*(atoms->atomname[i]),"O1")==0)
198 *atoms->atomname[i]=OOO;
202 static void norm_acc(t_atoms *atoms, int nres,
203 real av_area[], real norm_av_area[])
205 int i,n,n_surf;
207 char surffn[]="surface.dat";
208 char **surf_res, **surf_lines;
209 double *surf;
211 n_surf = get_lines(surffn, &surf_lines);
212 snew(surf, n_surf);
213 snew(surf_res, n_surf);
214 for(i=0; (i<n_surf); i++) {
215 snew(surf_res[i], 5);
216 sscanf(surf_lines[i],"%s %lf",surf_res[i],&surf[i]);
219 for(i=0; (i<nres); i++) {
220 n = search_str(n_surf,surf_res,*atoms->resinfo[i].name);
221 if ( n != -1)
222 norm_av_area[i] = av_area[i] / surf[n];
223 else
224 fprintf(stderr,"Residue %s not found in surface database (%s)\n",
225 *atoms->resinfo[i].name,surffn);
229 void prune_ss_legend(t_matrix *mat)
231 bool *present;
232 int *newnum;
233 int i,r,f,newnmap;
234 t_mapping *newmap;
236 snew(present,mat->nmap);
237 snew(newnum,mat->nmap);
239 for(f=0; f<mat->nx; f++)
240 for(r=0; r<mat->ny; r++)
241 present[mat->matrix[f][r]]=TRUE;
243 newnmap=0;
244 newmap=NULL;
245 for (i=0; i<mat->nmap; i++) {
246 newnum[i]=-1;
247 if (present[i]) {
248 newnum[i]=newnmap;
249 newnmap++;
250 srenew(newmap,newnmap);
251 newmap[newnmap-1]=mat->map[i];
254 if (newnmap!=mat->nmap) {
255 mat->nmap=newnmap;
256 mat->map=newmap;
257 for(f=0; f<mat->nx; f++)
258 for(r=0; r<mat->ny; r++)
259 mat->matrix[f][r]=newnum[mat->matrix[f][r]];
263 void write_sas_mat(const char *fn,real **accr,int nframe,int nres,t_matrix *mat)
265 real lo,hi;
266 int i,j,nlev;
267 t_rgb rlo={1,1,1}, rhi={0,0,0};
268 FILE *fp;
270 if(fn) {
271 hi=lo=accr[0][0];
272 for(i=0; i<nframe; i++)
273 for(j=0; j<nres; j++) {
274 lo=min(lo,accr[i][j]);
275 hi=max(hi,accr[i][j]);
277 fp=ffopen(fn,"w");
278 nlev=hi-lo+1;
279 write_xpm(fp,0,"Solvent Accessible Surface","Surface (A^2)",
280 "Time","Residue Index",nframe,nres,
281 mat->axis_x,mat->axis_y,accr,lo,hi,rlo,rhi,&nlev);
282 ffclose(fp);
286 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
287 const output_env_t oenv)
289 FILE *fp;
290 t_mapping *map;
291 int s,f,r,*count,ss_count;
292 char **leg;
294 map=mat->map;
295 snew(count,mat->nmap);
296 snew(leg,mat->nmap+1);
297 leg[0]="Structure";
298 for(s=0; s<mat->nmap; s++)
299 leg[s+1]=strdup(map[s].desc);
301 fp=xvgropen(outfile,"Secondary Structure",
302 get_xvgr_tlabel(oenv),"Number of Residues",oenv);
303 if (get_print_xvgr_codes(oenv))
304 fprintf(fp,"@ subtitle \"Structure = ");
305 for(s=0; s<strlen(ss_string); s++) {
306 if (s>0)
307 fprintf(fp," + ");
308 for(f=0; f<mat->nmap; f++)
309 if (ss_string[s]==map[f].code.c1)
310 fprintf(fp,"%s",map[f].desc);
312 fprintf(fp,"\"\n");
313 xvgr_legend(fp,mat->nmap+1,leg,oenv);
315 for(f=0; f<mat->nx; f++) {
316 ss_count=0;
317 for(s=0; s<mat->nmap; s++)
318 count[s]=0;
319 for(r=0; r<mat->ny; r++)
320 count[mat->matrix[f][r]]++;
321 for(s=0; s<mat->nmap; s++) {
322 if (strchr(ss_string,map[s].code.c1))
323 ss_count+=count[s];
325 fprintf(fp,"%8g %5d",mat->axis_x[f],ss_count);
326 for(s=0; s<mat->nmap; s++)
327 fprintf(fp," %5d",count[s]);
328 fprintf(fp,"\n");
331 fclose(fp);
332 sfree(leg);
333 sfree(count);
336 int main(int argc,char *argv[])
338 const char *desc[] = {
339 "do_dssp ",
340 "reads a trajectory file and computes the secondary structure for",
341 "each time frame ",
342 "calling the dssp program. If you do not have the dssp program,",
343 "get it. do_dssp assumes that the dssp executable is",
344 "/usr/local/bin/dssp. If this is not the case, then you should",
345 "set an environment variable [BB]DSSP[bb] pointing to the dssp",
346 "executable, e.g.: [PAR]",
347 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
348 "The structure assignment for each residue and time is written to an",
349 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
350 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
351 "Individual chains are separated by light grey lines in the xpm and",
352 "postscript files.",
353 "The number of residues with each secondary structure type and the",
354 "total secondary structure ([TT]-sss[tt]) count as a function of",
355 "time are also written to file ([TT]-sc[tt]).[PAR]",
356 "Solvent accessible surface (SAS) per residue can be calculated, both in",
357 "absolute values (A^2) and in fractions of the maximal accessible",
358 "surface of a residue. The maximal accessible surface is defined as",
359 "the accessible surface of a residue in a chain of glycines.",
360 "[BB]Note[bb] that the program [TT]g_sas[tt] can also compute SAS",
361 "and that is more efficient.[PAR]",
362 "Finally, this program can dump the secondary structure in a special file",
363 "[TT]ssdump.dat[tt] for usage in the program [TT]g_chi[tt]. Together",
364 "these two programs can be used to analyze dihedral properties as a",
365 "function of secondary structure type."
367 static bool bVerbose;
368 static const char *ss_string="HEBT";
369 t_pargs pa[] = {
370 { "-v", FALSE, etBOOL, {&bVerbose},
371 "HIDDENGenerate miles of useless information" },
372 { "-sss", FALSE, etSTR, {&ss_string},
373 "Secondary structures for structure count"}
376 int status;
377 FILE *tapein;
378 FILE *ss,*acc,*fTArea,*tmpf;
379 const char *fnSCount,*fnArea,*fnTArea,*fnAArea;
380 char *leg[] = { "Phobic", "Phylic" };
381 t_topology top;
382 int ePBC;
383 t_atoms *atoms;
384 t_matrix mat;
385 int nres,nr0,naccr,nres_plus_separators;
386 bool *bPhbres,bDoAccSurf;
387 real t;
388 int i,natoms,nframe=0;
389 matrix box;
390 int gnx;
391 char *grpnm,*ss_str;
392 atom_id *index;
393 rvec *xp,*x;
394 int *average_area;
395 real **accr,*accr_ptr=NULL,*av_area, *norm_av_area;
396 char pdbfile[32],tmpfile[32],title[256];
397 char dssp[256];
398 const char *dptr;
399 output_env_t oenv;
402 t_filenm fnm[] = {
403 { efTRX, "-f", NULL, ffREAD },
404 { efTPS, NULL, NULL, ffREAD },
405 { efNDX, NULL, NULL, ffOPTRD },
406 { efDAT, "-ssdump", "ssdump", ffOPTWR },
407 { efMAP, "-map", "ss", ffLIBRD },
408 { efXPM, "-o", "ss", ffWRITE },
409 { efXVG, "-sc", "scount", ffWRITE },
410 { efXPM, "-a", "area", ffOPTWR },
411 { efXVG, "-ta", "totarea", ffOPTWR },
412 { efXVG, "-aa", "averarea",ffOPTWR }
414 #define NFILE asize(fnm)
416 CopyRight(stderr,argv[0]);
417 parse_common_args(&argc,argv,
418 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT | PCA_BE_NICE ,
419 NFILE,fnm, asize(pa),pa, asize(desc),desc,0,NULL,&oenv);
420 fnSCount= opt2fn("-sc",NFILE,fnm);
421 fnArea = opt2fn_null("-a", NFILE,fnm);
422 fnTArea = opt2fn_null("-ta",NFILE,fnm);
423 fnAArea = opt2fn_null("-aa",NFILE,fnm);
424 bDoAccSurf=(fnArea || fnTArea || fnAArea);
426 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&ePBC,&xp,NULL,box,FALSE);
427 atoms=&(top.atoms);
428 check_oo(atoms);
429 bPhbres=bPhobics(atoms);
431 get_index(atoms,ftp2fn_null(efNDX,NFILE,fnm),1,&gnx,&index,&grpnm);
432 nres=0;
433 nr0=-1;
434 for(i=0; (i<gnx); i++) {
435 if (atoms->atom[index[i]].resind != nr0) {
436 nr0=atoms->atom[index[i]].resind;
437 nres++;
440 fprintf(stderr,"There are %d residues in your selected group\n",nres);
442 strcpy(pdbfile,"ddXXXXXX");
443 gmx_tmpnam(pdbfile);
444 if ((tmpf = fopen(pdbfile,"w")) == NULL) {
445 sprintf(pdbfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
446 gmx_tmpnam(pdbfile);
447 if ((tmpf = fopen(pdbfile,"w")) == NULL)
448 gmx_fatal(FARGS,"Can not open tmp file %s",pdbfile);
450 else
451 fclose(tmpf);
453 strcpy(tmpfile,"ddXXXXXX");
454 gmx_tmpnam(tmpfile);
455 if ((tmpf = fopen(tmpfile,"w")) == NULL) {
456 sprintf(tmpfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
457 gmx_tmpnam(tmpfile);
458 if ((tmpf = fopen(tmpfile,"w")) == NULL)
459 gmx_fatal(FARGS,"Can not open tmp file %s",tmpfile);
461 else
462 fclose(tmpf);
464 if ((dptr=getenv("DSSP")) == NULL)
465 dptr="/usr/local/bin/dssp";
466 if (!gmx_fexist(dptr))
467 gmx_fatal(FARGS,"DSSP executable (%s) does not exist (use setenv DSSP)",
468 dptr);
469 sprintf(dssp,"%s %s %s %s > /dev/null %s",
470 dptr,bDoAccSurf?"":"-na",pdbfile,tmpfile,bVerbose?"":"2> /dev/null");
471 if (bVerbose)
472 fprintf(stderr,"dssp cmd='%s'\n",dssp);
474 if (fnTArea) {
475 fTArea=xvgropen(fnTArea,"Solvent Accessible Surface Area",
476 get_xvgr_tlabel(oenv),"Area (nm\\S2\\N)",oenv);
477 xvgr_legend(fTArea,2,leg,oenv);
478 } else
479 fTArea=NULL;
481 mat.map=NULL;
482 mat.nmap=getcmap(libopen(opt2fn("-map",NFILE,fnm)),
483 opt2fn("-map",NFILE,fnm),&(mat.map));
485 natoms=read_first_x(oenv,&status,ftp2fn(efTRX,NFILE,fnm),&t,&x,box);
486 if (natoms > atoms->nr)
487 gmx_fatal(FARGS,"\nTrajectory does not match topology!");
488 if (gnx > natoms)
489 gmx_fatal(FARGS,"\nTrajectory does not match selected group!");
491 snew(average_area, atoms->nres);
492 snew(av_area , atoms->nres);
493 snew(norm_av_area, atoms->nres);
494 accr=NULL;
495 naccr=0;
496 do {
497 t = conv_time(oenv,t);
498 if (bDoAccSurf && nframe>=naccr) {
499 naccr+=10;
500 srenew(accr,naccr);
501 for(i=naccr-10; i<naccr; i++)
502 snew(accr[i],2*atoms->nres-1);
504 rm_pbc(&(top.idef),ePBC,natoms,box,x,x);
505 tapein=ffopen(pdbfile,"w");
506 write_pdbfile_indexed(tapein,NULL,atoms,x,ePBC,box,0,-1,gnx,index,NULL);
507 fclose(tapein);
509 #ifdef GMX_NO_SYSTEM
510 printf("Warning-- No calls to system(3) supported on this platform.");
511 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
512 exit(1);
513 #else
514 if(0 != system(dssp))
516 gmx_fatal(FARGS,"Failed to execute command: %s",dssp);
518 #endif
520 /* strip_dssp returns the number of lines found in the dssp file, i.e.
521 * the number of residues plus the separator lines */
523 if (bDoAccSurf)
524 accr_ptr = accr[nframe];
526 nres_plus_separators = strip_dssp(tmpfile,nres,bPhbres,t,
527 accr_ptr,fTArea,&mat,average_area,oenv);
528 remove(tmpfile);
529 remove(pdbfile);
530 nframe++;
531 } while(read_next_x(oenv,status,&t,natoms,x,box));
532 fprintf(stderr,"\n");
533 close_trj(status);
534 if (fTArea)
535 ffclose(fTArea);
537 prune_ss_legend(&mat);
539 ss=opt2FILE("-o",NFILE,fnm,"w");
540 mat.flags = 0;
541 write_xpm_m(ss,mat);
542 ffclose(ss);
544 if (opt2bSet("-ssdump",NFILE,fnm)) {
545 snew(ss_str,nres+1);
546 for(i=0; (i<nres); i++)
547 ss_str[i] = mat.map[mat.matrix[0][i]].code.c1;
548 ss_str[i] = '\0';
549 ss = opt2FILE("-ssdump",NFILE,fnm,"w");
550 fprintf(ss,"%d\n%s\n",nres,ss_str);
551 fclose(ss);
552 sfree(ss_str);
554 analyse_ss(fnSCount,&mat,ss_string,oenv);
556 if (bDoAccSurf) {
557 write_sas_mat(fnArea,accr,nframe,nres_plus_separators,&mat);
559 for(i=0; i<atoms->nres; i++)
560 av_area[i] = (average_area[i] / (real)nframe);
562 norm_acc(atoms, nres, av_area, norm_av_area);
564 if (fnAArea) {
565 acc=xvgropen(fnAArea,"Average Accessible Area",
566 "Residue","A\\S2",oenv);
567 for(i=0; (i<nres); i++)
568 fprintf(acc,"%5d %10g %10g\n",i+1,av_area[i], norm_av_area[i]);
569 ffclose(acc);
573 view_all(oenv, NFILE, fnm);
575 thanx(stderr);
577 return 0;