Fixes #882 - looping bug in trxio.c
[gromacs.git] / src / tools / do_dssp.c
blob4f193bd5547b37dffff081e4dcb8f94ece194f56
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include "sysstuff.h"
41 #include "typedefs.h"
42 #include "string2.h"
43 #include "strdb.h"
44 #include "macros.h"
45 #include "smalloc.h"
46 #include "mshift.h"
47 #include "statutil.h"
48 #include "copyrite.h"
49 #include "pdbio.h"
50 #include "gmx_fatal.h"
51 #include "xvgr.h"
52 #include "matio.h"
53 #include "index.h"
54 #include "gstat.h"
55 #include "tpxio.h"
56 #include "viewit.h"
59 static int strip_dssp(char *dsspfile,int nres,
60 gmx_bool bPhobres[],real t,
61 real *acc,FILE *fTArea,
62 t_matrix *mat,int average_area[],
63 const output_env_t oenv)
65 static gmx_bool bFirst=TRUE;
66 static char *ssbuf;
67 FILE *tapeout;
68 static int xsize,frame;
69 char buf[STRLEN+1];
70 char SSTP;
71 int i,nr,iacc,nresidues;
72 int naccf,naccb; /* Count hydrophobic and hydrophilic residues */
73 real iaccf,iaccb;
74 t_xpmelmt c;
76 tapeout=ffopen(dsspfile,"r");
78 /* Skip header */
79 do {
80 fgets2(buf,STRLEN,tapeout);
81 } while (strstr(buf,"KAPPA") == NULL);
82 if (bFirst) {
83 /* Since we also have empty lines in the dssp output (temp) file,
84 * and some line content is saved to the ssbuf variable,
85 * we need more memory than just nres elements. To be shure,
86 * we allocate 2*nres-1, since for each chain there is a
87 * separating line in the temp file. (At most each residue
88 * could have been defined as a separate chain.) */
89 snew(ssbuf,2*nres-1);
92 iaccb=iaccf=0;
93 nresidues = 0;
94 naccf = 0;
95 naccb = 0;
96 for(nr=0; (fgets2(buf,STRLEN,tapeout) != NULL); nr++) {
97 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
98 SSTP='='; /* Chain separator sign '=' */
99 else
100 SSTP=buf[16]==' ' ? '~' : buf[16];
101 ssbuf[nr]=SSTP;
103 buf[39]='\0';
105 /* Only calculate solvent accessible area if needed */
106 if ((NULL != acc) && (buf[13] != '!'))
108 sscanf(&(buf[34]),"%d",&iacc);
109 acc[nr]=iacc;
110 /* average_area and bPhobres are counted from 0...nres-1 */
111 average_area[nresidues]+=iacc;
112 if (bPhobres[nresidues])
114 naccb++;
115 iaccb+=iacc;
117 else
119 naccf++;
120 iaccf+=iacc;
122 /* Keep track of the residue number (do not count chain separator lines '!') */
123 nresidues++;
126 ssbuf[nr]='\0';
128 if (bFirst) {
129 if (0 != acc)
130 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
132 sprintf(mat->title,"Secondary structure");
133 mat->legend[0]=0;
134 sprintf(mat->label_x,"%s",output_env_get_time_label(oenv));
135 sprintf(mat->label_y,"Residue");
136 mat->bDiscrete=TRUE;
137 mat->ny=nr;
138 snew(mat->axis_y,nr);
139 for(i=0; i<nr; i++)
140 mat->axis_y[i]=i+1;
141 mat->axis_x=NULL;
142 mat->matrix=NULL;
143 xsize=0;
144 frame=0;
145 bFirst=FALSE;
147 if (frame>=xsize) {
148 xsize+=10;
149 srenew(mat->axis_x,xsize);
150 srenew(mat->matrix,xsize);
152 mat->axis_x[frame]=t;
153 snew(mat->matrix[frame],nr);
154 c.c2=0;
155 for(i=0; i<nr; i++) {
156 c.c1=ssbuf[i];
157 mat->matrix[frame][i] = max(0,searchcmap(mat->nmap,mat->map,c));
159 frame++;
160 mat->nx=frame;
162 if (fTArea)
163 fprintf(fTArea,"%10g %10g %10g\n",t,0.01*iaccb,0.01*iaccf);
164 ffclose(tapeout);
166 /* Return the number of lines found in the dssp file (i.e. number
167 * of redidues plus chain separator lines).
168 * This is the number of y elements needed for the area xpm file */
169 return nr;
172 static gmx_bool *bPhobics(t_atoms *atoms)
174 int i,nb;
175 char **cb;
176 gmx_bool *bb;
179 nb = get_strings("phbres.dat",&cb);
180 snew(bb,atoms->nres);
182 for (i=0; (i<atoms->nres); i++)
184 if ( -1 != search_str(nb,cb,*atoms->resinfo[i].name) )
186 bb[i]=TRUE;
189 return bb;
192 static void check_oo(t_atoms *atoms)
194 char *OOO;
196 int i;
198 OOO=strdup("O");
200 for(i=0; (i<atoms->nr); i++) {
201 if (strcmp(*(atoms->atomname[i]),"OXT")==0)
202 *atoms->atomname[i]=OOO;
203 else if (strcmp(*(atoms->atomname[i]),"O1")==0)
204 *atoms->atomname[i]=OOO;
205 else if (strcmp(*(atoms->atomname[i]),"OC1")==0)
206 *atoms->atomname[i]=OOO;
210 static void norm_acc(t_atoms *atoms, int nres,
211 real av_area[], real norm_av_area[])
213 int i,n,n_surf;
215 char surffn[]="surface.dat";
216 char **surf_res, **surf_lines;
217 double *surf;
219 n_surf = get_lines(surffn, &surf_lines);
220 snew(surf, n_surf);
221 snew(surf_res, n_surf);
222 for(i=0; (i<n_surf); i++) {
223 snew(surf_res[i], 5);
224 sscanf(surf_lines[i],"%s %lf",surf_res[i],&surf[i]);
227 for(i=0; (i<nres); i++) {
228 n = search_str(n_surf,surf_res,*atoms->resinfo[i].name);
229 if ( n != -1)
230 norm_av_area[i] = av_area[i] / surf[n];
231 else
232 fprintf(stderr,"Residue %s not found in surface database (%s)\n",
233 *atoms->resinfo[i].name,surffn);
237 void prune_ss_legend(t_matrix *mat)
239 gmx_bool *present;
240 int *newnum;
241 int i,r,f,newnmap;
242 t_mapping *newmap;
244 snew(present,mat->nmap);
245 snew(newnum,mat->nmap);
247 for(f=0; f<mat->nx; f++)
248 for(r=0; r<mat->ny; r++)
249 present[mat->matrix[f][r]]=TRUE;
251 newnmap=0;
252 newmap=NULL;
253 for (i=0; i<mat->nmap; i++) {
254 newnum[i]=-1;
255 if (present[i]) {
256 newnum[i]=newnmap;
257 newnmap++;
258 srenew(newmap,newnmap);
259 newmap[newnmap-1]=mat->map[i];
262 if (newnmap!=mat->nmap) {
263 mat->nmap=newnmap;
264 mat->map=newmap;
265 for(f=0; f<mat->nx; f++)
266 for(r=0; r<mat->ny; r++)
267 mat->matrix[f][r]=newnum[mat->matrix[f][r]];
271 void write_sas_mat(const char *fn,real **accr,int nframe,int nres,t_matrix *mat)
273 real lo,hi;
274 int i,j,nlev;
275 t_rgb rlo={1,1,1}, rhi={0,0,0};
276 FILE *fp;
278 if(fn) {
279 hi=lo=accr[0][0];
280 for(i=0; i<nframe; i++)
281 for(j=0; j<nres; j++) {
282 lo=min(lo,accr[i][j]);
283 hi=max(hi,accr[i][j]);
285 fp=ffopen(fn,"w");
286 nlev=hi-lo+1;
287 write_xpm(fp,0,"Solvent Accessible Surface","Surface (A^2)",
288 "Time","Residue Index",nframe,nres,
289 mat->axis_x,mat->axis_y,accr,lo,hi,rlo,rhi,&nlev);
290 ffclose(fp);
294 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
295 const output_env_t oenv)
297 FILE *fp;
298 t_mapping *map;
299 int f,r,*count,*total,ss_count,total_count;
300 size_t s;
301 const char** leg;
303 map=mat->map;
304 snew(count,mat->nmap);
305 snew(total,mat->nmap);
306 snew(leg,mat->nmap+1);
307 leg[0]="Structure";
308 for(s=0; s<(size_t)mat->nmap; s++)
310 leg[s+1]=strdup(map[s].desc);
313 fp=xvgropen(outfile,"Secondary Structure",
314 output_env_get_xvgr_tlabel(oenv),"Number of Residues",oenv);
315 if (output_env_get_print_xvgr_codes(oenv))
317 fprintf(fp,"@ subtitle \"Structure = ");
319 for(s=0; s<strlen(ss_string); s++)
321 if (s>0)
323 fprintf(fp," + ");
325 for(f=0; f<mat->nmap; f++)
327 if (ss_string[s]==map[f].code.c1)
329 fprintf(fp,"%s",map[f].desc);
333 fprintf(fp,"\"\n");
334 xvgr_legend(fp,mat->nmap+1,leg,oenv);
336 total_count = 0;
337 for(s=0; s<(size_t)mat->nmap; s++)
339 total[s]=0;
341 for(f=0; f<mat->nx; f++)
343 ss_count=0;
344 for(s=0; s<(size_t)mat->nmap; s++)
346 count[s]=0;
348 for(r=0; r<mat->ny; r++)
350 count[mat->matrix[f][r]]++;
351 total[mat->matrix[f][r]]++;
353 for(s=0; s<(size_t)mat->nmap; s++)
355 if (strchr(ss_string,map[s].code.c1))
357 ss_count+=count[s];
358 total_count += count[s];
361 fprintf(fp,"%8g %5d",mat->axis_x[f],ss_count);
362 for(s=0; s<(size_t)mat->nmap; s++)
364 fprintf(fp," %5d",count[s]);
366 fprintf(fp,"\n");
368 /* now print column totals */
369 fprintf(fp, "%-8s %5d", "# Totals", total_count);
370 for(s=0; s<(size_t)mat->nmap; s++)
372 fprintf(fp," %5d",total[s]);
374 fprintf(fp,"\n");
376 /* now print percentages */
377 fprintf(fp, "%-8s %5.2f", "# SS %", total_count / (real) (mat->nx * mat->ny));
378 for(s=0; s<(size_t)mat->nmap; s++)
380 fprintf(fp," %5.2f",total[s] / (real) (mat->nx * mat->ny));
382 fprintf(fp,"\n");
384 ffclose(fp);
385 sfree(leg);
386 sfree(count);
389 int main(int argc,char *argv[])
391 const char *desc[] = {
392 "[TT]do_dssp[tt] ",
393 "reads a trajectory file and computes the secondary structure for",
394 "each time frame ",
395 "calling the dssp program. If you do not have the dssp program,",
396 "get it from http://swift.cmbi.ru.nl/gv/dssp. [TT]do_dssp[tt] assumes ",
397 "that the dssp executable is located in ",
398 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
399 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
400 "executable, e.g.: [PAR]",
401 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
402 "The structure assignment for each residue and time is written to an",
403 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
404 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
405 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
406 "postscript files.",
407 "The number of residues with each secondary structure type and the",
408 "total secondary structure ([TT]-sss[tt]) count as a function of",
409 "time are also written to file ([TT]-sc[tt]).[PAR]",
410 "Solvent accessible surface (SAS) per residue can be calculated, both in",
411 "absolute values (A^2) and in fractions of the maximal accessible",
412 "surface of a residue. The maximal accessible surface is defined as",
413 "the accessible surface of a residue in a chain of glycines.",
414 "[BB]Note[bb] that the program [TT]g_sas[tt] can also compute SAS",
415 "and that is more efficient.[PAR]",
416 "Finally, this program can dump the secondary structure in a special file",
417 "[TT]ssdump.dat[tt] for usage in the program [TT]g_chi[tt]. Together",
418 "these two programs can be used to analyze dihedral properties as a",
419 "function of secondary structure type."
421 static gmx_bool bVerbose;
422 static const char *ss_string="HEBT";
423 t_pargs pa[] = {
424 { "-v", FALSE, etBOOL, {&bVerbose},
425 "HIDDENGenerate miles of useless information" },
426 { "-sss", FALSE, etSTR, {&ss_string},
427 "Secondary structures for structure count"}
430 t_trxstatus *status;
431 FILE *tapein;
432 FILE *ss,*acc,*fTArea,*tmpf;
433 const char *fnSCount,*fnArea,*fnTArea,*fnAArea;
434 const char *leg[] = { "Phobic", "Phylic" };
435 t_topology top;
436 int ePBC;
437 t_atoms *atoms;
438 t_matrix mat;
439 int nres,nr0,naccr,nres_plus_separators;
440 gmx_bool *bPhbres,bDoAccSurf;
441 real t;
442 int i,j,natoms,nframe=0;
443 matrix box;
444 int gnx;
445 char *grpnm,*ss_str;
446 atom_id *index;
447 rvec *xp,*x;
448 int *average_area;
449 real **accr,*accr_ptr=NULL,*av_area, *norm_av_area;
450 char pdbfile[32],tmpfile[32],title[256];
451 char dssp[256];
452 const char *dptr;
453 output_env_t oenv;
454 gmx_rmpbc_t gpbc=NULL;
456 t_filenm fnm[] = {
457 { efTRX, "-f", NULL, ffREAD },
458 { efTPS, NULL, NULL, ffREAD },
459 { efNDX, NULL, NULL, ffOPTRD },
460 { efDAT, "-ssdump", "ssdump", ffOPTWR },
461 { efMAP, "-map", "ss", ffLIBRD },
462 { efXPM, "-o", "ss", ffWRITE },
463 { efXVG, "-sc", "scount", ffWRITE },
464 { efXPM, "-a", "area", ffOPTWR },
465 { efXVG, "-ta", "totarea", ffOPTWR },
466 { efXVG, "-aa", "averarea",ffOPTWR }
468 #define NFILE asize(fnm)
470 CopyRight(stderr,argv[0]);
471 parse_common_args(&argc,argv,
472 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT | PCA_BE_NICE ,
473 NFILE,fnm, asize(pa),pa, asize(desc),desc,0,NULL,&oenv);
474 fnSCount= opt2fn("-sc",NFILE,fnm);
475 fnArea = opt2fn_null("-a", NFILE,fnm);
476 fnTArea = opt2fn_null("-ta",NFILE,fnm);
477 fnAArea = opt2fn_null("-aa",NFILE,fnm);
478 bDoAccSurf=(fnArea || fnTArea || fnAArea);
480 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&ePBC,&xp,NULL,box,FALSE);
481 atoms=&(top.atoms);
482 check_oo(atoms);
483 bPhbres = bPhobics(atoms);
485 get_index(atoms,ftp2fn_null(efNDX,NFILE,fnm),1,&gnx,&index,&grpnm);
486 nres=0;
487 nr0=-1;
488 for(i=0; (i<gnx); i++) {
489 if (atoms->atom[index[i]].resind != nr0) {
490 nr0=atoms->atom[index[i]].resind;
491 nres++;
494 fprintf(stderr,"There are %d residues in your selected group\n",nres);
496 strcpy(pdbfile,"ddXXXXXX");
497 gmx_tmpnam(pdbfile);
498 if ((tmpf = fopen(pdbfile,"w")) == NULL) {
499 sprintf(pdbfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
500 gmx_tmpnam(pdbfile);
501 if ((tmpf = fopen(pdbfile,"w")) == NULL)
502 gmx_fatal(FARGS,"Can not open tmp file %s",pdbfile);
504 else
505 fclose(tmpf);
507 strcpy(tmpfile,"ddXXXXXX");
508 gmx_tmpnam(tmpfile);
509 if ((tmpf = fopen(tmpfile,"w")) == NULL) {
510 sprintf(tmpfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
511 gmx_tmpnam(tmpfile);
512 if ((tmpf = fopen(tmpfile,"w")) == NULL)
513 gmx_fatal(FARGS,"Can not open tmp file %s",tmpfile);
515 else
516 fclose(tmpf);
518 if ((dptr=getenv("DSSP")) == NULL)
519 dptr="/usr/local/bin/dssp";
520 if (!gmx_fexist(dptr))
521 gmx_fatal(FARGS,"DSSP executable (%s) does not exist (use setenv DSSP)",
522 dptr);
523 sprintf(dssp,"%s %s %s %s > /dev/null %s",
524 dptr,bDoAccSurf?"":"-na",pdbfile,tmpfile,bVerbose?"":"2> /dev/null");
525 if (bVerbose)
526 fprintf(stderr,"dssp cmd='%s'\n",dssp);
528 if (fnTArea) {
529 fTArea=xvgropen(fnTArea,"Solvent Accessible Surface Area",
530 output_env_get_xvgr_tlabel(oenv),"Area (nm\\S2\\N)",oenv);
531 xvgr_legend(fTArea,2,leg,oenv);
532 } else
533 fTArea=NULL;
535 mat.map=NULL;
536 mat.nmap=getcmap(libopen(opt2fn("-map",NFILE,fnm)),
537 opt2fn("-map",NFILE,fnm),&(mat.map));
539 natoms=read_first_x(oenv,&status,ftp2fn(efTRX,NFILE,fnm),&t,&x,box);
540 if (natoms > atoms->nr)
541 gmx_fatal(FARGS,"\nTrajectory does not match topology!");
542 if (gnx > natoms)
543 gmx_fatal(FARGS,"\nTrajectory does not match selected group!");
545 snew(average_area, atoms->nres);
546 snew(av_area , atoms->nres);
547 snew(norm_av_area, atoms->nres);
548 accr=NULL;
549 naccr=0;
551 gpbc = gmx_rmpbc_init(&top.idef,ePBC,natoms,box);
552 do {
553 t = output_env_conv_time(oenv,t);
554 if (bDoAccSurf && nframe>=naccr) {
555 naccr+=10;
556 srenew(accr,naccr);
557 for(i=naccr-10; i<naccr; i++)
558 snew(accr[i],2*atoms->nres-1);
560 gmx_rmpbc(gpbc,natoms,box,x);
561 tapein=ffopen(pdbfile,"w");
562 write_pdbfile_indexed(tapein,NULL,atoms,x,ePBC,box,' ',-1,gnx,index,NULL,TRUE);
563 ffclose(tapein);
565 #ifdef GMX_NO_SYSTEM
566 printf("Warning-- No calls to system(3) supported on this platform.");
567 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
568 exit(1);
569 #else
570 if(0 != system(dssp))
572 gmx_fatal(FARGS,"Failed to execute command: %s",dssp);
574 #endif
576 /* strip_dssp returns the number of lines found in the dssp file, i.e.
577 * the number of residues plus the separator lines */
579 if (bDoAccSurf)
580 accr_ptr = accr[nframe];
582 nres_plus_separators = strip_dssp(tmpfile,nres,bPhbres,t,
583 accr_ptr,fTArea,&mat,average_area,oenv);
584 remove(tmpfile);
585 remove(pdbfile);
586 nframe++;
587 } while(read_next_x(oenv,status,&t,natoms,x,box));
588 fprintf(stderr,"\n");
589 close_trj(status);
590 if (fTArea)
591 ffclose(fTArea);
592 gmx_rmpbc_done(gpbc);
594 prune_ss_legend(&mat);
596 ss=opt2FILE("-o",NFILE,fnm,"w");
597 mat.flags = 0;
598 write_xpm_m(ss,mat);
599 ffclose(ss);
601 if (opt2bSet("-ssdump",NFILE,fnm))
603 ss = opt2FILE("-ssdump",NFILE,fnm,"w");
604 snew(ss_str,nres+1);
605 fprintf(ss,"%d\n",nres);
606 for(j=0; j<mat.nx; j++)
608 for(i=0; (i<mat.ny); i++)
610 ss_str[i] = mat.map[mat.matrix[j][i]].code.c1;
612 ss_str[i] = '\0';
613 fprintf(ss,"%s\n",ss_str);
615 ffclose(ss);
616 sfree(ss_str);
618 analyse_ss(fnSCount,&mat,ss_string,oenv);
620 if (bDoAccSurf) {
621 write_sas_mat(fnArea,accr,nframe,nres_plus_separators,&mat);
623 for(i=0; i<atoms->nres; i++)
624 av_area[i] = (average_area[i] / (real)nframe);
626 norm_acc(atoms, nres, av_area, norm_av_area);
628 if (fnAArea) {
629 acc=xvgropen(fnAArea,"Average Accessible Area",
630 "Residue","A\\S2",oenv);
631 for(i=0; (i<nres); i++)
632 fprintf(acc,"%5d %10g %10g\n",i+1,av_area[i], norm_av_area[i]);
633 ffclose(acc);
637 view_all(oenv, NFILE, fnm);
639 thanx(stderr);
641 return 0;