1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
50 #include "gmx_fatal.h"
59 static int strip_dssp(char *dsspfile
,int nres
,
60 gmx_bool bPhobres
[],real t
,
61 real
*acc
,FILE *fTArea
,
62 t_matrix
*mat
,int average_area
[],
63 const output_env_t oenv
)
65 static gmx_bool bFirst
=TRUE
;
68 static int xsize
,frame
;
71 int i
,nr
,iacc
,nresidues
;
72 int naccf
,naccb
; /* Count hydrophobic and hydrophilic residues */
76 tapeout
=ffopen(dsspfile
,"r");
80 fgets2(buf
,STRLEN
,tapeout
);
81 } while (strstr(buf
,"KAPPA") == NULL
);
83 /* Since we also have empty lines in the dssp output (temp) file,
84 * and some line content is saved to the ssbuf variable,
85 * we need more memory than just nres elements. To be shure,
86 * we allocate 2*nres-1, since for each chain there is a
87 * separating line in the temp file. (At most each residue
88 * could have been defined as a separate chain.) */
96 for(nr
=0; (fgets2(buf
,STRLEN
,tapeout
) != NULL
); nr
++) {
97 if (buf
[13] == '!') /* Chain separator line has '!' at pos. 13 */
98 SSTP
='='; /* Chain separator sign '=' */
100 SSTP
=buf
[16]==' ' ? '~' : buf
[16];
105 /* Only calculate solvent accessible area if needed */
106 if ((NULL
!= acc
) && (buf
[13] != '!'))
108 sscanf(&(buf
[34]),"%d",&iacc
);
110 /* average_area and bPhobres are counted from 0...nres-1 */
111 average_area
[nresidues
]+=iacc
;
112 if (bPhobres
[nresidues
])
122 /* Keep track of the residue number (do not count chain separator lines '!') */
130 fprintf(stderr
, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb
, naccf
);
132 sprintf(mat
->title
,"Secondary structure");
134 sprintf(mat
->label_x
,"%s",output_env_get_time_label(oenv
));
135 sprintf(mat
->label_y
,"Residue");
138 snew(mat
->axis_y
,nr
);
149 srenew(mat
->axis_x
,xsize
);
150 srenew(mat
->matrix
,xsize
);
152 mat
->axis_x
[frame
]=t
;
153 snew(mat
->matrix
[frame
],nr
);
155 for(i
=0; i
<nr
; i
++) {
157 mat
->matrix
[frame
][i
] = max(0,searchcmap(mat
->nmap
,mat
->map
,c
));
163 fprintf(fTArea
,"%10g %10g %10g\n",t
,0.01*iaccb
,0.01*iaccf
);
166 /* Return the number of lines found in the dssp file (i.e. number
167 * of redidues plus chain separator lines).
168 * This is the number of y elements needed for the area xpm file */
172 static gmx_bool
*bPhobics(t_atoms
*atoms
)
179 nb
= get_strings("phbres.dat",&cb
);
180 snew(bb
,atoms
->nres
);
182 for (i
=0; (i
<atoms
->nres
); i
++)
184 if ( -1 != search_str(nb
,cb
,*atoms
->resinfo
[i
].name
) )
192 static void check_oo(t_atoms
*atoms
)
200 for(i
=0; (i
<atoms
->nr
); i
++) {
201 if (strcmp(*(atoms
->atomname
[i
]),"OXT")==0)
202 *atoms
->atomname
[i
]=OOO
;
203 else if (strcmp(*(atoms
->atomname
[i
]),"O1")==0)
204 *atoms
->atomname
[i
]=OOO
;
205 else if (strcmp(*(atoms
->atomname
[i
]),"OC1")==0)
206 *atoms
->atomname
[i
]=OOO
;
210 static void norm_acc(t_atoms
*atoms
, int nres
,
211 real av_area
[], real norm_av_area
[])
215 char surffn
[]="surface.dat";
216 char **surf_res
, **surf_lines
;
219 n_surf
= get_lines(surffn
, &surf_lines
);
221 snew(surf_res
, n_surf
);
222 for(i
=0; (i
<n_surf
); i
++) {
223 snew(surf_res
[i
], 5);
224 sscanf(surf_lines
[i
],"%s %lf",surf_res
[i
],&surf
[i
]);
227 for(i
=0; (i
<nres
); i
++) {
228 n
= search_str(n_surf
,surf_res
,*atoms
->resinfo
[i
].name
);
230 norm_av_area
[i
] = av_area
[i
] / surf
[n
];
232 fprintf(stderr
,"Residue %s not found in surface database (%s)\n",
233 *atoms
->resinfo
[i
].name
,surffn
);
237 void prune_ss_legend(t_matrix
*mat
)
244 snew(present
,mat
->nmap
);
245 snew(newnum
,mat
->nmap
);
247 for(f
=0; f
<mat
->nx
; f
++)
248 for(r
=0; r
<mat
->ny
; r
++)
249 present
[mat
->matrix
[f
][r
]]=TRUE
;
253 for (i
=0; i
<mat
->nmap
; i
++) {
258 srenew(newmap
,newnmap
);
259 newmap
[newnmap
-1]=mat
->map
[i
];
262 if (newnmap
!=mat
->nmap
) {
265 for(f
=0; f
<mat
->nx
; f
++)
266 for(r
=0; r
<mat
->ny
; r
++)
267 mat
->matrix
[f
][r
]=newnum
[mat
->matrix
[f
][r
]];
271 void write_sas_mat(const char *fn
,real
**accr
,int nframe
,int nres
,t_matrix
*mat
)
275 t_rgb rlo
={1,1,1}, rhi
={0,0,0};
280 for(i
=0; i
<nframe
; i
++)
281 for(j
=0; j
<nres
; j
++) {
282 lo
=min(lo
,accr
[i
][j
]);
283 hi
=max(hi
,accr
[i
][j
]);
287 write_xpm(fp
,0,"Solvent Accessible Surface","Surface (A^2)",
288 "Time","Residue Index",nframe
,nres
,
289 mat
->axis_x
,mat
->axis_y
,accr
,lo
,hi
,rlo
,rhi
,&nlev
);
294 void analyse_ss(const char *outfile
, t_matrix
*mat
, const char *ss_string
,
295 const output_env_t oenv
)
299 int f
,r
,*count
,*total
,ss_count
,total_count
;
304 snew(count
,mat
->nmap
);
305 snew(total
,mat
->nmap
);
306 snew(leg
,mat
->nmap
+1);
308 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
310 leg
[s
+1]=strdup(map
[s
].desc
);
313 fp
=xvgropen(outfile
,"Secondary Structure",
314 output_env_get_xvgr_tlabel(oenv
),"Number of Residues",oenv
);
315 if (output_env_get_print_xvgr_codes(oenv
))
317 fprintf(fp
,"@ subtitle \"Structure = ");
319 for(s
=0; s
<strlen(ss_string
); s
++)
325 for(f
=0; f
<mat
->nmap
; f
++)
327 if (ss_string
[s
]==map
[f
].code
.c1
)
329 fprintf(fp
,"%s",map
[f
].desc
);
334 xvgr_legend(fp
,mat
->nmap
+1,leg
,oenv
);
337 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
341 for(f
=0; f
<mat
->nx
; f
++)
344 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
348 for(r
=0; r
<mat
->ny
; r
++)
350 count
[mat
->matrix
[f
][r
]]++;
351 total
[mat
->matrix
[f
][r
]]++;
353 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
355 if (strchr(ss_string
,map
[s
].code
.c1
))
358 total_count
+= count
[s
];
361 fprintf(fp
,"%8g %5d",mat
->axis_x
[f
],ss_count
);
362 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
364 fprintf(fp
," %5d",count
[s
]);
368 /* now print column totals */
369 fprintf(fp
, "%-8s %5d", "# Totals", total_count
);
370 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
372 fprintf(fp
," %5d",total
[s
]);
376 /* now print percentages */
377 fprintf(fp
, "%-8s %5.2f", "# SS %", total_count
/ (real
) (mat
->nx
* mat
->ny
));
378 for(s
=0; s
<(size_t)mat
->nmap
; s
++)
380 fprintf(fp
," %5.2f",total
[s
] / (real
) (mat
->nx
* mat
->ny
));
389 int main(int argc
,char *argv
[])
391 const char *desc
[] = {
393 "reads a trajectory file and computes the secondary structure for",
395 "calling the dssp program. If you do not have the dssp program,",
396 "get it from http://swift.cmbi.ru.nl/gv/dssp. [TT]do_dssp[tt] assumes ",
397 "that the dssp executable is located in ",
398 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
399 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
400 "executable, e.g.: [PAR]",
401 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
402 "The structure assignment for each residue and time is written to an",
403 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
404 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
405 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
407 "The number of residues with each secondary structure type and the",
408 "total secondary structure ([TT]-sss[tt]) count as a function of",
409 "time are also written to file ([TT]-sc[tt]).[PAR]",
410 "Solvent accessible surface (SAS) per residue can be calculated, both in",
411 "absolute values (A^2) and in fractions of the maximal accessible",
412 "surface of a residue. The maximal accessible surface is defined as",
413 "the accessible surface of a residue in a chain of glycines.",
414 "[BB]Note[bb] that the program [TT]g_sas[tt] can also compute SAS",
415 "and that is more efficient.[PAR]",
416 "Finally, this program can dump the secondary structure in a special file",
417 "[TT]ssdump.dat[tt] for usage in the program [TT]g_chi[tt]. Together",
418 "these two programs can be used to analyze dihedral properties as a",
419 "function of secondary structure type."
421 static gmx_bool bVerbose
;
422 static const char *ss_string
="HEBT";
424 { "-v", FALSE
, etBOOL
, {&bVerbose
},
425 "HIDDENGenerate miles of useless information" },
426 { "-sss", FALSE
, etSTR
, {&ss_string
},
427 "Secondary structures for structure count"}
432 FILE *ss
,*acc
,*fTArea
,*tmpf
;
433 const char *fnSCount
,*fnArea
,*fnTArea
,*fnAArea
;
434 const char *leg
[] = { "Phobic", "Phylic" };
439 int nres
,nr0
,naccr
,nres_plus_separators
;
440 gmx_bool
*bPhbres
,bDoAccSurf
;
442 int i
,j
,natoms
,nframe
=0;
449 real
**accr
,*accr_ptr
=NULL
,*av_area
, *norm_av_area
;
450 char pdbfile
[32],tmpfile
[32],title
[256];
454 gmx_rmpbc_t gpbc
=NULL
;
457 { efTRX
, "-f", NULL
, ffREAD
},
458 { efTPS
, NULL
, NULL
, ffREAD
},
459 { efNDX
, NULL
, NULL
, ffOPTRD
},
460 { efDAT
, "-ssdump", "ssdump", ffOPTWR
},
461 { efMAP
, "-map", "ss", ffLIBRD
},
462 { efXPM
, "-o", "ss", ffWRITE
},
463 { efXVG
, "-sc", "scount", ffWRITE
},
464 { efXPM
, "-a", "area", ffOPTWR
},
465 { efXVG
, "-ta", "totarea", ffOPTWR
},
466 { efXVG
, "-aa", "averarea",ffOPTWR
}
468 #define NFILE asize(fnm)
470 CopyRight(stderr
,argv
[0]);
471 parse_common_args(&argc
,argv
,
472 PCA_CAN_TIME
| PCA_CAN_VIEW
| PCA_TIME_UNIT
| PCA_BE_NICE
,
473 NFILE
,fnm
, asize(pa
),pa
, asize(desc
),desc
,0,NULL
,&oenv
);
474 fnSCount
= opt2fn("-sc",NFILE
,fnm
);
475 fnArea
= opt2fn_null("-a", NFILE
,fnm
);
476 fnTArea
= opt2fn_null("-ta",NFILE
,fnm
);
477 fnAArea
= opt2fn_null("-aa",NFILE
,fnm
);
478 bDoAccSurf
=(fnArea
|| fnTArea
|| fnAArea
);
480 read_tps_conf(ftp2fn(efTPS
,NFILE
,fnm
),title
,&top
,&ePBC
,&xp
,NULL
,box
,FALSE
);
483 bPhbres
= bPhobics(atoms
);
485 get_index(atoms
,ftp2fn_null(efNDX
,NFILE
,fnm
),1,&gnx
,&index
,&grpnm
);
488 for(i
=0; (i
<gnx
); i
++) {
489 if (atoms
->atom
[index
[i
]].resind
!= nr0
) {
490 nr0
=atoms
->atom
[index
[i
]].resind
;
494 fprintf(stderr
,"There are %d residues in your selected group\n",nres
);
496 strcpy(pdbfile
,"ddXXXXXX");
498 if ((tmpf
= fopen(pdbfile
,"w")) == NULL
) {
499 sprintf(pdbfile
,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR
,DIR_SEPARATOR
);
501 if ((tmpf
= fopen(pdbfile
,"w")) == NULL
)
502 gmx_fatal(FARGS
,"Can not open tmp file %s",pdbfile
);
507 strcpy(tmpfile
,"ddXXXXXX");
509 if ((tmpf
= fopen(tmpfile
,"w")) == NULL
) {
510 sprintf(tmpfile
,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR
,DIR_SEPARATOR
);
512 if ((tmpf
= fopen(tmpfile
,"w")) == NULL
)
513 gmx_fatal(FARGS
,"Can not open tmp file %s",tmpfile
);
518 if ((dptr
=getenv("DSSP")) == NULL
)
519 dptr
="/usr/local/bin/dssp";
520 if (!gmx_fexist(dptr
))
521 gmx_fatal(FARGS
,"DSSP executable (%s) does not exist (use setenv DSSP)",
523 sprintf(dssp
,"%s %s %s %s > /dev/null %s",
524 dptr
,bDoAccSurf
?"":"-na",pdbfile
,tmpfile
,bVerbose
?"":"2> /dev/null");
526 fprintf(stderr
,"dssp cmd='%s'\n",dssp
);
529 fTArea
=xvgropen(fnTArea
,"Solvent Accessible Surface Area",
530 output_env_get_xvgr_tlabel(oenv
),"Area (nm\\S2\\N)",oenv
);
531 xvgr_legend(fTArea
,2,leg
,oenv
);
536 mat
.nmap
=getcmap(libopen(opt2fn("-map",NFILE
,fnm
)),
537 opt2fn("-map",NFILE
,fnm
),&(mat
.map
));
539 natoms
=read_first_x(oenv
,&status
,ftp2fn(efTRX
,NFILE
,fnm
),&t
,&x
,box
);
540 if (natoms
> atoms
->nr
)
541 gmx_fatal(FARGS
,"\nTrajectory does not match topology!");
543 gmx_fatal(FARGS
,"\nTrajectory does not match selected group!");
545 snew(average_area
, atoms
->nres
);
546 snew(av_area
, atoms
->nres
);
547 snew(norm_av_area
, atoms
->nres
);
551 gpbc
= gmx_rmpbc_init(&top
.idef
,ePBC
,natoms
,box
);
553 t
= output_env_conv_time(oenv
,t
);
554 if (bDoAccSurf
&& nframe
>=naccr
) {
557 for(i
=naccr
-10; i
<naccr
; i
++)
558 snew(accr
[i
],2*atoms
->nres
-1);
560 gmx_rmpbc(gpbc
,natoms
,box
,x
);
561 tapein
=ffopen(pdbfile
,"w");
562 write_pdbfile_indexed(tapein
,NULL
,atoms
,x
,ePBC
,box
,' ',-1,gnx
,index
,NULL
,TRUE
);
566 printf("Warning-- No calls to system(3) supported on this platform.");
567 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp
);
570 if(0 != system(dssp
))
572 gmx_fatal(FARGS
,"Failed to execute command: %s",dssp
);
576 /* strip_dssp returns the number of lines found in the dssp file, i.e.
577 * the number of residues plus the separator lines */
580 accr_ptr
= accr
[nframe
];
582 nres_plus_separators
= strip_dssp(tmpfile
,nres
,bPhbres
,t
,
583 accr_ptr
,fTArea
,&mat
,average_area
,oenv
);
587 } while(read_next_x(oenv
,status
,&t
,natoms
,x
,box
));
588 fprintf(stderr
,"\n");
592 gmx_rmpbc_done(gpbc
);
594 prune_ss_legend(&mat
);
596 ss
=opt2FILE("-o",NFILE
,fnm
,"w");
601 if (opt2bSet("-ssdump",NFILE
,fnm
))
603 ss
= opt2FILE("-ssdump",NFILE
,fnm
,"w");
605 fprintf(ss
,"%d\n",nres
);
606 for(j
=0; j
<mat
.nx
; j
++)
608 for(i
=0; (i
<mat
.ny
); i
++)
610 ss_str
[i
] = mat
.map
[mat
.matrix
[j
][i
]].code
.c1
;
613 fprintf(ss
,"%s\n",ss_str
);
618 analyse_ss(fnSCount
,&mat
,ss_string
,oenv
);
621 write_sas_mat(fnArea
,accr
,nframe
,nres_plus_separators
,&mat
);
623 for(i
=0; i
<atoms
->nres
; i
++)
624 av_area
[i
] = (average_area
[i
] / (real
)nframe
);
626 norm_acc(atoms
, nres
, av_area
, norm_av_area
);
629 acc
=xvgropen(fnAArea
,"Average Accessible Area",
630 "Residue","A\\S2",oenv
);
631 for(i
=0; (i
<nres
); i
++)
632 fprintf(acc
,"%5d %10g %10g\n",i
+1,av_area
[i
], norm_av_area
[i
]);
637 view_all(oenv
, NFILE
, fnm
);