2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/fileio/pdbio.h"
44 #include "gromacs/topology/index.h"
46 #include "gromacs/fileio/tpxio.h"
47 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/commandline/pargs.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/strdb.h"
53 #include "gromacs/fileio/xvgr.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/smalloc.h"
59 static int strip_dssp(char *dsspfile
, int nres
,
60 gmx_bool bPhobres
[], real t
,
61 real
*acc
, FILE *fTArea
,
62 t_matrix
*mat
, int average_area
[],
63 const output_env_t oenv
)
65 static gmx_bool bFirst
= TRUE
;
68 static int xsize
, frame
;
71 int i
, nr
, iacc
, nresidues
;
72 int naccf
, naccb
; /* Count hydrophobic and hydrophilic residues */
76 tapeout
= gmx_ffopen(dsspfile
, "r");
81 fgets2(buf
, STRLEN
, tapeout
);
83 while (strstr(buf
, "KAPPA") == NULL
);
86 /* Since we also have empty lines in the dssp output (temp) file,
87 * and some line content is saved to the ssbuf variable,
88 * we need more memory than just nres elements. To be shure,
89 * we allocate 2*nres-1, since for each chain there is a
90 * separating line in the temp file. (At most each residue
91 * could have been defined as a separate chain.) */
92 snew(ssbuf
, 2*nres
-1);
99 for (nr
= 0; (fgets2(buf
, STRLEN
, tapeout
) != NULL
); nr
++)
101 if (buf
[13] == '!') /* Chain separator line has '!' at pos. 13 */
103 SSTP
= '='; /* Chain separator sign '=' */
107 SSTP
= buf
[16] == ' ' ? '~' : buf
[16];
113 /* Only calculate solvent accessible area if needed */
114 if ((NULL
!= acc
) && (buf
[13] != '!'))
116 sscanf(&(buf
[34]), "%d", &iacc
);
118 /* average_area and bPhobres are counted from 0...nres-1 */
119 average_area
[nresidues
] += iacc
;
120 if (bPhobres
[nresidues
])
130 /* Keep track of the residue number (do not count chain separator lines '!') */
140 fprintf(stderr
, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb
, naccf
);
143 sprintf(mat
->title
, "Secondary structure");
145 sprintf(mat
->label_x
, "%s", output_env_get_time_label(oenv
));
146 sprintf(mat
->label_y
, "Residue");
147 mat
->bDiscrete
= TRUE
;
149 snew(mat
->axis_y
, nr
);
150 for (i
= 0; i
< nr
; i
++)
152 mat
->axis_y
[i
] = i
+1;
163 srenew(mat
->axis_x
, xsize
);
164 srenew(mat
->matrix
, xsize
);
166 mat
->axis_x
[frame
] = t
;
167 snew(mat
->matrix
[frame
], nr
);
169 for (i
= 0; i
< nr
; i
++)
172 mat
->matrix
[frame
][i
] = max(0, searchcmap(mat
->nmap
, mat
->map
, c
));
179 fprintf(fTArea
, "%10g %10g %10g\n", t
, 0.01*iaccb
, 0.01*iaccf
);
181 gmx_ffclose(tapeout
);
183 /* Return the number of lines found in the dssp file (i.e. number
184 * of redidues plus chain separator lines).
185 * This is the number of y elements needed for the area xpm file */
189 static gmx_bool
*bPhobics(t_atoms
*atoms
)
196 nb
= get_strings("phbres.dat", &cb
);
197 snew(bb
, atoms
->nres
);
199 for (i
= 0; (i
< atoms
->nres
); i
++)
201 if (-1 != search_str(nb
, cb
, *atoms
->resinfo
[i
].name
) )
209 static void check_oo(t_atoms
*atoms
)
217 for (i
= 0; (i
< atoms
->nr
); i
++)
219 if (strcmp(*(atoms
->atomname
[i
]), "OXT") == 0)
221 *atoms
->atomname
[i
] = OOO
;
223 else if (strcmp(*(atoms
->atomname
[i
]), "O1") == 0)
225 *atoms
->atomname
[i
] = OOO
;
227 else if (strcmp(*(atoms
->atomname
[i
]), "OC1") == 0)
229 *atoms
->atomname
[i
] = OOO
;
234 static void norm_acc(t_atoms
*atoms
, int nres
,
235 real av_area
[], real norm_av_area
[])
239 char surffn
[] = "surface.dat";
240 char **surf_res
, **surf_lines
;
243 n_surf
= get_lines(surffn
, &surf_lines
);
245 snew(surf_res
, n_surf
);
246 for (i
= 0; (i
< n_surf
); i
++)
248 snew(surf_res
[i
], 5);
249 sscanf(surf_lines
[i
], "%s %lf", surf_res
[i
], &surf
[i
]);
252 for (i
= 0; (i
< nres
); i
++)
254 n
= search_str(n_surf
, surf_res
, *atoms
->resinfo
[i
].name
);
257 norm_av_area
[i
] = av_area
[i
] / surf
[n
];
261 fprintf(stderr
, "Residue %s not found in surface database (%s)\n",
262 *atoms
->resinfo
[i
].name
, surffn
);
267 void prune_ss_legend(t_matrix
*mat
)
271 int i
, r
, f
, newnmap
;
274 snew(present
, mat
->nmap
);
275 snew(newnum
, mat
->nmap
);
277 for (f
= 0; f
< mat
->nx
; f
++)
279 for (r
= 0; r
< mat
->ny
; r
++)
281 present
[mat
->matrix
[f
][r
]] = TRUE
;
287 for (i
= 0; i
< mat
->nmap
; i
++)
294 srenew(newmap
, newnmap
);
295 newmap
[newnmap
-1] = mat
->map
[i
];
298 if (newnmap
!= mat
->nmap
)
302 for (f
= 0; f
< mat
->nx
; f
++)
304 for (r
= 0; r
< mat
->ny
; r
++)
306 mat
->matrix
[f
][r
] = newnum
[mat
->matrix
[f
][r
]];
312 void write_sas_mat(const char *fn
, real
**accr
, int nframe
, int nres
, t_matrix
*mat
)
316 t_rgb rlo
= {1, 1, 1}, rhi
= {0, 0, 0};
321 hi
= lo
= accr
[0][0];
322 for (i
= 0; i
< nframe
; i
++)
324 for (j
= 0; j
< nres
; j
++)
326 lo
= min(lo
, accr
[i
][j
]);
327 hi
= max(hi
, accr
[i
][j
]);
330 fp
= gmx_ffopen(fn
, "w");
332 write_xpm(fp
, 0, "Solvent Accessible Surface", "Surface (A^2)",
333 "Time", "Residue Index", nframe
, nres
,
334 mat
->axis_x
, mat
->axis_y
, accr
, lo
, hi
, rlo
, rhi
, &nlev
);
339 void analyse_ss(const char *outfile
, t_matrix
*mat
, const char *ss_string
,
340 const output_env_t oenv
)
344 int f
, r
, *count
, *total
, ss_count
, total_count
;
349 snew(count
, mat
->nmap
);
350 snew(total
, mat
->nmap
);
351 snew(leg
, mat
->nmap
+1);
352 leg
[0] = "Structure";
353 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
355 leg
[s
+1] = strdup(map
[s
].desc
);
358 fp
= xvgropen(outfile
, "Secondary Structure",
359 output_env_get_xvgr_tlabel(oenv
), "Number of Residues", oenv
);
360 if (output_env_get_print_xvgr_codes(oenv
))
362 fprintf(fp
, "@ subtitle \"Structure = ");
364 for (s
= 0; s
< strlen(ss_string
); s
++)
370 for (f
= 0; f
< mat
->nmap
; f
++)
372 if (ss_string
[s
] == map
[f
].code
.c1
)
374 fprintf(fp
, "%s", map
[f
].desc
);
379 xvgr_legend(fp
, mat
->nmap
+1, leg
, oenv
);
382 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
386 for (f
= 0; f
< mat
->nx
; f
++)
389 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
393 for (r
= 0; r
< mat
->ny
; r
++)
395 count
[mat
->matrix
[f
][r
]]++;
396 total
[mat
->matrix
[f
][r
]]++;
398 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
400 if (strchr(ss_string
, map
[s
].code
.c1
))
402 ss_count
+= count
[s
];
403 total_count
+= count
[s
];
406 fprintf(fp
, "%8g %5d", mat
->axis_x
[f
], ss_count
);
407 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
409 fprintf(fp
, " %5d", count
[s
]);
413 /* now print column totals */
414 fprintf(fp
, "%-8s %5d", "# Totals", total_count
);
415 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
417 fprintf(fp
, " %5d", total
[s
]);
421 /* now print percentages */
422 fprintf(fp
, "%-8s %5.2f", "# SS %", total_count
/ (real
) (mat
->nx
* mat
->ny
));
423 for (s
= 0; s
< (size_t)mat
->nmap
; s
++)
425 fprintf(fp
, " %5.2f", total
[s
] / (real
) (mat
->nx
* mat
->ny
));
434 int gmx_do_dssp(int argc
, char *argv
[])
436 const char *desc
[] = {
438 "reads a trajectory file and computes the secondary structure for",
440 "calling the dssp program. If you do not have the dssp program,",
441 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
442 "that the dssp executable is located in ",
443 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
444 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
445 "executable, e.g.: [PAR]",
446 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
447 "Since version 2.0.0, dssp is invoked with a syntax that differs",
448 "from earlier versions. If you have an older version of dssp,",
449 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
450 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
451 "Even newer versions (which at the time of writing are not yet released)",
452 "are assumed to have the same syntax as 2.0.0.[PAR]",
453 "The structure assignment for each residue and time is written to an",
454 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
455 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
456 "Individual chains are separated by light grey lines in the [TT].xpm[tt] and",
458 "The number of residues with each secondary structure type and the",
459 "total secondary structure ([TT]-sss[tt]) count as a function of",
460 "time are also written to file ([TT]-sc[tt]).[PAR]",
461 "Solvent accessible surface (SAS) per residue can be calculated, both in",
462 "absolute values (A^2) and in fractions of the maximal accessible",
463 "surface of a residue. The maximal accessible surface is defined as",
464 "the accessible surface of a residue in a chain of glycines.",
465 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
466 "and that is more efficient.[PAR]",
467 "Finally, this program can dump the secondary structure in a special file",
468 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
469 "these two programs can be used to analyze dihedral properties as a",
470 "function of secondary structure type."
472 static gmx_bool bVerbose
;
473 static const char *ss_string
= "HEBT";
474 static int dsspVersion
= 2;
476 { "-v", FALSE
, etBOOL
, {&bVerbose
},
477 "HIDDENGenerate miles of useless information" },
478 { "-sss", FALSE
, etSTR
, {&ss_string
},
479 "Secondary structures for structure count"},
480 { "-ver", FALSE
, etINT
, {&dsspVersion
},
481 "DSSP major version. Syntax changed with version 2"}
486 FILE *ss
, *acc
, *fTArea
, *tmpf
;
487 const char *fnSCount
, *fnArea
, *fnTArea
, *fnAArea
;
488 const char *leg
[] = { "Phobic", "Phylic" };
493 int nres
, nr0
, naccr
, nres_plus_separators
;
494 gmx_bool
*bPhbres
, bDoAccSurf
;
496 int i
, j
, natoms
, nframe
= 0;
499 char *grpnm
, *ss_str
;
503 real
**accr
, *accr_ptr
= NULL
, *av_area
, *norm_av_area
;
504 char pdbfile
[32], tmpfile
[32], title
[256];
508 gmx_rmpbc_t gpbc
= NULL
;
511 { efTRX
, "-f", NULL
, ffREAD
},
512 { efTPS
, NULL
, NULL
, ffREAD
},
513 { efNDX
, NULL
, NULL
, ffOPTRD
},
514 { efDAT
, "-ssdump", "ssdump", ffOPTWR
},
515 { efMAP
, "-map", "ss", ffLIBRD
},
516 { efXPM
, "-o", "ss", ffWRITE
},
517 { efXVG
, "-sc", "scount", ffWRITE
},
518 { efXPM
, "-a", "area", ffOPTWR
},
519 { efXVG
, "-ta", "totarea", ffOPTWR
},
520 { efXVG
, "-aa", "averarea", ffOPTWR
}
522 #define NFILE asize(fnm)
524 if (!parse_common_args(&argc
, argv
,
525 PCA_CAN_TIME
| PCA_CAN_VIEW
| PCA_TIME_UNIT
| PCA_BE_NICE
,
526 NFILE
, fnm
, asize(pa
), pa
, asize(desc
), desc
, 0, NULL
, &oenv
))
530 fnSCount
= opt2fn("-sc", NFILE
, fnm
);
531 fnArea
= opt2fn_null("-a", NFILE
, fnm
);
532 fnTArea
= opt2fn_null("-ta", NFILE
, fnm
);
533 fnAArea
= opt2fn_null("-aa", NFILE
, fnm
);
534 bDoAccSurf
= (fnArea
|| fnTArea
|| fnAArea
);
536 read_tps_conf(ftp2fn(efTPS
, NFILE
, fnm
), title
, &top
, &ePBC
, &xp
, NULL
, box
, FALSE
);
537 atoms
= &(top
.atoms
);
539 bPhbres
= bPhobics(atoms
);
541 get_index(atoms
, ftp2fn_null(efNDX
, NFILE
, fnm
), 1, &gnx
, &index
, &grpnm
);
544 for (i
= 0; (i
< gnx
); i
++)
546 if (atoms
->atom
[index
[i
]].resind
!= nr0
)
548 nr0
= atoms
->atom
[index
[i
]].resind
;
552 fprintf(stderr
, "There are %d residues in your selected group\n", nres
);
554 strcpy(pdbfile
, "ddXXXXXX");
556 if ((tmpf
= fopen(pdbfile
, "w")) == NULL
)
558 sprintf(pdbfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
560 if ((tmpf
= fopen(pdbfile
, "w")) == NULL
)
562 gmx_fatal(FARGS
, "Can not open tmp file %s", pdbfile
);
570 strcpy(tmpfile
, "ddXXXXXX");
572 if ((tmpf
= fopen(tmpfile
, "w")) == NULL
)
574 sprintf(tmpfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
576 if ((tmpf
= fopen(tmpfile
, "w")) == NULL
)
578 gmx_fatal(FARGS
, "Can not open tmp file %s", tmpfile
);
586 if ((dptr
= getenv("DSSP")) == NULL
)
588 dptr
= "/usr/local/bin/dssp";
590 if (!gmx_fexist(dptr
))
592 gmx_fatal(FARGS
, "DSSP executable (%s) does not exist (use setenv DSSP)",
595 if (dsspVersion
>= 2)
599 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion
);
602 sprintf(dssp
, "%s -i %s -o %s > /dev/null %s",
603 dptr
, pdbfile
, tmpfile
, bVerbose
? "" : "2> /dev/null");
607 sprintf(dssp
, "%s %s %s %s > /dev/null %s",
608 dptr
, bDoAccSurf
? "" : "-na", pdbfile
, tmpfile
, bVerbose
? "" : "2> /dev/null");
611 fprintf(stderr
, "dssp cmd='%s'\n", dssp
);
615 fTArea
= xvgropen(fnTArea
, "Solvent Accessible Surface Area",
616 output_env_get_xvgr_tlabel(oenv
), "Area (nm\\S2\\N)", oenv
);
617 xvgr_legend(fTArea
, 2, leg
, oenv
);
625 mat
.nmap
= readcmap(opt2fn("-map", NFILE
, fnm
), &(mat
.map
));
627 natoms
= read_first_x(oenv
, &status
, ftp2fn(efTRX
, NFILE
, fnm
), &t
, &x
, box
);
628 if (natoms
> atoms
->nr
)
630 gmx_fatal(FARGS
, "\nTrajectory does not match topology!");
634 gmx_fatal(FARGS
, "\nTrajectory does not match selected group!");
637 snew(average_area
, atoms
->nres
);
638 snew(av_area
, atoms
->nres
);
639 snew(norm_av_area
, atoms
->nres
);
643 gpbc
= gmx_rmpbc_init(&top
.idef
, ePBC
, natoms
);
646 t
= output_env_conv_time(oenv
, t
);
647 if (bDoAccSurf
&& nframe
>= naccr
)
651 for (i
= naccr
-10; i
< naccr
; i
++)
653 snew(accr
[i
], 2*atoms
->nres
-1);
656 gmx_rmpbc(gpbc
, natoms
, box
, x
);
657 tapein
= gmx_ffopen(pdbfile
, "w");
658 write_pdbfile_indexed(tapein
, NULL
, atoms
, x
, ePBC
, box
, ' ', -1, gnx
, index
, NULL
, TRUE
);
662 printf("Warning-- No calls to system(3) supported on this platform.");
663 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp
);
666 if (0 != system(dssp
))
668 gmx_fatal(FARGS
, "Failed to execute command: %s\n",
669 "Try specifying your dssp version with the -ver option.", dssp
);
673 /* strip_dssp returns the number of lines found in the dssp file, i.e.
674 * the number of residues plus the separator lines */
678 accr_ptr
= accr
[nframe
];
681 nres_plus_separators
= strip_dssp(tmpfile
, nres
, bPhbres
, t
,
682 accr_ptr
, fTArea
, &mat
, average_area
, oenv
);
687 while (read_next_x(oenv
, status
, &t
, x
, box
));
688 fprintf(stderr
, "\n");
694 gmx_rmpbc_done(gpbc
);
696 prune_ss_legend(&mat
);
698 ss
= opt2FILE("-o", NFILE
, fnm
, "w");
700 write_xpm_m(ss
, mat
);
703 if (opt2bSet("-ssdump", NFILE
, fnm
))
705 ss
= opt2FILE("-ssdump", NFILE
, fnm
, "w");
706 snew(ss_str
, nres
+1);
707 fprintf(ss
, "%d\n", nres
);
708 for (j
= 0; j
< mat
.nx
; j
++)
710 for (i
= 0; (i
< mat
.ny
); i
++)
712 ss_str
[i
] = mat
.map
[mat
.matrix
[j
][i
]].code
.c1
;
715 fprintf(ss
, "%s\n", ss_str
);
720 analyse_ss(fnSCount
, &mat
, ss_string
, oenv
);
724 write_sas_mat(fnArea
, accr
, nframe
, nres_plus_separators
, &mat
);
726 for (i
= 0; i
< atoms
->nres
; i
++)
728 av_area
[i
] = (average_area
[i
] / (real
)nframe
);
731 norm_acc(atoms
, nres
, av_area
, norm_av_area
);
735 acc
= xvgropen(fnAArea
, "Average Accessible Area",
736 "Residue", "A\\S2", oenv
);
737 for (i
= 0; (i
< nres
); i
++)
739 fprintf(acc
, "%5d %10g %10g\n", i
+1, av_area
[i
], norm_av_area
[i
]);
745 view_all(oenv
, NFILE
, fnm
);