2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/commandline/viewit.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/pdbio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/gmx_ana.h"
56 #include "gromacs/gmxana/gstat.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/dir_separator.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
68 #if GMX_NATIVE_WINDOWS
69 # define NULL_DEVICE "nul"
71 # define pclose _pclose
73 # define NULL_DEVICE "/dev/null"
76 struct DsspInputStrings
83 static const char* prepareToRedirectStdout(bool bVerbose
)
85 return bVerbose
? "" : "2>" NULL_DEVICE
;
88 static void printDsspResult(char* dssp
, const DsspInputStrings
& strings
, const std::string
& redirectionString
)
90 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
91 sprintf(dssp
, "%s -i %s %s", strings
.dptr
.c_str(), strings
.pdbfile
.c_str(), redirectionString
.c_str());
93 sprintf(dssp
, "%s -i %s -o %s > %s %s", strings
.dptr
.c_str(), strings
.pdbfile
.c_str(),
94 strings
.tmpfile
.c_str(), NULL_DEVICE
, redirectionString
.c_str());
99 static int strip_dssp(FILE* tapeout
,
101 const gmx_bool bPhobres
[],
107 const gmx_output_env_t
* oenv
)
109 static gmx_bool bFirst
= TRUE
;
111 char buf
[STRLEN
+ 1];
113 int nr
, iacc
, nresidues
;
114 int naccf
, naccb
; /* Count hydrophobic and hydrophilic residues */
121 fgets2(buf
, STRLEN
, tapeout
);
122 } while (std::strstr(buf
, "KAPPA") == nullptr);
125 /* Since we also have empty lines in the dssp output (temp) file,
126 * and some line content is saved to the ssbuf variable,
127 * we need more memory than just nres elements. To be shure,
128 * we allocate 2*nres-1, since for each chain there is a
129 * separating line in the temp file. (At most each residue
130 * could have been defined as a separate chain.) */
131 snew(ssbuf
, 2 * nres
- 1);
138 for (nr
= 0; (fgets2(buf
, STRLEN
, tapeout
) != nullptr); nr
++)
140 if (buf
[13] == '!') /* Chain separator line has '!' at pos. 13 */
142 SSTP
= '='; /* Chain separator sign '=' */
146 SSTP
= buf
[16] == ' ' ? '~' : buf
[16];
152 /* Only calculate solvent accessible area if needed */
153 if ((nullptr != acc
) && (buf
[13] != '!'))
155 sscanf(&(buf
[34]), "%d", &iacc
);
157 /* average_area and bPhobres are counted from 0...nres-1 */
158 average_area
[nresidues
] += iacc
;
159 if (bPhobres
[nresidues
])
169 /* Keep track of the residue number (do not count chain separator lines '!') */
179 fprintf(stderr
, "%d residues were classified as hydrophobic and %d as hydrophilic.\n",
183 mat
->title
= "Secondary structure";
185 mat
->label_x
= output_env_get_time_label(oenv
);
186 mat
->label_y
= "Residue";
187 mat
->bDiscrete
= true;
189 mat
->axis_y
.resize(nr
);
190 std::iota(mat
->axis_y
.begin(), mat
->axis_y
.end(), 1);
191 mat
->axis_x
.resize(0);
192 mat
->matrix
.resize(1, 1);
195 mat
->axis_x
.push_back(t
);
196 mat
->matrix
.resize(mat
->matrix
.extent(0), nr
);
197 mat
->nx
= mat
->matrix
.extent(0);
198 auto columnIndex
= mat
->nx
- 1;
199 for (int i
= 0; i
< nr
; i
++)
201 t_xpmelmt c
= { ssbuf
[i
], 0 };
202 mat
->matrix(columnIndex
, i
) = std::max(static_cast<t_matelmt
>(0), searchcmap(mat
->map
, c
));
207 fprintf(fTArea
, "%10g %10g %10g\n", t
, 0.01 * iaccb
, 0.01 * iaccf
);
210 /* Return the number of lines found in the dssp file (i.e. number
211 * of redidues plus chain separator lines).
212 * This is the number of y elements needed for the area xpm file */
216 static gmx_bool
* bPhobics(t_atoms
* atoms
)
222 char surffn
[] = "surface.dat";
223 char ** surf_res
, **surf_lines
;
226 nb
= get_lines("phbres.dat", &cb
);
227 snew(bb
, atoms
->nres
);
229 n_surf
= get_lines(surffn
, &surf_lines
);
230 snew(surf_res
, n_surf
);
231 for (i
= 0; (i
< n_surf
); i
++)
233 snew(surf_res
[i
], 5);
234 sscanf(surf_lines
[i
], "%s", surf_res
[i
]);
238 for (i
= 0, j
= 0; (i
< atoms
->nres
); i
++)
240 if (-1 != search_str(n_surf
, surf_res
, *atoms
->resinfo
[i
].name
))
242 bb
[j
++] = (-1 != search_str(nb
, cb
, *atoms
->resinfo
[i
].name
));
249 "Not all residues were recognized (%d from %d), the result may be inaccurate!\n", j
, i
);
252 for (i
= 0; (i
< n_surf
); i
++)
261 static void check_oo(t_atoms
* atoms
)
267 OOO
= gmx_strdup("O");
269 for (i
= 0; (i
< atoms
->nr
); i
++)
271 if ((std::strcmp(*(atoms
->atomname
[i
]), "OXT") == 0)
272 || (std::strcmp(*(atoms
->atomname
[i
]), "O1") == 0)
273 || (std::strcmp(*(atoms
->atomname
[i
]), "OC1") == 0))
275 *atoms
->atomname
[i
] = OOO
;
280 static void norm_acc(t_atoms
* atoms
, int nres
, const real av_area
[], real norm_av_area
[])
284 char surffn
[] = "surface.dat";
285 char ** surf_res
, **surf_lines
;
288 n_surf
= get_lines(surffn
, &surf_lines
);
290 snew(surf_res
, n_surf
);
291 for (i
= 0; (i
< n_surf
); i
++)
293 snew(surf_res
[i
], 5);
294 sscanf(surf_lines
[i
], "%s %lf", surf_res
[i
], &surf
[i
]);
297 for (i
= 0; (i
< nres
); i
++)
299 n
= search_str(n_surf
, surf_res
, *atoms
->resinfo
[i
].name
);
302 norm_av_area
[i
] = av_area
[i
] / surf
[n
];
306 fprintf(stderr
, "Residue %s not found in surface database (%s)\n",
307 *atoms
->resinfo
[i
].name
, surffn
);
312 static void prune_ss_legend(t_matrix
* mat
)
314 std::vector
<bool> isPresent(mat
->map
.size());
315 std::vector
<int> newnum(mat
->map
.size());
316 std::vector
<t_mapping
> newmap
;
318 for (int f
= 0; f
< mat
->nx
; f
++)
320 for (int r
= 0; r
< mat
->ny
; r
++)
322 isPresent
[mat
->matrix(f
, r
)] = true;
326 for (size_t i
= 0; i
< mat
->map
.size(); i
++)
331 newnum
[i
] = newmap
.size();
332 newmap
.emplace_back(mat
->map
[i
]);
335 if (newmap
.size() != mat
->map
.size())
337 std::swap(mat
->map
, newmap
);
338 for (int f
= 0; f
< mat
->nx
; f
++)
340 for (int r
= 0; r
< mat
->ny
; r
++)
342 mat
->matrix(f
, r
) = newnum
[mat
->matrix(f
, r
)];
348 static void write_sas_mat(const char* fn
, real
** accr
, int nframe
, int nres
, t_matrix
* mat
)
352 t_rgb rlo
= { 1, 1, 1 }, rhi
= { 0, 0, 0 };
357 hi
= lo
= accr
[0][0];
358 for (i
= 0; i
< nframe
; i
++)
360 for (j
= 0; j
< nres
; j
++)
362 lo
= std::min(lo
, accr
[i
][j
]);
363 hi
= std::max(hi
, accr
[i
][j
]);
366 fp
= gmx_ffopen(fn
, "w");
367 nlev
= static_cast<int>(hi
- lo
+ 1);
368 write_xpm(fp
, 0, "Solvent Accessible Surface", "Surface (A^2)", "Time", "Residue Index",
369 nframe
, nres
, mat
->axis_x
.data(), mat
->axis_y
.data(), accr
, lo
, hi
, rlo
, rhi
, &nlev
);
374 static void analyse_ss(const char* outfile
, t_matrix
* mat
, const char* ss_string
, const gmx_output_env_t
* oenv
)
377 int ss_count
, total_count
;
379 gmx::ArrayRef
<t_mapping
> map
= mat
->map
;
380 std::vector
<int> count(map
.size());
381 std::vector
<int> total(map
.size(), 0);
382 // This copying would not be necessary if xvgr_legend could take a
383 // view of string views
384 std::vector
<std::string
> leg
;
385 leg
.reserve(map
.size() + 1);
386 leg
.emplace_back("Structure");
387 for (const auto& m
: map
)
389 leg
.emplace_back(m
.desc
);
392 fp
= xvgropen(outfile
, "Secondary Structure", output_env_get_xvgr_tlabel(oenv
),
393 "Number of Residues", oenv
);
394 if (output_env_get_print_xvgr_codes(oenv
))
396 fprintf(fp
, "@ subtitle \"Structure = ");
398 for (size_t s
= 0; s
< std::strlen(ss_string
); s
++)
404 for (const auto& m
: map
)
406 if (ss_string
[s
] == m
.code
.c1
)
408 fprintf(fp
, "%s", m
.desc
);
413 xvgrLegend(fp
, leg
, oenv
);
416 for (int f
= 0; f
< mat
->nx
; f
++)
419 for (auto& c
: count
)
423 for (int r
= 0; r
< mat
->ny
; r
++)
425 count
[mat
->matrix(f
, r
)]++;
426 total
[mat
->matrix(f
, r
)]++;
428 for (gmx::index s
= 0; s
!= gmx::ssize(map
); ++s
)
430 if (std::strchr(ss_string
, map
[s
].code
.c1
))
432 ss_count
+= count
[s
];
433 total_count
+= count
[s
];
436 fprintf(fp
, "%8g %5d", mat
->axis_x
[f
], ss_count
);
437 for (const auto& c
: count
)
439 fprintf(fp
, " %5d", c
);
443 /* now print column totals */
444 fprintf(fp
, "%-8s %5d", "# Totals", total_count
);
445 for (const auto& t
: total
)
447 fprintf(fp
, " %5d", t
);
451 /* now print probabilities */
452 fprintf(fp
, "%-8s %5.2f", "# SS pr.", total_count
/ static_cast<real
>(mat
->nx
* mat
->ny
));
453 for (const auto& t
: total
)
455 fprintf(fp
, " %5.2f", t
/ static_cast<real
>(mat
->nx
* mat
->ny
));
462 int gmx_do_dssp(int argc
, char* argv
[])
464 const char* desc
[] = {
466 "reads a trajectory file and computes the secondary structure for",
468 "calling the dssp program. If you do not have the dssp program,",
469 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
470 "that the dssp executable is located in ",
471 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
472 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
473 "executable, e.g.: [PAR]",
474 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
475 "Since version 2.0.0, dssp is invoked with a syntax that differs",
476 "from earlier versions. If you have an older version of dssp,",
477 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
478 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
479 "Even newer versions (which at the time of writing are not yet released)",
480 "are assumed to have the same syntax as 2.0.0.[PAR]",
481 "The structure assignment for each residue and time is written to an",
482 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
483 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
484 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
486 "The number of residues with each secondary structure type and the",
487 "total secondary structure ([TT]-sss[tt]) count as a function of",
488 "time are also written to file ([TT]-sc[tt]).[PAR]",
489 "Solvent accessible surface (SAS) per residue can be calculated, both in",
490 "absolute values (A^2) and in fractions of the maximal accessible",
491 "surface of a residue. The maximal accessible surface is defined as",
492 "the accessible surface of a residue in a chain of glycines.",
493 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
494 "and that is more efficient.[PAR]",
495 "Finally, this program can dump the secondary structure in a special file",
496 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
497 "these two programs can be used to analyze dihedral properties as a",
498 "function of secondary structure type."
500 static gmx_bool bVerbose
;
501 static const char* ss_string
= "HEBT";
502 static int dsspVersion
= 2;
504 { "-v", FALSE
, etBOOL
, { &bVerbose
}, "HIDDENGenerate miles of useless information" },
505 { "-sss", FALSE
, etSTR
, { &ss_string
}, "Secondary structures for structure count" },
510 "DSSP major version. Syntax changed with version 2" }
514 FILE * tapein
, *tapeout
;
515 FILE * ss
, *acc
, *fTArea
, *tmpf
;
516 const char * fnSCount
, *fnArea
, *fnTArea
, *fnAArea
;
517 const char* leg
[] = { "Phobic", "Phylic" };
522 int nres
, nr0
, naccr
, nres_plus_separators
;
523 gmx_bool
* bPhbres
, bDoAccSurf
;
525 int natoms
, nframe
= 0;
526 matrix box
= { { 0 } };
532 real
** accr
, *accr_ptr
= nullptr, *av_area
, *norm_av_area
;
533 char pdbfile
[32], tmpfile
[32];
536 gmx_output_env_t
* oenv
;
537 gmx_rmpbc_t gpbc
= nullptr;
540 { efTRX
, "-f", nullptr, ffREAD
}, { efTPS
, nullptr, nullptr, ffREAD
},
541 { efNDX
, nullptr, nullptr, ffOPTRD
}, { efDAT
, "-ssdump", "ssdump", ffOPTWR
},
542 { efMAP
, "-map", "ss", ffLIBRD
}, { efXPM
, "-o", "ss", ffWRITE
},
543 { efXVG
, "-sc", "scount", ffWRITE
}, { efXPM
, "-a", "area", ffOPTWR
},
544 { efXVG
, "-ta", "totarea", ffOPTWR
}, { efXVG
, "-aa", "averarea", ffOPTWR
}
546 #define NFILE asize(fnm)
548 if (!parse_common_args(&argc
, argv
, PCA_CAN_TIME
| PCA_CAN_VIEW
| PCA_TIME_UNIT
, NFILE
, fnm
,
549 asize(pa
), pa
, asize(desc
), desc
, 0, nullptr, &oenv
))
553 fnSCount
= opt2fn("-sc", NFILE
, fnm
);
554 fnArea
= opt2fn_null("-a", NFILE
, fnm
);
555 fnTArea
= opt2fn_null("-ta", NFILE
, fnm
);
556 fnAArea
= opt2fn_null("-aa", NFILE
, fnm
);
557 bDoAccSurf
= ((fnArea
!= nullptr) || (fnTArea
!= nullptr) || (fnAArea
!= nullptr));
559 read_tps_conf(ftp2fn(efTPS
, NFILE
, fnm
), &top
, &pbcType
, &xp
, nullptr, box
, FALSE
);
560 atoms
= &(top
.atoms
);
562 bPhbres
= bPhobics(atoms
);
564 get_index(atoms
, ftp2fn_null(efNDX
, NFILE
, fnm
), 1, &gnx
, &index
, &grpnm
);
567 for (int i
= 0; (i
< gnx
); i
++)
569 if (atoms
->atom
[index
[i
]].resind
!= nr0
)
571 nr0
= atoms
->atom
[index
[i
]].resind
;
575 fprintf(stderr
, "There are %d residues in your selected group\n", nres
);
577 std::strcpy(pdbfile
, "ddXXXXXX");
579 if ((tmpf
= fopen(pdbfile
, "w")) == nullptr)
581 sprintf(pdbfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
583 if ((tmpf
= fopen(pdbfile
, "w")) == nullptr)
585 gmx_fatal(FARGS
, "Can not open tmp file %s", pdbfile
);
590 std::strcpy(tmpfile
, "ddXXXXXX");
592 if ((tmpf
= fopen(tmpfile
, "w")) == nullptr)
594 sprintf(tmpfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
596 if ((tmpf
= fopen(tmpfile
, "w")) == nullptr)
598 gmx_fatal(FARGS
, "Can not open tmp file %s", tmpfile
);
603 if ((dptr
= getenv("DSSP")) == nullptr)
605 dptr
= "/usr/local/bin/dssp";
607 if (!gmx_fexist(dptr
))
609 gmx_fatal(FARGS
, "DSSP executable (%s) does not exist (use setenv DSSP)", dptr
);
611 std::string redirectionString
;
612 redirectionString
= prepareToRedirectStdout(bVerbose
);
613 DsspInputStrings dsspStrings
;
614 dsspStrings
.dptr
= dptr
;
615 dsspStrings
.pdbfile
= pdbfile
;
616 dsspStrings
.tmpfile
= tmpfile
;
617 if (dsspVersion
>= 2)
621 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by "
622 "do_dssp. Assuming version 2 syntax.\n\n",
626 printDsspResult(dssp
, dsspStrings
, redirectionString
);
632 dsspStrings
.dptr
.clear();
636 dsspStrings
.dptr
= "-na";
638 printDsspResult(dssp
, dsspStrings
, redirectionString
);
640 fprintf(stderr
, "dssp cmd='%s'\n", dssp
);
644 fTArea
= xvgropen(fnTArea
, "Solvent Accessible Surface Area",
645 output_env_get_xvgr_tlabel(oenv
), "Area (nm\\S2\\N)", oenv
);
646 xvgr_legend(fTArea
, 2, leg
, oenv
);
653 mat
.map
= readcmap(opt2fn("-map", NFILE
, fnm
));
655 natoms
= read_first_x(oenv
, &status
, ftp2fn(efTRX
, NFILE
, fnm
), &t
, &x
, box
);
656 if (natoms
> atoms
->nr
)
658 gmx_fatal(FARGS
, "\nTrajectory does not match topology!");
662 gmx_fatal(FARGS
, "\nTrajectory does not match selected group!");
665 snew(average_area
, atoms
->nres
);
666 snew(av_area
, atoms
->nres
);
667 snew(norm_av_area
, atoms
->nres
);
671 gpbc
= gmx_rmpbc_init(&top
.idef
, pbcType
, natoms
);
674 t
= output_env_conv_time(oenv
, t
);
675 if (bDoAccSurf
&& nframe
>= naccr
)
679 for (int i
= naccr
- 10; i
< naccr
; i
++)
681 snew(accr
[i
], 2 * atoms
->nres
- 1);
684 gmx_rmpbc(gpbc
, natoms
, box
, x
);
685 tapein
= gmx_ffopen(pdbfile
, "w");
686 write_pdbfile_indexed(tapein
, nullptr, atoms
, x
, pbcType
, box
, ' ', -1, gnx
, index
, nullptr, FALSE
);
688 /* strip_dssp returns the number of lines found in the dssp file, i.e.
689 * the number of residues plus the separator lines */
691 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
692 if (nullptr == (tapeout
= popen(dssp
, "r")))
694 if (0 != system(dssp
) || nullptr == (tapeout
= gmx_ffopen(tmpfile
, "r")))
700 "Failed to execute command: %s\n"
701 "Try specifying your dssp version with the -ver option.",
706 accr_ptr
= accr
[nframe
];
708 /* strip_dssp returns the number of lines found in the dssp file, i.e.
709 * the number of residues plus the separator lines */
710 nres_plus_separators
=
711 strip_dssp(tapeout
, nres
, bPhbres
, t
, accr_ptr
, fTArea
, &mat
, average_area
, oenv
);
712 #if HAVE_PIPES || GMX_NATIVE_WINDOWS
715 gmx_ffclose(tapeout
);
720 } while (read_next_x(oenv
, status
, &t
, x
, box
));
721 fprintf(stderr
, "\n");
727 gmx_rmpbc_done(gpbc
);
729 prune_ss_legend(&mat
);
731 ss
= opt2FILE("-o", NFILE
, fnm
, "w");
733 write_xpm_m(ss
, mat
);
736 if (opt2bSet("-ssdump", NFILE
, fnm
))
738 ss
= opt2FILE("-ssdump", NFILE
, fnm
, "w");
739 fprintf(ss
, "%d\n", nres
);
740 for (gmx::index j
= 0; j
!= mat
.matrix
.extent(0); ++j
)
742 auto row
= mat
.matrix
.asView()[j
];
743 for (gmx::index i
= 0; i
!= row
.extent(0); ++i
)
745 fputc(mat
.map
[row
[i
]].code
.c1
, ss
);
751 analyse_ss(fnSCount
, &mat
, ss_string
, oenv
);
755 write_sas_mat(fnArea
, accr
, nframe
, nres_plus_separators
, &mat
);
757 for (int i
= 0; i
< atoms
->nres
; i
++)
759 av_area
[i
] = (average_area
[i
] / static_cast<real
>(nframe
));
762 norm_acc(atoms
, nres
, av_area
, norm_av_area
);
766 acc
= xvgropen(fnAArea
, "Average Accessible Area", "Residue", "A\\S2", oenv
);
767 for (int i
= 0; (i
< nres
); i
++)
769 fprintf(acc
, "%5d %10g %10g\n", i
+ 1, av_area
[i
], norm_av_area
[i
]);
775 view_all(oenv
, NFILE
, fnm
);