maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / Tools / Phylo / Molphy.pm
bloba7a4fbb90609909a660c979172146a64c7b91963
2 # BioPerl module for Bio::Tools::Phylo::Molphy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tools::Phylo::Molphy - parser for Molphy output
18 =head1 SYNOPSIS
20 use Bio::Tools::Phylo::Molphy;
21 my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22 while( my $r = $parser->next_result ) {
23 # r is a Bio::Tools::Phylo::Molphy::Result object
25 # print the model name
26 print $r->model, "\n";
28 # get the substitution matrix
29 # this is a hash of 3letter aa codes -> 3letter aa codes representing
30 # substitution rate
31 my $smat = $r->substitution_matrix;
32 print "Arg -> Gln substitution rate is %d\n",
33 $smat->{'Arg'}->{'Gln'}, "\n";
35 # get the transition probablity matrix
36 # this is a hash of 3letter aa codes -> 3letter aa codes representing
37 # transition probabilty
38 my $tmat = $r->transition_probability_matrix;
39 print "Arg -> Gln transition probablity is %.2f\n",
40 $tmat->{'Arg'}->{'Gln'}, "\n";
42 # get the frequency for each of the residues
43 my $rfreqs = $r->residue_frequencies;
45 foreach my $residue ( keys %{$rfreqs} ) {
46 printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47 $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
50 my @trees;
51 while( my $t = $r->next_tree ) {
52 push @trees, $t;
55 print "search space is ", $r->search_space, "\n",
56 "1st tree score is ", $trees[0]->score, "\n";
58 # writing to STDOUT, use -file => '>filename' to specify a file
59 my $out = Bio::TreeIO->new(-format => "newick");
60 $out->write_tree($trees[0]); # writing only the 1st tree
63 =head1 DESCRIPTION
65 A parser for Molphy output (protml,dnaml)
67 =head1 FEEDBACK
69 =head2 Mailing Lists
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to
73 the Bioperl mailing list. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 =head2 Support
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 reponsive experts will be able look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
89 =head2 Reporting Bugs
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
93 web:
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHOR - Jason Stajich
99 Email jason-at-bioperl.org
101 =head1 APPENDIX
103 The rest of the documentation details each of the object methods.
104 Internal methods are usually preceded with a _
106 =cut
109 # Let the code begin...
112 package Bio::Tools::Phylo::Molphy;
114 use strict;
116 use Bio::Tools::Phylo::Molphy::Result;
117 use Bio::TreeIO;
118 use IO::String;
120 use base qw(Bio::Root::Root Bio::Root::IO);
122 =head2 new
124 Title : new
125 Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
126 Function: Builds a new Bio::Tools::Phylo::Molphy object
127 Returns : Bio::Tools::Phylo::Molphy
128 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
131 =cut
133 sub new {
134 my($class,@args) = @_;
136 my $self = $class->SUPER::new(@args);
137 $self->_initialize_io(@args);
139 return $self;
142 =head2 next_result
144 Title : next_result
145 Usage : my $r = $molphy->next_result
146 Function: Get the next result set from parser data
147 Returns : Bio::Tools::Phylo::Molphy::Result object
148 Args : none
151 =cut
153 sub next_result{
154 my ($self) = @_;
156 # A little statemachine for the parser here
157 my ($state,$transition_ct,
158 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
159 my ( %subst_matrix, @treelines, @treedata, %frequencies);
160 my ( $treenum,$possible_trees, $model);
161 my ($trans_type,$trans_amount);
162 my $parsed = 0;
163 while( defined ( $_ = $self->_readline()) ) {
164 $parsed = 1;
165 if( /^Relative Substitution Rate Matrix/ ) {
166 if( %subst_matrix ) {
167 $self->_pushback($_);
168 last;
170 $state = 0;
171 my ( @tempdata);
172 @resloc = ();
173 while( defined ($_ = $self->_readline) ) {
174 last if (/^\s+$/);
175 # remove leading/trailing spaces
176 s/^\s+//;
177 s/\s+$//;
178 my @data = split;
179 my $i = 0;
180 for my $l ( @data ) {
181 if( $l =~ /\D+/ ) {
182 push @resloc, $l;
184 $i++;
186 push @tempdata, \@data;
188 my $i = 0;
189 for my $row ( @tempdata ) {
190 my $j = 0;
191 for my $col ( @$row ) {
192 if( $i == $j ) {
193 # empty string for diagonals
194 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
195 } else {
196 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
198 $j++;
200 $i++;
202 } elsif( /^Transition Probability Matrix/ ) {
203 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
204 $state = 1;
205 my $newtrans_type = "$3-$1";
206 $trans_amount = $1;
207 if( defined $trans_type ) {
208 # finish processing the transition_matrix
209 my $i =0;
210 foreach my $row ( @transition_matrix ) {
211 my $j = 0;
212 foreach my $col ( @$row ) {
213 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
214 $j++;
216 $i++;
219 $trans_type = $newtrans_type;
220 $transition_ct = 0;
221 @transition_matrix = ();
223 } elsif ( /Acid Frequencies/ ) {
224 $state = 0;
225 $self->_readline(); # skip the next line
226 while( defined( $_ = $self->_readline) ) {
227 unless( /^\s+/) {
228 $self->_pushback($_);
229 last;
231 s/^\s+//;
232 s/\s+$//;
233 my ($index,$res,$model,$data) = split;
234 $frequencies{$res} = [ $model,$data];
236 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
237 my @save = ($1,$2,$3);
238 # finish processing the transition_matrix
239 my $i =0;
240 foreach my $row ( @transition_matrix ) {
241 my $j = 0;
242 foreach my $col ( @$row ) {
243 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
244 $j++;
246 $i++;
248 if( defined $treenum ) {
249 $self->_pushback($_);
250 last;
253 $state = 2;
254 ($treenum,$possible_trees, $model) = @save;
255 $model =~ s/\s+/ /g;
256 } elsif( $state == 1 ) {
257 next if( /^\s+$/ || /^\s+Ala/);
258 s/^\s+//;
259 s/\s+$//;
260 if( $trans_type eq '1PAM-1.0e7' ) {
261 # because the matrix is split up into 2-10 column sets
262 push @{$transition_matrix[$transition_ct++]}, split ;
263 $transition_ct = 0 if $transition_ct % 20 == 0;
264 } elsif( $trans_type eq '1PAM-1.0e5' ) {
265 # because the matrix is split up into 2-10 column sets
266 my ($res,@row) = split;
267 next if $transition_ct >= 20; # skip last
268 push @{$transition_matrix[$transition_ct++]}, @row;
270 } elsif( $state == 2 ) {
271 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
272 push @treedata, [ $1,$2];
274 # save this for the end so that we can
275 # be efficient and only open one tree parser
276 push @treelines, $_;
279 # waiting till the end to do this, is it better
280 my @trees;
281 if( @treelines ) {
282 my $strdat = IO::String->new(join('',@treelines));
283 my $treeio = Bio::TreeIO->new(-fh => $strdat,
284 -format => 'newick');
285 while( my $tree = $treeio->next_tree ) {
286 if( @treedata ) {
287 my $dat = shift @treedata;
288 # set the associated information
289 $tree->id($dat->[0]);
290 $tree->score($dat->[1]);
292 push @trees, $tree;
295 return unless( $parsed );
296 my $result = Bio::Tools::Phylo::Molphy::Result->new
297 (-trees => \@trees,
298 -substitution_matrix => \%subst_matrix,
299 -frequencies => \%frequencies,
300 -model => $model,
301 -search_space => $possible_trees,
303 while( my ($type,$mat) = each %transition_mat ) {
304 $result->transition_probability_matrix( $type,$mat);
306 $result;