tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tools / Phylo / Molphy.pm
blobc59d318ec8ef4db922a8d855690deb292a1de258
1 # $Id$
3 # BioPerl module for Bio::Tools::Phylo::Molphy
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::Phylo::Molphy - parser for Molphy output
19 =head1 SYNOPSIS
21 use Bio::Tools::Phylo::Molphy;
22 my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
23 while( my $r = $parser->next_result ) {
24 # r is a Bio::Tools::Phylo::Molphy::Result object
26 # print the model name
27 print $r->model, "\n";
29 # get the substitution matrix
30 # this is a hash of 3letter aa codes -> 3letter aa codes representing
31 # substitution rate
32 my $smat = $r->substitution_matrix;
33 print "Arg -> Gln substitution rate is %d\n",
34 $smat->{'Arg'}->{'Gln'}, "\n";
36 # get the transition probablity matrix
37 # this is a hash of 3letter aa codes -> 3letter aa codes representing
38 # transition probabilty
39 my $tmat = $r->transition_probability_matrix;
40 print "Arg -> Gln transition probablity is %.2f\n",
41 $tmat->{'Arg'}->{'Gln'}, "\n";
43 # get the frequency for each of the residues
44 my $rfreqs = $r->residue_frequencies;
46 foreach my $residue ( keys %{$rfreqs} ) {
47 printf "residue %s expected freq: %.2f observed freq: %.2f\n",
48 $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
51 my @trees;
52 while( my $t = $r->next_tree ) {
53 push @trees, $t;
56 print "search space is ", $r->search_space, "\n",
57 "1st tree score is ", $trees[0]->score, "\n";
59 # writing to STDOUT, use -file => '>filename' to specify a file
60 my $out = Bio::TreeIO->new(-format => "newick");
61 $out->write_tree($trees[0]); # writing only the 1st tree
64 =head1 DESCRIPTION
66 A parser for Molphy output (protml,dnaml)
68 =head1 FEEDBACK
70 =head2 Mailing Lists
72 User feedback is an integral part of the evolution of this and other
73 Bioperl modules. Send your comments and suggestions preferably to
74 the Bioperl mailing list. Your participation is much appreciated.
76 bioperl-l@bioperl.org - General discussion
77 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79 =head2 Support
81 Please direct usage questions or support issues to the mailing list:
83 I<bioperl-l@bioperl.org>
85 rather than to the module maintainer directly. Many experienced and
86 reponsive experts will be able look at the problem and quickly
87 address it. Please include a thorough description of the problem
88 with code and data examples if at all possible.
90 =head2 Reporting Bugs
92 Report bugs to the Bioperl bug tracking system to help us keep track
93 of the bugs and their resolution. Bug reports can be submitted via the
94 web:
96 http://bugzilla.open-bio.org/
98 =head1 AUTHOR - Jason Stajich
100 Email jason-at-bioperl.org
102 =head1 APPENDIX
104 The rest of the documentation details each of the object methods.
105 Internal methods are usually preceded with a _
107 =cut
110 # Let the code begin...
113 package Bio::Tools::Phylo::Molphy;
114 use strict;
116 use Bio::Tools::Phylo::Molphy::Result;
117 use Bio::TreeIO;
118 use IO::String;
120 use base qw(Bio::Root::Root Bio::Root::IO);
122 =head2 new
124 Title : new
125 Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
126 Function: Builds a new Bio::Tools::Phylo::Molphy object
127 Returns : Bio::Tools::Phylo::Molphy
128 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
131 =cut
133 sub new {
134 my($class,@args) = @_;
136 my $self = $class->SUPER::new(@args);
137 $self->_initialize_io(@args);
139 return $self;
142 =head2 next_result
144 Title : next_result
145 Usage : my $r = $molphy->next_result
146 Function: Get the next result set from parser data
147 Returns : Bio::Tools::Phylo::Molphy::Result object
148 Args : none
151 =cut
153 sub next_result{
154 my ($self) = @_;
156 # A little statemachine for the parser here
157 my ($state,$transition_ct,
158 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
159 my ( %subst_matrix, @treelines, @treedata, %frequencies);
160 my ( $treenum,$possible_trees, $model);
161 my ($trans_type,$trans_amount);
162 my $parsed = 0;
163 while( defined ( $_ = $self->_readline()) ) {
164 $parsed = 1;
165 if( /^Relative Substitution Rate Matrix/ ) {
166 if( %subst_matrix ) {
167 $self->_pushback($_);
168 last;
170 $state = 0;
171 my ( @tempdata);
172 @resloc = ();
173 while( defined ($_ = $self->_readline) ) {
174 last if (/^\s+$/);
175 # remove leading/trailing spaces
176 s/^\s+//;
177 s/\s+$//;
178 my @data = split;
179 my $i = 0;
180 for my $l ( @data ) {
181 if( $l =~ /\D+/ ) {
182 push @resloc, $l;
184 $i++;
186 push @tempdata, \@data;
188 my $i = 0;
189 for my $row ( @tempdata ) {
190 my $j = 0;
191 for my $col ( @$row ) {
192 if( $i == $j ) {
193 # empty string for diagonals
194 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
195 } else {
196 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
198 $j++;
200 $i++;
202 } elsif( /^Transition Probability Matrix/ ) {
203 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
204 $state = 1;
205 my $newtrans_type = "$3-$1";
206 $trans_amount = $1;
207 if( defined $trans_type ) {
208 # finish processing the transition_matrix
209 my $i =0;
210 foreach my $row ( @transition_matrix ) {
211 my $j = 0;
212 foreach my $col ( @$row ) {
213 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
214 $j++;
216 $i++;
219 $trans_type = $newtrans_type;
220 $transition_ct = 0;
221 @transition_matrix = ();
223 } elsif ( /Acid Frequencies/ ) {
224 $state = 0;
225 $self->_readline(); # skip the next line
226 while( defined( $_ = $self->_readline) ) {
227 unless( /^\s+/) {
228 $self->_pushback($_);
229 last;
231 s/^\s+//;
232 s/\s+$//;
233 my ($index,$res,$model,$data) = split;
234 $frequencies{$res} = [ $model,$data];
236 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
237 my @save = ($1,$2,$3);
238 # finish processing the transition_matrix
239 my $i =0;
240 foreach my $row ( @transition_matrix ) {
241 my $j = 0;
242 foreach my $col ( @$row ) {
243 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
244 $j++;
246 $i++;
248 if( defined $treenum ) {
249 $self->_pushback($_);
250 last;
253 $state = 2;
254 ($treenum,$possible_trees, $model) = @save;
255 $model =~ s/\s+/ /g;
256 } elsif( $state == 1 ) {
257 next if( /^\s+$/ || /^\s+Ala/);
258 s/^\s+//;
259 s/\s+$//;
260 if( $trans_type eq '1PAM-1.0e7' ) {
261 # because the matrix is split up into 2-10 column sets
262 push @{$transition_matrix[$transition_ct++]}, split ;
263 $transition_ct = 0 if $transition_ct % 20 == 0;
264 } elsif( $trans_type eq '1PAM-1.0e5' ) {
265 # because the matrix is split up into 2-10 column sets
266 my ($res,@row) = split;
267 next if $transition_ct >= 20; # skip last
268 push @{$transition_matrix[$transition_ct++]}, @row;
270 } elsif( $state == 2 ) {
271 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
272 push @treedata, [ $1,$2];
274 # save this for the end so that we can
275 # be efficient and only open one tree parser
276 push @treelines, $_;
279 # waiting till the end to do this, is it better
280 my @trees;
281 if( @treelines ) {
282 my $strdat = IO::String->new(join('',@treelines));
283 my $treeio = Bio::TreeIO->new(-fh => $strdat,
284 -format => 'newick');
285 while( my $tree = $treeio->next_tree ) {
286 if( @treedata ) {
287 my $dat = shift @treedata;
288 # set the associated information
289 $tree->id($dat->[0]);
290 $tree->score($dat->[1]);
292 push @trees, $tree;
295 return unless( $parsed );
296 my $result = Bio::Tools::Phylo::Molphy::Result->new
297 (-trees => \@trees,
298 -substitution_matrix => \%subst_matrix,
299 -frequencies => \%frequencies,
300 -model => $model,
301 -search_space => $possible_trees,
303 while( my ($type,$mat) = each %transition_mat ) {
304 $result->transition_probability_matrix( $type,$mat);
306 $result;