bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / TreeIO / pag.pm
blob3e189010a6a2665aa7378a0b461b2673f77c6b46
1 # $Id$
3 # BioPerl module for Bio::TreeIO::pag
5 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::TreeIO::pag - Bio::TreeIO driver for Pagel format
17 =head1 SYNOPSIS
19 use Bio::TreeIO;
20 my $in = Bio::TreeIO->new(-format => 'nexus',
21 -file => 't/data/adh.mb_tree.nexus');
23 my $out = Bio::TreeIO->new(-format => 'pag');
24 while( my $tree = $in->next_tree ) {
25 $out->write_tree($tree);
28 =head1 DESCRIPTION
30 Convert a Bio::TreeIO to Pagel format.
31 More information here http://sapc34.rdg.ac.uk/meade/Mark/
33 =head1 FEEDBACK
35 =head2 Mailing Lists
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 =head2 Reporting Bugs
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 of the bugs and their resolution. Bug reports can be submitted via
48 the web:
50 http://bugzilla.open-bio.org/
52 =head1 AUTHOR - Jason Stajich
54 Email jason-at-bioperl-dot-org
56 =head1 APPENDIX
58 The rest of the documentation details each of the object methods.
59 Internal methods are usually preceded with a _
61 =cut
64 # Let the code begin...
67 package Bio::TreeIO::pag;
68 use strict;
70 our $TaxonNameLen = 10;
72 use base qw(Bio::TreeIO);
74 =head2 new
76 Title : new
77 Usage : my $obj = Bio::TreeIO::pag->new();
78 Function: Builds a new Bio::TreeIO::pag object
79 Returns : an instance of Bio::TreeIO::pag
80 Args : -file/-fh for filename or filehandles
81 -name_length for minimum name length (default = 10)
83 =cut
85 sub _initialize {
86 my $self = shift;
87 $self->SUPER::_initialize(@_);
88 my ( $name_length ) = $self->_rearrange(
90 qw(NAME_LENGTH)
94 $self->name_length( defined $name_length ? $name_length : $TaxonNameLen );
97 =head2 write_tree
99 Title : write_tree
100 Usage :
101 Function: Write a tree out in Pagel format
102 Some options are only appropriate for bayesianmultistate and
103 the simpler output is only proper for discrete
104 Returns : none
105 Args : -no_outgroups => (number)
106 -print_header => 0/1 (leave 0 for discrete, 1 for bayesianms)
107 -special_node => special node - not sure what they wanted to do here
108 -keep_outgroup => 0/1 (keep the outgroup node in the output)
109 -outgroup_ancestor => Bio::Tree::Node (if we want to exclude or include the outgroup this is what we operate on)
110 -tree_no => a tree number label - only useful for BayesianMultistate
113 =cut
115 sub write_tree {
116 my ($self,$tree,@args) = @_;
117 my ($keep_outgroup,
118 $print_header,
119 $no_outgroups,
120 $special_node,
121 $outgroup_ancestor,
122 $tree_no) = (0,0,1);
123 my $name_len = $self->name_length;
124 if( @args ) {
125 ($no_outgroups,
126 $print_header,
127 $special_node,
128 $outgroup_ancestor,
129 $tree_no,
130 $keep_outgroup) = $self->_rearrange([qw(
131 NO_OUTGROUPS
132 PRINT_HEADER
133 SPECIAL_NODE
134 OUTGROUP_ANCESTOR
135 TREE_NO
136 KEEP_OUTGROUP
137 NAME_LENGTH)],@args);
139 my $newname_base = 1;
141 my $root = $tree->get_root_node;
142 my $eps = 0.0001;
143 my (%chars,%names);
144 my @nodes = $tree->get_nodes;
145 my $species_ct;
146 my $traitct;
147 for my $node ( @nodes ) {
148 if ((defined $special_node) && ($node eq $special_node)) {
149 my $no_of_tree_nodes = scalar(@nodes);
150 my $node_name = sprintf("N%d",$no_of_tree_nodes+1);
151 $names{$node->internal_id} = $node_name;
153 } elsif ($node->is_Leaf) {
154 $species_ct++;
156 my $node_name = $node->id;
157 if( length($node_name)> $name_len ) {
158 $self->warn( "Found a taxon name longer than $name_len letters, \n",
159 "name will be abbreviated.\n");
160 $node_name = substr($node_name, 0,$name_len);
161 } else {
162 # $node_name = sprintf("%-".$TaxonNameLen."s",$node_name);
164 $names{$node->internal_id} = $node_name;
165 my @tags = sort $node->get_all_tags;
166 my @charstates = map { ($node->get_tag_values($_))[0] } @tags;
167 $traitct = scalar @charstates unless defined $traitct;
168 $chars{$node->internal_id} = [@charstates];
169 } else {
170 $names{$node->internal_id} = sprintf("N%d", $newname_base++);
174 # generate PAG representation
175 if( $print_header ) {
176 if ($keep_outgroup) {
177 $self->_print(sprintf("%d %d\n",$species_ct,$traitct));
178 } else {
179 $self->_print( sprintf("%d %d\n",$species_ct-$no_outgroups,$traitct));
183 my @ancestors = ();
184 if ($keep_outgroup) {
185 push @ancestors, $root;
186 } else {
187 push @ancestors, ( $root, $outgroup_ancestor);
189 my @rest;
190 foreach my $node (@nodes) {
191 my $i = 0;
192 foreach my $anc (@ancestors) {
193 if ($anc && $node eq $anc) { $i = 1; last }
195 unless ($i > 0) { # root not given in PAG
196 my $current_name = $names{$node->internal_id};
197 my $branch_length_to_output;
198 if ($node->branch_length < $eps) {
199 my $msg_nodename = $current_name;
200 $msg_nodename =~ s/\s+$//;
201 warn( "TREE $tree_no, node \"$msg_nodename\": branch too ",
202 "short (", $node->branch_length, "): increasing length to ",
203 "$eps\n");
204 $branch_length_to_output = $eps;
205 } else {
206 $branch_length_to_output = $node->branch_length;
208 my @line = ( $current_name,
209 $names{$node->ancestor->internal_id},
210 $branch_length_to_output);
212 if ($node->is_Leaf) {
213 push @line, @{$chars{$node->internal_id}};
214 $self->_print(join(',', @line),"\n");
215 } else {
216 push @rest, \@line;
220 for ( @rest ) {
221 $self->_print(join(',', @$_),"\n");
225 =head2 next_tree
227 Title : next_tree
228 Usage :
229 Function:
230 Example :
231 Returns :
232 Args :
235 =cut
237 sub next_tree{
238 my ($self,@args) = @_;
239 $self->throw_not_implemented();
242 =head2 name_length
244 Title : name_length
245 Usage : $self->name_length(20);
246 Function: set mininum taxon name length
247 Returns : integer (length of name)
248 Args : integer
250 =cut
252 sub name_length {
253 my ($self, $val) = @_;
254 return $self->{'name_len'} = $val if $val;
255 return $self->{'name_len'};