From 563e929ab685d91bcf13842c2feb03861a4368df Mon Sep 17 00:00:00 2001 From: Chris Fields Date: Tue, 3 Aug 2010 15:05:01 -0500 Subject: [PATCH] [bug 2984] end was calculated based on arbitrary symbols; punt and let LocatableSeq do this, for consistency --- Bio/AlignIO/phylip.pm | 12 ++++++------ Bio/LocatableSeq.pm | 7 +++++++ Changes | 4 +++- t/AlignIO/phylip.t | 16 +++++++++++++++- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/Bio/AlignIO/phylip.pm b/Bio/AlignIO/phylip.pm index 3707b02c6..efe22d30d 100644 --- a/Bio/AlignIO/phylip.pm +++ b/Bio/AlignIO/phylip.pm @@ -44,7 +44,7 @@ Bio::AlignIO::phylip - PHYLIP format sequence input/output stream =head1 DESCRIPTION This object can transform Bio::SimpleAlign objects to and from PHYLIP -fotmat. By deafult it works with the interleaved format. By specifying +format. By default it works with the interleaved format. By specifying the flag -interleaved =E<gt> 0 in the initialization the module can read or write data in sequential format. @@ -226,8 +226,8 @@ sub next_aln { $str =~ s/\s//g; $count = scalar @names; $hash{$count} .= $str; - } elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ || - $entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdnes s when id is too long + } elsif( $entry =~ /^(.{$idlen})\s*(.*)\s$/ || + $entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdness when id is too long ) { $name = $1; $str = $2; @@ -289,8 +289,8 @@ sub next_aln { $seqname=$name; $start = 1; $str = $hash{$count}; - $str =~ s/[^A-Za-z]//g; - $end = length($str); +# $str =~ s/[^A-Za-z]//g; + #$end = length($str); } # consistency test $self->throw("Length of sequence [$seqname] is not [$residuecount] it is ".CORE::length($hash{$count})."! ") @@ -299,7 +299,7 @@ sub next_aln { $seq = Bio::LocatableSeq->new('-seq' => $hash{$count}, '-display_id' => $seqname, '-start' => $start, - '-end' => $end, + (defined $end) ? 
('-end' => $end) : (), '-alphabet' => $self->alphabet, ); $aln->add_seq($seq); diff --git a/Bio/LocatableSeq.pm b/Bio/LocatableSeq.pm index f48aa8e57..942b4801e 100644 --- a/Bio/LocatableSeq.pm +++ b/Bio/LocatableSeq.pm @@ -224,6 +224,13 @@ sub _ungapped_len { return CORE::length($string)/($map_res/$map_coord) + $offset/($map_coord/$map_res); } +#sub length { +# my $self = shift; +# return unless my $string = $self->seq; +# $string =~ s{[$GAP_SYMBOLS$FRAMESHIFT_SYMBOLS]+}{}g; +# return CORE::length($string); +#} + =head2 strand Title : strand diff --git a/Changes b/Changes index b3097a1dd..c2475a99c 100644 --- a/Changes +++ b/Changes @@ -41,7 +41,9 @@ CPAN releases are branched from 'master'. * bug 3120 - bp_seqfeature_gff3.pl '-y' option [genehack, David Breimann] * bug 3126 - catch description [Toshihiko Akiba] * bug 2983 - fix score/percent ID mixup [Alexie Papanicolaou] - + * bug 2984 - let LocatableSeq decide on length of phylip aln [Adam Witney, + cjfields] + [Deprecated] * Bio::Expression modules - these were originally designed to go with the bioperl-microarray suite of tools, however they have never been completed diff --git a/t/AlignIO/phylip.t b/t/AlignIO/phylip.t index 5d44fc013..be4cf044a 100644 --- a/t/AlignIO/phylip.t +++ b/t/AlignIO/phylip.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 11); + test_begin(-tests => 16); use_ok('Bio::AlignIO::phylip'); } @@ -31,6 +31,20 @@ $strout = Bio::AlignIO->new( $status = $strout->write_aln($aln); is $status, 1, "phylip output test"; +# check the LocatableSeq start/end/strand etc +my $ls = $aln->get_seq_by_pos(2); +is($ls->display_id, 'Pan_panisc'); +is($ls->start, 1); +is($ls->end,47); + +# bug 2984 +TODO: { + local $TODO = 'problems with default strand, length?'; + # shouldn't this be 0? 
+ is($ls->strand,0); + is($ls->length,47); +} + # PHYLIP sequential/non-interleaved $strout = Bio::AlignIO->new('-file' => test_input_file('noninterleaved.phy'), '-format' => 'phylip'); -- 2.11.4.GIT