3 # BioPerl module for Bio::Align::PairwiseStatistics
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
20 my $stats = Bio::Align::PairwiseStatistics->new();
22 # get alignment object of two sequences somehow
24 print $stats->number_of_comparable_bases($pwaln);
25 my $score = $stats->score_nuc($pwaln);
30 Calculate pairwise statistics.
36 User feedback is an integral part of the evolution of this and other
37 Bioperl modules. Send your comments and suggestions preferably to
38 the Bioperl mailing list. Your participation is much appreciated.
40 bioperl-l@bioperl.org - General discussion
41 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 Report bugs to the Bioperl bug tracking system to help us keep track
46 of the bugs and their resolution. Bug reports can be submitted via the
49 http://bugzilla.open-bio.org/
51 =head1 AUTHOR - Jason Stajich
53 Email jason-at-bioperl-dot-org
57 The rest of the documentation details each of the object methods.
58 Internal methods are usually preceded with a _
63 # Let the code begin...
66 package Bio
::Align
::PairwiseStatistics
;
67 use vars
qw($GapChars);
71 BEGIN { $GapChars = '(\.|\-)'; }
73 use base qw(Bio::Root::Root Bio::Align::StatisticsI);
75 =head2 number_of_comparable_bases
77 Title : number_of_comparable_bases
78 Usage : my $bases = $stat->number_of_comparable_bases($aln);
79 Function: Returns the count of the number of bases that can be
80 compared (L) in this alignment ( length - gaps)
82 Args : L<Bio::Align::AlignI>
87 sub number_of_comparable_bases
{
89 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
90 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
91 "Bio::Align::PairwiseStatistics");
93 } elsif ( $aln->no_sequences != 2 ) {
94 $self->throw("Only pairwise calculations supported. Found ".
95 $aln->no_sequences." sequences in alignment\n");
97 my $L = $aln->length - $self->number_of_gaps($aln);
101 =head2 number_of_differences
103 Title : number_of_differences
104 Usage : my $nd = $stat->number_of_distances($aln);
105 Function: Returns the number of differences between two sequences
107 Args : L<Bio::Align::AlignI>
112 sub number_of_differences
{
113 my ($self,$aln) = @_;
114 if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
115 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
116 "Bio::Align::PairwiseStatistics");
117 } elsif ( $aln->no_sequences != 2 ) {
118 $self->throw("Only pairwise calculations supported. Found ".
119 $aln->no_sequences." sequences in alignment\n");
122 foreach my $seq ( $aln->each_seq ) {
123 push @seqs, [ split(//,$seq->seq())];
125 my $firstseq = shift @seqs;
126 #my $secondseq = shift @seqs;
128 for (my $i = 0;$i<$aln->length; $i++ ) {
129 next if ( $firstseq->[$i] =~ /^$GapChars$/ );
130 foreach my $seq ( @seqs ) {
131 next if ( $seq->[$i] =~ /^$GapChars$/ );
132 if( $firstseq->[$i] ne $seq->[$i] ) {
140 =head2 number_of_gaps
142 Title : number_of_gaps
143 Usage : my $nd = $stat->number_of_gaps($aln);
144 Function: Returns the number of gapped positions among sequences in alignment
146 Args : L<Bio::Align::AlignI>
152 my ($self,$aln) = @_;
153 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
154 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
155 "Bio::Align::PairwiseStatistics");
156 } elsif ( $aln->no_sequences != 2 ) {
157 $self->throw("Only pairwise calculations supported. Found ".
158 $aln->no_sequences." sequences in alignment\n");
160 my $gapline = $aln->gap_line;
161 # this will count the number of '-' characters
162 return $gapline =~ tr/-/-/;
169 Usage : my $score = $stat->score_nuc($aln);
171 my $score = $stat->score_nuc(
178 Function: Calculate the score of an alignment of 2 nucleic acid sequences. The
179 scoring parameters can be specified. Otherwise the blastn default
180 parameters are used: match = 2, mismatch = -3, gap opening = -5, gap
182 Returns : alignment score (number)
183 Args : L<Bio::Align::AlignI>
184 match score [optional]
185 mismatch score [optional]
186 gap opening score [optional]
187 gap extension score [optional]
192 my ($self, @args) = @_;
193 my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(
194 ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );
195 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
196 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
197 "Bio::Align::PairwiseStatistics");
198 } elsif ( $aln->no_sequences != 2 ) {
199 $self->throw("Only pairwise calculations supported. Found ".
200 $aln->no_sequences." sequences in alignment\n");
202 my $seq1 = $aln->get_seq_by_pos(1);
203 my $seq2 = $aln->get_seq_by_pos(2);
204 if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&
205 ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {
206 $self->throw("Can only score nucleic acid alignments");
208 $match ||= 2; # Blastn scoring defaults
215 for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {
216 my $res1 = $seq1->subseq($pos, $pos);
217 my $res2 = $seq2->subseq($pos, $pos);
218 if (!($res1 eq '-' || $res2 eq '-')) { # no gap
219 if ($res1 eq $res2) { # same residue
221 } else { # other residue
224 } else { # open or ext gap?
226 if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap
227 my $prevres = $prevres1;
228 $prevres = $prevres2 if $res2 eq '-';
229 $open = 1 unless $prevres eq '-';
231 $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';
234 $score += $gap_open; # gap opening
236 $score += $gap_ext; # gap extension