2 # BioPerl module for Bio::Align::PairwiseStatistics
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
21 my $stats = Bio::Align::PairwiseStatistics->new();
23 # get alignment object of two sequences somehow
25 print $stats->number_of_comparable_bases($pwaln);
26 my $score = $stats->score_nuc($pwaln);
31 Calculate pairwise statistics.
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 Please direct usage questions or support issues to the mailing list:
48 I<bioperl-l@bioperl.org>
50 rather than to the module maintainer directly. Many experienced and
51 reponsive experts will be able look at the problem and quickly
52 address it. Please include a thorough description of the problem
53 with code and data examples if at all possible.
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via the
61 https://github.com/bioperl/bioperl-live/issues
63 =head1 AUTHOR - Jason Stajich
65 Email jason-at-bioperl-dot-org
69 The rest of the documentation details each of the object methods.
70 Internal methods are usually preceded with a _
75 # Let the code begin...
78 package Bio
::Align
::PairwiseStatistics
;
80 use vars
qw($GapChars);
84 BEGIN { $GapChars = '(\.|\-)'; }
86 use base qw(Bio::Root::Root Bio::Align::StatisticsI);
88 =head2 number_of_comparable_bases
90 Title : number_of_comparable_bases
91 Usage : my $bases = $stat->number_of_comparable_bases($aln);
92 Function: Returns the count of the number of bases that can be
93 compared (L) in this alignment ( length - gaps)
95 Args : L<Bio::Align::AlignI>
100 sub number_of_comparable_bases
{
101 my ($self,$aln) = @_;
102 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
103 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
104 "Bio::Align::PairwiseStatistics");
106 } elsif ( $aln->num_sequences != 2 ) {
107 $self->throw("Only pairwise calculations supported. Found ".
108 $aln->num_sequences." sequences in alignment\n");
110 my $L = $aln->length - $self->number_of_gaps($aln);
114 =head2 number_of_differences
116 Title : number_of_differences
117 Usage : my $nd = $stat->number_of_distances($aln);
118 Function: Returns the number of differences between two sequences
120 Args : L<Bio::Align::AlignI>
125 sub number_of_differences
{
126 my ($self,$aln) = @_;
127 if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
128 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
129 "Bio::Align::PairwiseStatistics");
130 } elsif ( $aln->num_sequences != 2 ) {
131 $self->throw("Only pairwise calculations supported. Found ".
132 $aln->num_sequences." sequences in alignment\n");
135 foreach my $seq ( $aln->each_seq ) {
136 push @seqs, [ split(//,$seq->seq())];
138 my $firstseq = shift @seqs;
139 #my $secondseq = shift @seqs;
141 for (my $i = 0;$i<$aln->length; $i++ ) {
142 next if ( $firstseq->[$i] =~ /^$GapChars$/ );
143 foreach my $seq ( @seqs ) {
144 next if ( $seq->[$i] =~ /^$GapChars$/ );
145 if( $firstseq->[$i] ne $seq->[$i] ) {
153 =head2 number_of_gaps
155 Title : number_of_gaps
156 Usage : my $nd = $stat->number_of_gaps($aln);
157 Function: Returns the number of gapped positions among sequences in alignment
159 Args : L<Bio::Align::AlignI>
165 my ($self,$aln) = @_;
166 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
167 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
168 "Bio::Align::PairwiseStatistics");
169 } elsif ( $aln->num_sequences != 2 ) {
170 $self->throw("Only pairwise calculations supported. Found ".
171 $aln->num_sequences." sequences in alignment\n");
173 my $gapline = $aln->gap_line;
174 # this will count the number of '-' characters
175 return $gapline =~ tr/-/-/;
182 Usage : my $score = $stat->score_nuc($aln);
184 my $score = $stat->score_nuc(
191 Function: Calculate the score of an alignment of 2 nucleic acid sequences. The
192 scoring parameters can be specified. Otherwise the blastn default
193 parameters are used: match = 2, mismatch = -3, gap opening = -5, gap
195 Returns : alignment score (number)
196 Args : L<Bio::Align::AlignI>
197 match score [optional]
198 mismatch score [optional]
199 gap opening score [optional]
200 gap extension score [optional]
205 my ($self, @args) = @_;
206 my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(
207 ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );
208 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
209 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
210 "Bio::Align::PairwiseStatistics");
211 } elsif ( $aln->num_sequences != 2 ) {
212 $self->throw("Only pairwise calculations supported. Found ".
213 $aln->num_sequences." sequences in alignment\n");
215 my $seq1 = $aln->get_seq_by_pos(1);
216 my $seq2 = $aln->get_seq_by_pos(2);
217 if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&
218 ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {
219 $self->throw("Can only score nucleic acid alignments");
221 $match ||= 2; # Blastn scoring defaults
228 for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {
229 my $res1 = $seq1->subseq($pos, $pos);
230 my $res2 = $seq2->subseq($pos, $pos);
231 if (!($res1 eq '-' || $res2 eq '-')) { # no gap
232 if ($res1 eq $res2) { # same residue
234 } else { # other residue
237 } else { # open or ext gap?
239 if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap
240 my $prevres = $prevres1;
241 $prevres = $prevres2 if $res2 eq '-';
242 $open = 1 unless $prevres eq '-';
244 $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';
247 $score += $gap_open; # gap opening
249 $score += $gap_ext; # gap extension