2 # BioPerl module for Bio::Align::PairwiseStatistics
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
21 my $stats = Bio::Align::PairwiseStatistics->new();
23 # get alignment object of two sequences somehow
25 print $stats->number_of_comparable_bases($pwaln);
26 my $score = $stats->score_nuc($pwaln);
31 Calculate pairwise statistics.
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 Please direct usage questions or support issues to the mailing list:
48 I<bioperl-l@bioperl.org>
50 rather than to the module maintainer directly. Many experienced and
51 reponsive experts will be able look at the problem and quickly
52 address it. Please include a thorough description of the problem
53 with code and data examples if at all possible.
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via the
61 https://github.com/bioperl/bioperl-live/issues
63 =head1 AUTHOR - Jason Stajich
65 Email jason-at-bioperl-dot-org
69 The rest of the documentation details each of the object methods.
70 Internal methods are usually preceded with a _
75 # Let the code begin...
78 package Bio
::Align
::PairwiseStatistics
;
79 use vars
qw($GapChars);
83 BEGIN { $GapChars = '(\.|\-)'; }
85 use base qw(Bio::Root::Root Bio::Align::StatisticsI);
87 =head2 number_of_comparable_bases
89 Title : number_of_comparable_bases
90 Usage : my $bases = $stat->number_of_comparable_bases($aln);
91 Function: Returns the count of the number of bases that can be
92 compared (L) in this alignment ( length - gaps)
94 Args : L<Bio::Align::AlignI>
99 sub number_of_comparable_bases
{
100 my ($self,$aln) = @_;
101 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
102 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
103 "Bio::Align::PairwiseStatistics");
105 } elsif ( $aln->num_sequences != 2 ) {
106 $self->throw("Only pairwise calculations supported. Found ".
107 $aln->num_sequences." sequences in alignment\n");
109 my $L = $aln->length - $self->number_of_gaps($aln);
113 =head2 number_of_differences
115 Title : number_of_differences
116 Usage : my $nd = $stat->number_of_distances($aln);
117 Function: Returns the number of differences between two sequences
119 Args : L<Bio::Align::AlignI>
124 sub number_of_differences
{
125 my ($self,$aln) = @_;
126 if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
127 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
128 "Bio::Align::PairwiseStatistics");
129 } elsif ( $aln->num_sequences != 2 ) {
130 $self->throw("Only pairwise calculations supported. Found ".
131 $aln->num_sequences." sequences in alignment\n");
134 foreach my $seq ( $aln->each_seq ) {
135 push @seqs, [ split(//,$seq->seq())];
137 my $firstseq = shift @seqs;
138 #my $secondseq = shift @seqs;
140 for (my $i = 0;$i<$aln->length; $i++ ) {
141 next if ( $firstseq->[$i] =~ /^$GapChars$/ );
142 foreach my $seq ( @seqs ) {
143 next if ( $seq->[$i] =~ /^$GapChars$/ );
144 if( $firstseq->[$i] ne $seq->[$i] ) {
152 =head2 number_of_gaps
154 Title : number_of_gaps
155 Usage : my $nd = $stat->number_of_gaps($aln);
156 Function: Returns the number of gapped positions among sequences in alignment
158 Args : L<Bio::Align::AlignI>
164 my ($self,$aln) = @_;
165 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
166 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
167 "Bio::Align::PairwiseStatistics");
168 } elsif ( $aln->num_sequences != 2 ) {
169 $self->throw("Only pairwise calculations supported. Found ".
170 $aln->num_sequences." sequences in alignment\n");
172 my $gapline = $aln->gap_line;
173 # this will count the number of '-' characters
174 return $gapline =~ tr/-/-/;
181 Usage : my $score = $stat->score_nuc($aln);
183 my $score = $stat->score_nuc(
190 Function: Calculate the score of an alignment of 2 nucleic acid sequences. The
191 scoring parameters can be specified. Otherwise the blastn default
192 parameters are used: match = 2, mismatch = -3, gap opening = -5, gap
194 Returns : alignment score (number)
195 Args : L<Bio::Align::AlignI>
196 match score [optional]
197 mismatch score [optional]
198 gap opening score [optional]
199 gap extension score [optional]
204 my ($self, @args) = @_;
205 my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(
206 ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );
207 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
208 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
209 "Bio::Align::PairwiseStatistics");
210 } elsif ( $aln->num_sequences != 2 ) {
211 $self->throw("Only pairwise calculations supported. Found ".
212 $aln->num_sequences." sequences in alignment\n");
214 my $seq1 = $aln->get_seq_by_pos(1);
215 my $seq2 = $aln->get_seq_by_pos(2);
216 if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&
217 ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {
218 $self->throw("Can only score nucleic acid alignments");
220 $match ||= 2; # Blastn scoring defaults
227 for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {
228 my $res1 = $seq1->subseq($pos, $pos);
229 my $res2 = $seq2->subseq($pos, $pos);
230 if (!($res1 eq '-' || $res2 eq '-')) { # no gap
231 if ($res1 eq $res2) { # same residue
233 } else { # other residue
236 } else { # open or ext gap?
238 if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap
239 my $prevres = $prevres1;
240 $prevres = $prevres2 if $res2 eq '-';
241 $open = 1 unless $prevres eq '-';
243 $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';
246 $score += $gap_open; # gap opening
248 $score += $gap_ext; # gap extension