maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / Align / PairwiseStatistics.pm
blob13adc339abcc0a84b8081033adbeed39eadd0b19
2 # BioPerl module for Bio::Align::PairwiseStatistics
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
18 =head1 SYNOPSIS
20 use strict;
21 my $stats = Bio::Align::PairwiseStatistics->new();
23 # get alignment object of two sequences somehow
24 my $pwaln;
25 print $stats->number_of_comparable_bases($pwaln);
26 my $score = $stats->score_nuc($pwaln);
29 =head1 DESCRIPTION
31 Calculate pairwise statistics.
33 =head1 FEEDBACK
35 =head2 Mailing Lists
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 =head2 Support
46 Please direct usage questions or support issues to the mailing list:
48 I<bioperl-l@bioperl.org>
50 rather than to the module maintainer directly. Many experienced and
51 reponsive experts will be able look at the problem and quickly
52 address it. Please include a thorough description of the problem
53 with code and data examples if at all possible.
55 =head2 Reporting Bugs
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via the
59 web:
61 https://github.com/bioperl/bioperl-live/issues
63 =head1 AUTHOR - Jason Stajich
65 Email jason-at-bioperl-dot-org
67 =head1 APPENDIX
69 The rest of the documentation details each of the object methods.
70 Internal methods are usually preceded with a _
72 =cut
75 # Let the code begin...
78 package Bio::Align::PairwiseStatistics;
80 use vars qw($GapChars);
81 use strict;
84 BEGIN { $GapChars = '(\.|\-)'; }
86 use base qw(Bio::Root::Root Bio::Align::StatisticsI);
88 =head2 number_of_comparable_bases
90 Title : number_of_comparable_bases
91 Usage : my $bases = $stat->number_of_comparable_bases($aln);
92 Function: Returns the count of the number of bases that can be
93 compared (L) in this alignment ( length - gaps)
94 Returns : integer
95 Args : L<Bio::Align::AlignI>
98 =cut
100 sub number_of_comparable_bases{
101 my ($self,$aln) = @_;
102 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
103 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
104 "Bio::Align::PairwiseStatistics");
105 return 0;
106 } elsif ( $aln->num_sequences != 2 ) {
107 $self->throw("Only pairwise calculations supported. Found ".
108 $aln->num_sequences." sequences in alignment\n");
110 my $L = $aln->length - $self->number_of_gaps($aln);
111 return $L;
114 =head2 number_of_differences
116 Title : number_of_differences
117 Usage : my $nd = $stat->number_of_distances($aln);
118 Function: Returns the number of differences between two sequences
119 Returns : integer
120 Args : L<Bio::Align::AlignI>
123 =cut
125 sub number_of_differences{
126 my ($self,$aln) = @_;
127 if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
128 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
129 "Bio::Align::PairwiseStatistics");
130 } elsif ( $aln->num_sequences != 2 ) {
131 $self->throw("Only pairwise calculations supported. Found ".
132 $aln->num_sequences." sequences in alignment\n");
134 my (@seqs);
135 foreach my $seq ( $aln->each_seq ) {
136 push @seqs, [ split(//,$seq->seq())];
138 my $firstseq = shift @seqs;
139 #my $secondseq = shift @seqs;
140 my $diffcount = 0;
141 for (my $i = 0;$i<$aln->length; $i++ ) {
142 next if ( $firstseq->[$i] =~ /^$GapChars$/ );
143 foreach my $seq ( @seqs ) {
144 next if ( $seq->[$i] =~ /^$GapChars$/ );
145 if( $firstseq->[$i] ne $seq->[$i] ) {
146 $diffcount++;
150 return $diffcount;
153 =head2 number_of_gaps
155 Title : number_of_gaps
156 Usage : my $nd = $stat->number_of_gaps($aln);
157 Function: Returns the number of gapped positions among sequences in alignment
158 Returns : integer
159 Args : L<Bio::Align::AlignI>
162 =cut
164 sub number_of_gaps{
165 my ($self,$aln) = @_;
166 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
167 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
168 "Bio::Align::PairwiseStatistics");
169 } elsif ( $aln->num_sequences != 2 ) {
170 $self->throw("Only pairwise calculations supported. Found ".
171 $aln->num_sequences." sequences in alignment\n");
173 my $gapline = $aln->gap_line;
174 # this will count the number of '-' characters
175 return $gapline =~ tr/-/-/;
179 =head2 score_nuc
181 Title : score_nuc
182 Usage : my $score = $stat->score_nuc($aln);
184 my $score = $stat->score_nuc(
185 -aln =>$aln,
186 -match => 1,
187 -mismatch => -1,
188 -gap_open => -1,
189 -gap_ext => -1
191 Function: Calculate the score of an alignment of 2 nucleic acid sequences. The
192 scoring parameters can be specified. Otherwise the blastn default
193 parameters are used: match = 2, mismatch = -3, gap opening = -5, gap
194 extension = -2
195 Returns : alignment score (number)
196 Args : L<Bio::Align::AlignI>
197 match score [optional]
198 mismatch score [optional]
199 gap opening score [optional]
200 gap extension score [optional]
202 =cut
204 sub score_nuc {
205 my ($self, @args) = @_;
206 my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(
207 ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );
208 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
209 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
210 "Bio::Align::PairwiseStatistics");
211 } elsif ( $aln->num_sequences != 2 ) {
212 $self->throw("Only pairwise calculations supported. Found ".
213 $aln->num_sequences." sequences in alignment\n");
215 my $seq1 = $aln->get_seq_by_pos(1);
216 my $seq2 = $aln->get_seq_by_pos(2);
217 if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&
218 ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {
219 $self->throw("Can only score nucleic acid alignments");
221 $match ||= 2; # Blastn scoring defaults
222 $mismatch ||= -3;
223 $gap_open ||= -5;
224 $gap_ext ||= -2;
225 my $score = 0;
226 my $prevres1 = '-';
227 my $prevres2 = '-';
228 for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {
229 my $res1 = $seq1->subseq($pos, $pos);
230 my $res2 = $seq2->subseq($pos, $pos);
231 if (!($res1 eq '-' || $res2 eq '-')) { # no gap
232 if ($res1 eq $res2) { # same residue
233 $score += $match;
234 } else { # other residue
235 $score += $mismatch;
237 } else { # open or ext gap?
238 my $open = 0;
239 if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap
240 my $prevres = $prevres1;
241 $prevres = $prevres2 if $res2 eq '-';
242 $open = 1 unless $prevres eq '-';
243 } else { # 2 gaps
244 $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';
246 if ($open) {
247 $score += $gap_open; # gap opening
248 } else {
249 $score += $gap_ext; # gap extension
252 $prevres1 = $res1;
253 $prevres2 = $res2;
255 return $score;