4 # BioPerl module for Bio::Tools::pSW
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::pSW - DESCRIPTION of Object
20 $factory = new Bio::Tools::pSW( '-matrix' => 'blosum62.bla',
25 #use the factory to make some output
27 $factory->align_and_show($seq1,$seq2,STDOUT);
29 # make a Bio::SimpleAlign and do something with it
31 $aln = $factory->pairwise_alignment($seq1,$seq2);
33 $aln->write_MSF(\*STDOUT);
37 This module is included with the central Bioperl distribution:
39 http://bio.perl.org/Core/Latest
40 ftp://bio.perl.org/pub/DIST
42 Follow the installation instructions included in the README file.
46 pSW is an Alignment Factory. It builds pairwise alignments using the
47 smith waterman algorithm. The alignment algorithm is implemented in C
48 and added in using an XS extension. The XS extension basically comes
49 from the Wise2 package, but has been slimmed down to only be the
50 alignment part of that (this is a good thing!). The XS extension comes
51 from the bioperl-ext package which is distributed along with bioperl.
52 I<Warning> This package will not work if you have not compiled the
55 The mixture of C and Perl is ideal for this sort of
56 problem. Here are some plus points for this strategy:
60 =item Speed and Memory
62 The algorithm is actually implemented in C, which means it is faster than
63 a pure perl implementation (I have never done one, so I have no idea
64 how faster) and will use considerably less memory, as it efficiently
65 assigns memory for the calculation.
67 =item Algorithm efficiency
69 The algorithm was written using Dynamite, and so contains an automatic
70 switch to the linear space divide and conquor method. This means you
71 could effectively align very large sequences without killing your machine
72 (it could take a while though!).
78 warning - this module is under active development. Eventually it should
79 contain the ability to make alignment objects such as Bio::SimpleAlign
86 User feedback is an integral part of the evolution of this and other Bioperl modules.
87 Send your comments and suggestions preferably to one of the Bioperl mailing lists.
88 Your participation is much appreciated.
90 vsns-bcd-perl@lists.uni-bielefeld.de - General discussion
91 vsns-bcd-perl-guts@lists.uni-bielefeld.de - Technically-oriented discussion
92 http://bio.perl.org/MailList.html - About the mailing lists
96 Report bugs to the Bioperl bug tracking system to help us keep track the bugs and
97 their resolution. Bug reports can be submitted via email or the web:
99 bioperl-bugs@bio.perl.org
100 http://bio.perl.org/bioperl-bugs/
104 Ewan Birney, birney@sanger.ac.uk
108 The rest of the documentation details each of the object methods. Internal methods are usually preceded with an underscore "_".
113 # Let the code begin...
116 package Bio
::Tools
::pSW
;
117 use vars
qw($AUTOLOAD @ISA);
121 use Bio::Tools::AlignFactory;
122 use Bio::SimpleAlign;
125 @ISA = qw(Bio::Tools::AlignFactory Exporter);
129 require Bio
::Ext
::Align
;
132 print STDERR
("\nThe C-compiled engine for Smith Waterman alignments (Bio::Ext::Align) has not been installed.\n Please read the install the bioperl-ext package\n\n");
138 # new() is inherited from Bio::Root::Object
140 # _initialize is where the heavy stuff will happen when new is called
147 my($matrix,$gap,$ext) = $self->_rearrange([qw(MATRIX
152 my $make = $self->SUPER::_initialize
(@p);
154 #default values - we have to load matrix into memory, so
155 # we need to check it out now
156 if( ! defined $matrix || !($matrix =~ /\w/) ) {
157 $matrix = 'blosum62.bla';
160 $self->matrix($matrix); # will throw exception if it can't load it
164 #I'm pretty sure I am not doing this right... ho hum...
167 $gap =~ /^\d+$/ || $self->throw("Gap penalty must be a number, not [$gap]");
172 $ext =~ /^\d+$/ || $self->throw("Extension penalty must be a number, not [$ext]");
176 return $make; # success - we hope!
180 =head2 pairwise_alignment
182 Title : pairwise_alignment
183 Usage : $aln = $factory->pairwise_alignment($seq1,$seq2)
184 Function: Makes a SimpleAlign object from two sequences
185 Returns : A SimpleAlign object
191 sub pairwise_alignment
{
192 my ($self,$seq1,$seq2) = @_;
193 my($t1,$t2,$aln,$out,@str1,@str2,@ostr1,@ostr2,$alc,$tstr,$tid,$start1,$end1,$start2,$end2,$alctemp);
195 $self->set_memory_and_report();
196 # create engine objects
198 $t1 = &Bio
::Ext
::Align
::new_Sequence_from_strings
($seq1->id(),$seq1->str());
199 $t2 = &Bio
::Ext
::Align
::new_Sequence_from_strings
($seq2->id(),$seq2->str());
200 $aln = &Bio
::Ext
::Align
::Align_Sequences_ProteinSmithWaterman
($t1,$t2,$self->{'matrix'},-$self->gap,-$self->ext);
201 if( ! defined $aln || $aln == 0 ) {
202 $self->throw("Unable to build an alignment");
206 # free sequence engine objects
210 # now we have to get into the AlnBlock structure and
211 # figure out what is aligned to what...
213 # we are going to need the sequences as arrays for convience
215 @str1 = $seq1->seq();
216 @str2 = $seq2->seq();
218 # get out start points
220 # The alignment is in alignment coordinates - ie the first
221 # residues starts at -1 and ends at 0. (weird I know).
222 # bio-coordinates are +2 from this...
224 $start1 = $aln->start()->alu(0)->start +2;
225 $start2 = $aln->start()->alu(1)->start +2;
227 # step along the linked list of alc units...
229 for($alc = $aln->start();$alc->at_end() != 1;$alc = $alc->next()) {
230 if( $alc->alu(0)->text_label eq 'SEQUENCE' ) {
231 push(@ostr1,$str1[$alc->alu(0)->start+1]);
233 # assumme it is in insert!
237 if( $alc->alu(1)->text_label eq 'SEQUENCE' ) {
238 push(@ostr2,$str2[$alc->alu(1)->start+1]);
240 # assumme it is in insert!
250 # end points = real residue end in 'C' coordinates = residue
251 # end in biocoordinates. Oh... the wonder of coordinate systems!
253 $end1 = $alctemp->alu(0)->end;
254 $end2 = $alctemp->alu(1)->end;
256 # get rid of the alnblock
260 # new SimpleAlignment
261 $out = Bio
::SimpleAlign
->new(); # new SimpleAlignment
263 $tstr = join('',@ostr1);
265 $out->addSeq(Bio
::Seq
->new( -seq
=> $tstr,
270 $tstr = join('',@ostr2);
272 $out->addSeq(Bio
::Seq
->new( -seq
=> $tstr,
277 # give'm back the alignment
282 =head2 align_and_show
284 Title : align_and_show
285 Usage : $factory->align_and_show($seq1,$seq2,STDOUT)
290 my($self,$seq1,$seq2,$fh) = @_;
291 my($t1,$t2,$aln,$id,$str);
293 $self->set_memory_and_report();
295 $t1 = &bp_sw
::new_Sequence_from_strings
($seq1->id(),$seq1->str());
297 $t2 = &bp_sw
::new_Sequence_from_strings
($seq2->id(),$seq2->str());
298 $aln = &bp_sw
::Align_Sequences_ProteinSmithWaterman
($t1,$t2,$self->{'matrix'},-$self->gap,-$self->ext);
299 if( ! defined $aln || $aln == 0 ) {
300 $self->throw("Unable to build an alignment");
303 &bp_sw
::write_pretty_seq_align
($aln,$t1,$t2,12,50,$fh);
310 Usage : $factory->matrix('blosum62.bla');
311 Function : Reads in comparison matrix based on name
314 Argument : comparison matrix
319 my($self,$comp) = @_;
322 if( !defined $comp ) {
323 $self->throw("You must have a comparison matrix to set!");
326 # talking to the engine here...
328 $temp = &bp_sw
::CompMat
::read_Blast_file_CompMat
($comp);
330 if( !(defined $temp) || $temp == 0 ) {
331 $self->throw("$comp cannot be read as a BLAST comparison matrix file");
334 $self->{'matrix'} = $temp;
342 Usage : $gap = $factory->gap() #get
343 : $factory->gap($value) #set
344 Function : the set get for the gap penalty
347 Arguments : new value
352 my ($self,$val) = @_;
357 $self->throw("Can't have a gap penalty less than 0");
359 $self->{'gap'} = $val;
361 return $self->{'gap'};
368 Usage : $ext = $factory->ext() #get
369 : $factory->ext($value) #set
370 Function : the set get for the ext penalty
373 Arguments : new value
378 my ($self,$val) = @_;
382 $self->throw("Can't have a gap penalty less than 0");
384 $self->{'ext'} = $val;
386 return $self->{'ext'};