Sync that last bit with trunk. I'll have to merge that over to the tag for the next RC.
[bioperl-live.git] / examples / align / aligntutorial.pl
blob882ab58e9250c287145729ca66f252ce767e6dcc
1 #!/usr/bin/perl -w
3 # $Id$
4 # An example of how to use the different alignment tools in bioperl
5 # to align some sequences
7 # All these methods except Bio::Tools::pSW will work for DNA sequence
8 # (need to use a different matrix however)
10 use Bio::Factory::EMBOSS;
11 use Bio::SeqIO;
12 use Bio::AlignIO;
13 use Bio::Tools::pSW;
14 use Bio::PrimarySeq;
15 use Bio::Tools::Run::Alignment::Clustalw;
16 use Bio::Tools::Run::Alignment::TCoffee;
17 use Bio::Tools::Run::StandAloneBlast;
19 use strict;
20 # build the sequences since EMBOSS expects seqs to be in files
21 my $seq = new Bio::PrimarySeq(-seq =>
22 'MAVNPELAPFTLSRGIPSFDDQALSTIIQLQDCIQQAIQQLNYSTAEFLAELLYAECSILDKSSVYWSDAVYLYALSLFLNKSYHTAFQISKEFKEYHLGIAYIFGRCALQLSQGVNEAILTLLSIINVFSSNSSNTRINMVLNSNLVHIPDLATLNCLLGNLYMKLDHSKEGAFYHSEALAINPYLWESYEAICKMRATVDLKRVFFDIAGKKSNSHNNNAASSFPSTSLSHFEPRSQPSLYSKTNKNGNNNINNNVNTLFQSSNSPPSTSASSFSSIQHFSRSQQQQANTSIRTCQNKNTQTPKNPAINSKTSSALPNNISMNLVSPSSKQPTISSLAKVYNRNKLLTTPPSKLLNNDRNHQNNNNNNNNNNNNNNNNNNNNNNNNIINKTTFKTPRNLYSSTGRLTTSKKNPRSLIISNSILTSDYQITLPEIMYNFALILRSSSQYNSFKAIRLFESQIPSHIKDTMPWCLVQLGKLHFEIINYDMSLKYFNRLKDLQPARVKDMEIFSTLLWHLHDKVKSSNLANGLMDTMPNKPETWCCIGNLLSLQKDHDAAIKAFEKATQLDPNFAYAYTLQGHEHSSNDSSDSAKTCYRKALACDPQHYNAYYGLGTSAMKLGQYEEALLYFEKARSINPVNVVLICCCGGSLEKLGYKEKALQYYELACHLQPTSSLSKYKMGQLLYSMTRYNVALQTFEELVKLVPDDATAHYLLGQTYRIVGRKKDAIKELTVAMNLDPKGNQVIIDELQKCHMQE',
23 -id => 'seq1'
24 );
25 my $seq2 = new Bio::PrimarySeq( -seq =>
26 'CLIFXRLLLIQMIHPQARRAFTFLQQQEPYRIQSMEQLSTLLWHLADLPALSHLSQSLISISRSSPQAWIAVGNCFSLQKDHDEAMRCFRRATQVDEGCAYAWTLCGYEAVEMEEYERAMAFYRTAIRTDARHYNAWYVLFFFFFFFFVPGDIDSXPKKGMEWGXFISKRIDRGMRSIILKEPSKSIQLIPFFYVALVWXVGVSSYPLETMTNIDFPKKKKALEKSNDVVQALHFYERASKYAPTSAMVQFKRIRALVALQRYDEAISALVPLTHSAPDEANVFFLLGKCLLKKERRQEATMAFTNARELEPK',
27 -id => 'seq2');
29 my $out = new Bio::SeqIO(-format => 'fasta',
30 -file => ">seq1.fa");
31 $out->write_seq($seq);
32 $out->close();
33 $out = new Bio::SeqIO(-format => 'fasta',
34 -file => ">seq2.fa");
35 $out->write_seq($seq2);
36 $out->close();
39 my $embossfactory = Bio::Factory::EMBOSS->new();
41 my @alignprogs = qw(water needle stretcher matcher);
42 my $alignout = new Bio::AlignIO(-format => 'msf');
45 foreach my $prog ( @alignprogs ) {
46 my $alignfactory = $embossfactory->program('water');
49 $alignfactory->run({ '-sequencea' => 'seq1.fa',
50 '-seqall' => 'seq2.fa',
51 '-gapext' => 2.0,
52 '-datafile' => 'EBLOSUM62',
53 '-gapopen' => 14.0,
54 '-outfile' => "seq1_vs_seq2.$prog"});
56 my $alnin = new Bio::AlignIO(-format => 'emboss',
57 -file => "seq1_vs_seq2.$prog");
58 my $aln = $alnin->next_aln();
59 $alignout->write_aln($aln);
62 # this should produce the same alignment as 'water'
63 my $factory = new Bio::Tools::pSW(-matrix=> 'blosum62.bla',
64 -gap => 14,
65 -ext => 2);
66 my $aln = $factory->pairwise_alignment($seq,$seq2);
67 $alignout->write_aln($aln);
69 $factory = new Bio::Tools::Run::Alignment::Clustalw('ktuple' => 2,
70 'matrix' => 'BLOSUM');
71 $aln = $factory->align([$seq,$seq2]);
72 $alignout->write_aln($aln);
74 $factory = new Bio::Tools::Run::Alignment::TCoffee('ktuple' => 2,
75 'matrix' => 'BLOSUM');
76 $aln = $factory->align([$seq,$seq2]);
77 $alignout->write_aln($aln);
79 $factory = new Bio::Tools::Run::StandAloneBlast();
80 $aln = $factory->bl2seq($seq,$seq2);
82 # this actually returns a Bio::Tools::BPbl2seq object
83 # it can be transformed to a SimpleAlign object see
84 # the code in Bio::AlignIO::bl2seq
85 # A transformer object will be written at some point