sync w/ main trunk
[bioperl-live.git] / Bio / AlignIO / largemultifasta.pm
blob343112ff7cae4d300f825ea76c55a37b6e508f05
2 # BioPerl module for Bio::AlignIO::largemultifasta
4 # based on the Bio::SeqIO::largefasta module
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
8 # and the SimpleAlign.pm module of Ewan Birney
10 # Copyright Albert Vilella
12 # You may distribute this module under the same terms as perl itself
13 # _history
14 # January 20, 2004
15 # POD documentation - main docs before the code
17 =head1 NAME
19 Bio::AlignIO::largemultifasta - Largemultifasta MSA Sequence
20 input/output stream
22 =head1 SYNOPSIS
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
26 =head1 DESCRIPTION
28 This object can transform L<Bio::SimpleAlign> objects to and from
29 largemultifasta flat file databases. This is for the fasta sequence
30 format NOT FastA analysis program. To process the pairwise alignments
31 from a FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO
32 module.
34 Reimplementation of Bio::AlignIO::fasta modules so that creates
35 temporary files instead of keeping the whole sequences in memory.
37 =head1 FEEDBACK
39 =head2 Support
41 Please direct usage questions or support issues to the mailing list:
43 L<bioperl-l@bioperl.org>
45 rather than to the module maintainer directly. Many experienced and
46 reponsive experts will be able look at the problem and quickly
47 address it. Please include a thorough description of the problem
48 with code and data examples if at all possible.
50 =head2 Reporting Bugs
52 Report bugs to the Bioperl bug tracking system to help us keep track
53 the bugs and their resolution. Bug reports can be submitted via the
54 web:
56 http://bugzilla.open-bio.org/
58 =head1 AUTHORS - Albert Vilella, Heikki Lehvaslaiho
60 Email: avilella-at-gmail-dot-com, heikki-at-bioperl-dot-org
63 =head1 APPENDIX
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
68 =cut
70 # Let the code begin...
72 package Bio::AlignIO::largemultifasta;
73 use strict;
75 use Bio::Seq::LargeLocatableSeq;
76 use Bio::Seq::SeqFactory;
78 use base qw(Bio::AlignIO Bio::SeqIO Bio::SimpleAlign);
81 sub _initialize {
82 my($self,@args) = @_;
83 $self->SUPER::_initialize(@args);
84 if( ! defined $self->sequence_factory ) {
85 $self->sequence_factory(Bio::Seq::SeqFactory->new
86 (-verbose => $self->verbose(),
87 -type => 'Bio::Seq::LargeLocatableSeq'));
91 =head2 next_seq
93 Title : next_seq
94 Usage : $seq = $stream->next_seq()
95 Function: returns the next sequence in the stream while taking care
96 of the length
97 Returns : Bio::Seq object
98 Args : NONE
100 =cut
102 sub next_seq {
103 my ($self) = @_;
104 my $largeseq = $self->sequence_factory->create();
105 my ($id,$fulldesc,$entry);
106 my $count = 0;
107 my $seen = 0;
108 while( defined ($entry = $self->_readline) ) {
109 if( $seen == 1 && $entry =~ /^\s*>/ ) {
110 $self->_pushback($entry);
111 return $largeseq;
113 if ( ($entry eq '>') ) { $seen = 1; next; }
114 elsif( $entry =~ /\s*>(.+?)$/ ) {
115 $seen = 1;
116 ($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
117 or $self->warn("Can't parse fasta header");
118 $largeseq->display_id($id);
119 $largeseq->primary_id($id);
120 $largeseq->desc($fulldesc);
121 } else {
122 $entry =~ s/\s+//g;
123 $largeseq->add_sequence_as_string($entry);
125 (++$count % 1000 == 0 && $self->verbose() > 0) && print "line $count\n";
127 if( ! $seen ) { return; }
128 return $largeseq;
132 =head2 next_aln
134 Title : next_aln
135 Usage : $aln = $stream->next_aln()
136 Function: returns the next alignment in the stream.
137 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
138 or on error
139 Args : NONE
141 =cut
143 sub next_aln {
144 my $self = shift;
145 my $largeseq;
146 my $aln = Bio::SimpleAlign->new();
147 while (defined ($largeseq = $self->next_seq) ) {
148 $aln->add_seq($largeseq);
149 $self->debug("sequence readed\n");
152 my $alnlen = $aln->length;
153 foreach my $largeseq ( $aln->each_seq ) {
154 if( $largeseq->length < $alnlen ) {
155 my ($diff) = ($alnlen - $largeseq->length);
156 $largeseq->seq("-" x $diff);
160 return $aln;
164 =head2 write_aln
166 Title : write_aln
167 Usage : $stream->write_aln(@aln)
168 Function: writes the $aln object into the stream in largemultifasta format
169 Returns : 1 for success and 0 for error
170 Args : L<Bio::Align::AlignI> object
173 =cut
175 sub write_aln {
176 my ($self,@aln) = @_;
177 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
179 foreach my $aln (@aln) {
180 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
181 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
182 next;
184 foreach $rseq ( $aln->each_seq() ) {
185 $name = $aln->displayname($rseq->get_nse());
186 $seq = $rseq->seq();
187 $desc = $rseq->description || '';
188 $self->_print (">$name $desc\n") or return ;
189 $count =0;
190 $length = length($seq);
191 while( ($count * 60 ) < $length ) {
192 $seqsub = substr($seq,$count*60,60);
193 $self->_print ("$seqsub\n") or return ;
194 $count++;
198 $self->flush if $self->_flush_on_write && defined $self->_fh;
199 return 1;