add Codeml pairwise result for PAML4
[bioperl-live.git] / Bio / Seq / LargeLocatableSeq.pm
blob79bcca13caa480b65869a8a4b927f394ee3d3ed2
2 # BioPerl module for Bio::Seq::LargeLocatableSeq
4 # Cared for by Albert Vilella
6 # based on the Bio::LargePrimarySeq module
7 # by Ewan Birney <birney@sanger.ac.uk>
9 # and the Bio::LocatableSeq module
10 # by Ewan Birney <birney@sanger.ac.uk>
12 # Copyright Albert Vilella
14 # You may distribute this module under the same terms as perl itself
16 # POD documentation - main docs before the code
18 =head1 NAME
20 Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
21 files in the tempdir
23 =head1 SYNOPSIS
25 # normal primary seq usage
26 use Bio::Seq::LargeLocatableSeq;
27 my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
28 -id => "seq1",
29 -start => 1,
30 -end => 7);
32 =head1 DESCRIPTION
34 Bio::Seq::LargeLocatableSeq - object with start/end points on it that
35 can be projected into a MSA or have coordinates relative to another
36 seq.
38 This object, unlike Bio::LocatableSeq, stores a sequence as a series
39 of files in a temporary directory. The aim is to allow someone the
40 ability to store very large sequences (eg, E<gt> 100MBases) in a file
41 system without running out of memory (eg, on a 64 MB real memory
42 machine!).
44 Of course, to actually make use of this functionality, the programs
45 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
46 the entire sequence will come out into memory and probably crash your
47 machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
48 only 90 characters to be brought into real memory.
50 =head1 FEEDBACK
52 =head2 Mailing Lists
54 User feedback is an integral part of the evolution of this and other
55 Bioperl modules. Send your comments and suggestions preferably to
56 the Bioperl mailing list. Your participation is much appreciated.
58 bioperl-l@bioperl.org - General discussion
59 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via
65 the web:
67 http://bugzilla.open-bio.org/
69 =head1 AUTHOR - Albert Vilella
71 Email avilella-AT-gmail-DOT-com
73 =head1 APPENDIX
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
78 =cut
81 # Let the code begin...
83 package Bio::Seq::LargeLocatableSeq;
84 use vars qw($AUTOLOAD);
85 use strict;
88 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
91 =head2 new
93 Title : new
94 Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
95 Function: Builds a new Bio::Seq::LargeLocatableSeq object
96 Returns : an instance of Bio::Seq::LargeLocatableSeq
97 Args :
100 =cut
102 sub new {
103 my ($class, %params) = @_;
105 # don't let PrimarySeq set seq until we have
106 # opened filehandle
108 my $seq = $params{'-seq'} || $params{'-SEQ'};
109 if($seq ) {
110 delete $params{'-seq'};
111 delete $params{'-SEQ'};
113 my $self = $class->SUPER::new(%params);
114 $self->_initialize_io(%params);
115 my $tempdir = $self->tempdir( CLEANUP => 1);
116 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
118 $tfh && $self->_fh($tfh);
119 $file && $self->_filename($file);
120 $self->length(0);
121 $seq && $self->seq($seq);
123 return $self;
127 =head2 length
129 Title : length
130 Usage :
131 Function:
132 Example :
133 Returns :
134 Args :
137 =cut
139 sub length {
140 my ($obj,$value) = @_;
141 if( defined $value) {
142 $obj->{'length'} = $value;
145 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
148 =head2 seq
150 Title : seq
151 Usage :
152 Function:
153 Example :
154 Returns :
155 Args :
158 =cut
160 sub seq {
161 my ($self, $data) = @_;
162 if( defined $data ) {
163 if( $self->length() == 0) {
164 $self->add_sequence_as_string($data);
165 } else {
166 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
169 return $self->subseq(1,$self->length);
173 =head2 subseq
175 Title : subseq
176 Usage :
177 Function:
178 Example :
179 Returns :
180 Args :
183 =cut
185 sub subseq{
186 my ($self,$start,$end) = @_;
187 my $string;
188 my $fh = $self->_fh();
190 if( ref($start) && $start->isa('Bio::LocationI') ) {
191 my $loc = $start;
192 if( $loc->length == 0 ) {
193 $self->warn("Expect location lengths to be > 0");
194 return '';
195 } elsif( $loc->end < $loc->start ) {
196 # what about circular seqs
197 $self->warn("Expect location start to come before location end");
199 my $seq = '';
200 if( $loc->isa('Bio::Location::SplitLocationI') ) {
201 foreach my $subloc ( $loc->sub_Location ) {
202 if(! seek($fh,$subloc->start() - 1,0)) {
203 $self->throw("Unable to seek on file $start:$end $!");
205 my $ret = read($fh, $string, $subloc->length());
206 if( !defined $ret ) {
207 $self->throw("Unable to read $start:$end $!");
209 if( $subloc->strand < 0 ) {
210 # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
211 $string = Bio::Seq::LargePrimarySeq->new(-seq => $string)->revcom()->seq();
213 $seq .= $string;
215 } else {
216 if(! seek($fh,$loc->start()-1,0)) {
217 $self->throw("Unable to seek on file ".$loc->start.":".
218 $loc->end ." $!");
220 my $ret = read($fh, $string, $loc->length());
221 if( !defined $ret ) {
222 $self->throw("Unable to read ".$loc->start.":".
223 $loc->end ." $!");
225 $seq = $string;
227 if( defined $loc->strand &&
228 $loc->strand < 0 ) {
229 # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
230 $seq = Bio::Seq::LargePrimarySeq->new(-seq => $seq)->revcom()->seq();
232 return $seq;
234 if( $start <= 0 || $end > $self->length ) {
235 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
236 $self->length);
238 if( $end < $start ) {
239 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
242 if(! seek($fh,$start-1,0)) {
243 $self->throw("Unable to seek on file $start:$end $!");
245 my $ret = read($fh, $string, $end-$start+1);
246 if( !defined $ret ) {
247 $self->throw("Unable to read $start:$end $!");
249 return $string;
253 =head2 add_sequence_as_string
255 Title : add_sequence_as_string
256 Usage : $seq->add_sequence_as_string("CATGAT");
257 Function: Appends additional residues to an existing LargeLocatableSeq object.
258 This allows one to build up a large sequence without storing
259 entire object in memory.
260 Returns : Current length of sequence
261 Args : string to append
263 =cut
265 sub add_sequence_as_string{
266 my ($self,$str) = @_;
267 my $len = $self->length + CORE::length($str);
268 my $fh = $self->_fh();
269 if(! seek($fh,0,2)) {
270 $self->throw("Unable to seek end of file: $!");
272 $self->_print($str);
273 $self->length($len);
277 =head2 _filename
279 Title : _filename
280 Usage : $obj->_filename($newval)
281 Function:
282 Example :
283 Returns : value of _filename
284 Args : newvalue (optional)
287 =cut
289 sub _filename{
290 my ($obj,$value) = @_;
291 if( defined $value) {
292 $obj->{'_filename'} = $value;
294 return $obj->{'_filename'};
299 =head2 alphabet
301 Title : alphabet
302 Usage : $obj->alphabet($newval)
303 Function:
304 Example :
305 Returns : value of alphabet
306 Args : newvalue (optional)
309 =cut
311 sub alphabet{
312 my ($self,$value) = @_;
313 if( defined $value) {
314 $self->SUPER::alphabet($value);
316 return $self->SUPER::alphabet() || 'dna';
321 =head2 end
323 Title : end
324 Usage : $obj->end($newval)
325 Function:
326 Returns : value of end
327 Args : newvalue (optional)
329 =cut
331 sub end {
332 my $self = shift;
333 if( @_ ) {
334 my $value = shift;
335 my $string = $self->seq;
336 if ($self->seq) {
337 my $len = $self->_ungapped_len;
338 my $id = $self->id;
339 $self->warn("In sequence $id residue count gives end value $len.
340 Overriding value [$value] with value $len for Bio::LargeLocatableSeq::end().")
341 and $value = $len if $len != $value and $self->verbose > 0;
344 $self->{'end'} = $value;
347 return $self->{'end'} || $self->_ungapped_len;
351 sub DESTROY {
352 my $self = shift;
353 my $fh = $self->_fh();
354 close($fh) if( defined $fh );
355 # this should be handled by Tempfile removal, but we'll unlink anyways.
356 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
357 $self->SUPER::DESTROY();