see if Bio::DB::Sam package will work for tests
[bioperl-live.git] / Bio / Seq / LargeLocatableSeq.pm
blobac7f3bbcc31af0e5b5b9697177c9c3f2d33706c4
1 # BioPerl module for Bio::Seq::LargeLocatableSeq
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Albert Vilella
7 # based on the Bio::LargePrimarySeq module
8 # by Ewan Birney <birney@sanger.ac.uk>
10 # and the Bio::LocatableSeq module
11 # by Ewan Birney <birney@sanger.ac.uk>
13 # Copyright Albert Vilella
15 # You may distribute this module under the same terms as perl itself
17 # POD documentation - main docs before the code
19 =head1 NAME
21 Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
22 files in the tempdir
24 =head1 SYNOPSIS
26 # normal primary seq usage
27 use Bio::Seq::LargeLocatableSeq;
28 my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
29 -id => "seq1",
30 -start => 1,
31 -end => 7);
33 =head1 DESCRIPTION
35 Bio::Seq::LargeLocatableSeq - object with start/end points on it that
36 can be projected into a MSA or have coordinates relative to another
37 seq.
39 This object, unlike Bio::LocatableSeq, stores a sequence as a series
40 of files in a temporary directory. The aim is to allow someone the
41 ability to store very large sequences (eg, E<gt> 100MBases) in a file
42 system without running out of memory (eg, on a 64 MB real memory
43 machine!).
45 Of course, to actually make use of this functionality, the programs
46 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
47 the entire sequence will come out into memory and probably crash your
48 machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
49 only 90 characters to be brought into real memory.
51 =head1 FEEDBACK
53 =head2 Mailing Lists
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 =head2 Support
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 reponsive experts will be able look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
73 =head2 Reporting Bugs
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 of the bugs and their resolution. Bug reports can be submitted via
77 the web:
79 https://redmine.open-bio.org/projects/bioperl/
81 =head1 AUTHOR - Albert Vilella
83 Email avilella-AT-gmail-DOT-com
85 =head1 APPENDIX
87 The rest of the documentation details each of the object methods.
88 Internal methods are usually preceded with a _
90 =cut
93 # Let the code begin...
95 package Bio::Seq::LargeLocatableSeq;
96 use vars qw($AUTOLOAD);
97 use strict;
100 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
103 =head2 new
105 Title : new
106 Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
107 Function: Builds a new Bio::Seq::LargeLocatableSeq object
108 Returns : an instance of Bio::Seq::LargeLocatableSeq
109 Args :
112 =cut
114 sub new {
115 my ($class, %params) = @_;
117 # don't let PrimarySeq set seq until we have
118 # opened filehandle
120 my $seq = $params{'-seq'} || $params{'-SEQ'};
121 if($seq ) {
122 delete $params{'-seq'};
123 delete $params{'-SEQ'};
125 my $self = $class->SUPER::new(%params);
126 my $mapping = exists $params{'-mapping'} ? $params{'-mapping'} : [1,1];
127 $self->mapping($mapping);
128 $self->_initialize_io(%params);
129 my $tempdir = $self->tempdir( CLEANUP => 1);
130 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
132 $tfh && $self->_fh($tfh);
133 $file && $self->_filename($file);
134 $self->length(0);
135 $seq && $self->seq($seq);
137 return $self;
141 =head2 length
143 Title : length
144 Usage :
145 Function:
146 Example :
147 Returns :
148 Args :
151 =cut
153 sub length {
154 my ($obj,$value) = @_;
155 if( defined $value) {
156 $obj->{'length'} = $value;
159 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
162 =head2 seq
164 Title : seq
165 Usage :
166 Function:
167 Example :
168 Returns :
169 Args :
172 =cut
174 sub seq {
175 my ($self, $data) = @_;
176 if( defined $data ) {
177 if( $self->length() == 0) {
178 $self->add_sequence_as_string($data);
179 } else {
180 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
183 return $self->subseq(1,$self->length);
187 =head2 subseq
189 Title : subseq
190 Usage :
191 Function:
192 Example :
193 Returns :
194 Args :
197 =cut
199 sub subseq{
200 my ($self,$start,$end) = @_;
201 my $string;
202 my $fh = $self->_fh();
204 if( ref($start) && $start->isa('Bio::LocationI') ) {
205 my $loc = $start;
206 if( $loc->length == 0 ) {
207 $self->warn("Expect location lengths to be > 0");
208 return '';
209 } elsif( $loc->end < $loc->start ) {
210 # what about circular seqs
211 $self->warn("Expect location start to come before location end");
213 my $seq = '';
214 if( $loc->isa('Bio::Location::SplitLocationI') ) {
215 foreach my $subloc ( $loc->sub_Location ) {
216 if(! seek($fh,$subloc->start() - 1,0)) {
217 $self->throw("Unable to seek on file $start:$end $!");
219 my $ret = read($fh, $string, $subloc->length());
220 if( !defined $ret ) {
221 $self->throw("Unable to read $start:$end $!");
223 if( $subloc->strand < 0 ) {
224 # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
225 $string = Bio::Seq::LargePrimarySeq->new(-seq => $string)->revcom()->seq();
227 $seq .= $string;
229 } else {
230 if(! seek($fh,$loc->start()-1,0)) {
231 $self->throw("Unable to seek on file ".$loc->start.":".
232 $loc->end ." $!");
234 my $ret = read($fh, $string, $loc->length());
235 if( !defined $ret ) {
236 $self->throw("Unable to read ".$loc->start.":".
237 $loc->end ." $!");
239 $seq = $string;
241 if( defined $loc->strand &&
242 $loc->strand < 0 ) {
243 # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
244 $seq = Bio::Seq::LargePrimarySeq->new(-seq => $seq)->revcom()->seq();
246 return $seq;
248 if( $start <= 0 || $end > $self->length ) {
249 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
250 $self->length);
252 if( $end < $start ) {
253 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
256 if(! seek($fh,$start-1,0)) {
257 $self->throw("Unable to seek on file $start:$end $!");
259 my $ret = read($fh, $string, $end-$start+1);
260 if( !defined $ret ) {
261 $self->throw("Unable to read $start:$end $!");
263 return $string;
267 =head2 add_sequence_as_string
269 Title : add_sequence_as_string
270 Usage : $seq->add_sequence_as_string("CATGAT");
271 Function: Appends additional residues to an existing LargeLocatableSeq object.
272 This allows one to build up a large sequence without storing
273 entire object in memory.
274 Returns : Current length of sequence
275 Args : string to append
277 =cut
279 sub add_sequence_as_string{
280 my ($self,$str) = @_;
281 my $len = $self->length + CORE::length($str);
282 my $fh = $self->_fh();
283 if(! seek($fh,0,2)) {
284 $self->throw("Unable to seek end of file: $!");
286 $self->_print($str);
287 $self->length($len);
291 =head2 _filename
293 Title : _filename
294 Usage : $obj->_filename($newval)
295 Function:
296 Example :
297 Returns : value of _filename
298 Args : newvalue (optional)
301 =cut
303 sub _filename{
304 my ($obj,$value) = @_;
305 if( defined $value) {
306 $obj->{'_filename'} = $value;
308 return $obj->{'_filename'};
313 =head2 alphabet
315 Title : alphabet
316 Usage : $obj->alphabet($newval)
317 Function:
318 Example :
319 Returns : value of alphabet
320 Args : newvalue (optional)
323 =cut
325 sub alphabet{
326 my ($self,$value) = @_;
327 if( defined $value) {
328 $self->SUPER::alphabet($value);
330 return $self->SUPER::alphabet() || 'dna';
334 sub DESTROY {
335 my $self = shift;
336 my $fh = $self->_fh();
337 close($fh) if( defined $fh );
338 # this should be handled by Tempfile removal, but we'll unlink anyways.
339 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
340 $self->SUPER::DESTROY();