1 # BioPerl module for Bio::Seq::LargeLocatableSeq
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Albert Vilella
7 # based on the Bio::LargePrimarySeq module
8 # by Ewan Birney <birney@sanger.ac.uk>
10 # and the Bio::LocatableSeq module
11 # by Ewan Birney <birney@sanger.ac.uk>
13 # Copyright Albert Vilella
15 # You may distribute this module under the same terms as perl itself
17 # POD documentation - main docs before the code
21 Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
26 # normal primary seq usage
27 use Bio::Seq::LargeLocatableSeq;
28 my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
35 Bio::Seq::LargeLocatableSeq - object with start/end points on it that
36 can be projected into a MSA or have coordinates relative to another
39 This object, unlike Bio::LocatableSeq, stores a sequence as a series
40 of files in a temporary directory. The aim is to allow someone the
41 ability to store very large sequences (eg, E<gt> 100MBases) in a file
42 system without running out of memory (eg, on a 64 MB real memory
45 Of course, to actually make use of this functionality, the programs
46 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
47 the entire sequence will come out into memory and probably crash your
48 machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
49 only 90 characters to be brought into real memory.
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 reponsive experts will be able look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 of the bugs and their resolution. Bug reports can be submitted via
79 https://redmine.open-bio.org/projects/bioperl/
81 =head1 AUTHOR - Albert Vilella
83 Email avilella-AT-gmail-DOT-com
87 The rest of the documentation details each of the object methods.
88 Internal methods are usually preceded with a _
93 # Let the code begin...
95 package Bio
::Seq
::LargeLocatableSeq
;
96 use vars
qw($AUTOLOAD);
100 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
106 Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
107 Function: Builds a new Bio::Seq::LargeLocatableSeq object
108 Returns : an instance of Bio::Seq::LargeLocatableSeq
115 my ($class, %params) = @_;
117 # don't let PrimarySeq set seq until we have
120 my $seq = $params{'-seq'} || $params{'-SEQ'};
122 delete $params{'-seq'};
123 delete $params{'-SEQ'};
125 my $self = $class->SUPER::new
(%params);
126 my $mapping = exists $params{'-mapping'} ?
$params{'-mapping'} : [1,1];
127 $self->mapping($mapping);
128 $self->_initialize_io(%params);
129 my $tempdir = $self->tempdir( CLEANUP
=> 1);
130 my ($tfh,$file) = $self->tempfile( DIR
=> $tempdir );
132 $tfh && $self->_fh($tfh);
133 $file && $self->_filename($file);
135 $seq && $self->seq($seq);
154 my ($obj,$value) = @_;
155 if( defined $value) {
156 $obj->{'length'} = $value;
159 return (defined $obj->{'length'}) ?
$obj->{'length'} : 0;
175 my ($self, $data) = @_;
176 if( defined $data ) {
177 if( $self->length() == 0) {
178 $self->add_sequence_as_string($data);
180 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
183 return $self->subseq(1,$self->length);
200 my ($self,$start,$end) = @_;
202 my $fh = $self->_fh();
204 if( ref($start) && $start->isa('Bio::LocationI') ) {
206 if( $loc->length == 0 ) {
207 $self->warn("Expect location lengths to be > 0");
209 } elsif( $loc->end < $loc->start ) {
210 # what about circular seqs
211 $self->warn("Expect location start to come before location end");
214 if( $loc->isa('Bio::Location::SplitLocationI') ) {
215 foreach my $subloc ( $loc->sub_Location ) {
216 if(! seek($fh,$subloc->start() - 1,0)) {
217 $self->throw("Unable to seek on file $start:$end $!");
219 my $ret = read($fh, $string, $subloc->length());
220 if( !defined $ret ) {
221 $self->throw("Unable to read $start:$end $!");
223 if( $subloc->strand < 0 ) {
224 # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
225 $string = Bio
::Seq
::LargePrimarySeq
->new(-seq
=> $string)->revcom()->seq();
230 if(! seek($fh,$loc->start()-1,0)) {
231 $self->throw("Unable to seek on file ".$loc->start.":".
234 my $ret = read($fh, $string, $loc->length());
235 if( !defined $ret ) {
236 $self->throw("Unable to read ".$loc->start.":".
241 if( defined $loc->strand &&
243 # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
244 $seq = Bio
::Seq
::LargePrimarySeq
->new(-seq
=> $seq)->revcom()->seq();
248 if( $start <= 0 || $end > $self->length ) {
249 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
252 if( $end < $start ) {
253 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
256 if(! seek($fh,$start-1,0)) {
257 $self->throw("Unable to seek on file $start:$end $!");
259 my $ret = read($fh, $string, $end-$start+1);
260 if( !defined $ret ) {
261 $self->throw("Unable to read $start:$end $!");
267 =head2 add_sequence_as_string
269 Title : add_sequence_as_string
270 Usage : $seq->add_sequence_as_string("CATGAT");
271 Function: Appends additional residues to an existing LargeLocatableSeq object.
272 This allows one to build up a large sequence without storing
273 entire object in memory.
274 Returns : Current length of sequence
275 Args : string to append
279 sub add_sequence_as_string
{
280 my ($self,$str) = @_;
281 my $len = $self->length + CORE
::length($str);
282 my $fh = $self->_fh();
283 if(! seek($fh,0,2)) {
284 $self->throw("Unable to seek end of file: $!");
294 Usage : $obj->_filename($newval)
297 Returns : value of _filename
298 Args : newvalue (optional)
304 my ($obj,$value) = @_;
305 if( defined $value) {
306 $obj->{'_filename'} = $value;
308 return $obj->{'_filename'};
316 Usage : $obj->alphabet($newval)
319 Returns : value of alphabet
320 Args : newvalue (optional)
326 my ($self,$value) = @_;
327 if( defined $value) {
328 $self->SUPER::alphabet
($value);
330 return $self->SUPER::alphabet
() || 'dna';
336 my $fh = $self->_fh();
337 close($fh) if( defined $fh );
338 # this should be handled by Tempfile removal, but we'll unlink anyways.
339 unlink $self->_filename() if defined $self->_filename() && -e
$self->_filename;
340 $self->SUPER::DESTROY
();