t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / Seq / LargePrimarySeq.pm
bloba6ddccd12ee1c3772ef5b49f507c29668f0fc3dc
2 # BioPerl module for Bio::Seq::LargePrimarySeq
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # updated to utilize File::Temp - jason 2000-12-12
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
18 files in the tempdir (as found by File::Temp) or the default method in
19 Bio::Root::Root
21 =head1 SYNOPSIS
23 # normal primary seq usage
25 =head1 DESCRIPTION
27 This object stores a sequence as a series of files in a temporary
28 directory. The aim is to allow someone the ability to store very large
29 sequences (eg, E<gt> 100MBases) in a file system without running out
30 of memory (eg, on a 64 MB real memory machine!).
32 Of course, to actually make use of this functionality, the programs
33 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
34 the entire sequence will come out into memory and probably paste your
35 machine. However, calls $primary_seq-E<gt>subseq(10,100) will cause
36 only 90 characters to be brought into real memory.
38 =head1 FEEDBACK
40 =head2 Mailing Lists
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to one
44 of the Bioperl mailing lists. Your participation is much appreciated.
46 bioperl-l@bioperl.org - General discussion
47 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 =head2 Support
51 Please direct usage questions or support issues to the mailing list:
53 I<bioperl-l@bioperl.org>
55 rather than to the module maintainer directly. Many experienced and
56 reponsive experts will be able look at the problem and quickly
57 address it. Please include a thorough description of the problem
58 with code and data examples if at all possible.
60 =head2 Reporting Bugs
62 Report bugs to the Bioperl bug tracking system to help us keep track
63 the bugs and their resolution. Bug reports can be submitted via the
64 web:
66 https://github.com/bioperl/bioperl-live/issues
68 =head1 AUTHOR - Ewan Birney, Jason Stajich
70 Email birney@ebi.ac.uk
71 Email jason@bioperl.org
73 =head1 APPENDIX
75 The rest of the documentation details each of the object
76 methods. Internal methods are usually preceded with a _
78 =cut
81 # Let the code begin...
84 package Bio::Seq::LargePrimarySeq;
85 use vars qw($AUTOLOAD);
86 use strict;
89 use base qw(Bio::PrimarySeq Bio::Root::IO Bio::Seq::LargeSeqI);
91 sub new {
92 my ($class, %params) = @_;
94 # don't let PrimarySeq set seq until we have
95 # opened filehandle
97 my $seq = $params{'-seq'} || $params{'-SEQ'};
98 if($seq ) {
99 delete $params{'-seq'};
100 delete $params{'-SEQ'};
102 my $self = $class->SUPER::new(%params);
103 $self->_initialize_io(%params);
104 my $tempdir = $self->tempdir( CLEANUP => 1);
105 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
106 $self->{tempdir} = $tempdir;
107 $tfh && $self->_fh($tfh);
108 $file && $self->_filename($file);
109 $self->length(0);
110 $seq && $self->seq($seq);
112 return $self;
116 =head2 length
118 Title : length
119 Usage :
120 Function:
121 Example :
122 Returns :
123 Args :
126 =cut
128 sub length {
129 my ($obj,$value) = @_;
130 if( defined $value) {
131 $obj->{'length'} = $value;
133 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
136 =head2 seq
138 Title : seq
139 Usage :
140 Function:
141 Example :
142 Returns :
143 Args :
146 =cut
148 sub seq {
149 my ($self, $data) = @_;
150 if( defined $data ) {
151 if( $self->length() == 0) {
152 $self->add_sequence_as_string($data);
153 } else {
154 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
157 return $self->subseq(1,$self->length);
160 =head2 subseq
162 Title : subseq
163 Usage :
164 Function:
165 Example :
166 Returns :
167 Args :
170 =cut
172 sub subseq{
173 my ($self,$start,$end) = @_;
174 my $string;
175 my $fh = $self->_fh();
177 if( ref($start) && $start->isa('Bio::LocationI') ) {
178 my $loc = $start;
179 if( $loc->length == 0 ) {
180 $self->warn("Expect location lengths to be > 0");
181 return '';
182 } elsif( $loc->end < $loc->start ) {
183 # what about circular seqs
184 $self->warn("Expect location start to come before location end");
186 my $seq = '';
187 if( $loc->isa('Bio::Location::SplitLocationI') ) {
188 foreach my $subloc ( $loc->sub_Location ) {
189 if(! seek($fh,$subloc->start() - 1,0)) {
190 $self->throw("Unable to seek on file $start:$end $!");
192 my $ret = read($fh, $string, $subloc->length());
193 if( !defined $ret ) {
194 $self->throw("Unable to read $start:$end $!");
196 if( $subloc->strand < 0 ) {
197 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
199 $seq .= $string;
201 } else {
202 if(! seek($fh,$loc->start()-1,0)) {
203 $self->throw("Unable to seek on file ".$loc->start.":".
204 $loc->end ." $!");
206 my $ret = read($fh, $string, $loc->length());
207 if( !defined $ret ) {
208 $self->throw("Unable to read ".$loc->start.":".
209 $loc->end ." $!");
211 $seq = $string;
213 if( defined $loc->strand &&
214 $loc->strand < 0 ) {
215 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
217 return $seq;
219 if( $start <= 0 || $end > $self->length ) {
220 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
221 $self->length);
223 if( $end < $start ) {
224 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
227 if(! seek($fh,$start-1,0)) {
228 $self->throw("Unable to seek on file $start:$end $!");
230 my $ret = read($fh, $string, $end-$start+1);
231 if( !defined $ret ) {
232 $self->throw("Unable to read $start:$end $!");
234 return $string;
237 =head2 add_sequence_as_string
239 Title : add_sequence_as_string
240 Usage : $seq->add_sequence_as_string("CATGAT");
241 Function: Appends additional residues to an existing LargePrimarySeq object.
242 This allows one to build up a large sequence without storing
243 entire object in memory.
244 Returns : Current length of sequence
245 Args : string to append
247 =cut
249 sub add_sequence_as_string{
250 my ($self,$str) = @_;
251 my $len = $self->length + CORE::length($str);
252 my $fh = $self->_fh();
253 if(! seek($fh,0,2)) {
254 $self->throw("Unable to seek end of file: $!");
256 $self->_print($str);
257 $self->length($len);
261 =head2 _filename
263 Title : _filename
264 Usage : $obj->_filename($newval)
265 Function:
266 Example :
267 Returns : value of _filename
268 Args : newvalue (optional)
271 =cut
273 sub _filename{
274 my ($obj,$value) = @_;
275 if( defined $value) {
276 $obj->{'_filename'} = $value;
278 return $obj->{'_filename'};
283 =head2 alphabet
285 Title : alphabet
286 Usage : $obj->alphabet($newval)
287 Function:
288 Example :
289 Returns : value of alphabet
290 Args : newvalue (optional)
293 =cut
295 sub alphabet{
296 my ($self,$value) = @_;
297 if( defined $value) {
298 $self->SUPER::alphabet($value);
300 return $self->SUPER::alphabet() || 'dna';
304 sub DESTROY {
305 my $self = shift;
306 my $fh = $self->_fh();
307 close($fh) if( defined $fh );
308 # this should be handled by Tempfile removal, but we'll unlink anyways.
309 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
310 # remove tempdirs as well
311 rmdir $self->{tempdir} if defined $self->{tempdir} && -e $self->{tempdir};
312 $self->SUPER::DESTROY();