[bug 2714]
[bioperl-live.git] / Bio / Das / SegmentI.pm
blob7f857bf565dbe50a0337882e322bf86e524ee849
1 # $Id$
3 # BioPerl module for Bio::Das::SegmentI
5 # Cared for by Lincoln Stein <lstein@cshl.org>
7 # Copyright Lincoln Stein
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Das::SegmentI - DAS-style access to a feature database
17 =head1 SYNOPSIS
19 # Get a Bio::Das::SegmentI object from a Bio::DasI database...
21 $segment = $das->segment(-name=>'Landmark',
22 -start=>$start,
23 -end => $end);
25 @features = $segment->overlapping_features(-type=>['type1','type2']);
26 # each feature is a Bio::SeqFeatureI-compliant object
28 @features = $segment->contained_features(-type=>['type1','type2']);
30 @features = $segment->contained_in(-type=>['type1','type2']);
32 $stream = $segment->get_feature_stream(-type=>['type1','type2','type3']);
33 while (my $feature = $stream->next_seq) {
34 # do something with feature
35 }
37 $count = $segment->features_callback(-type=>['type1','type2','type3'],
38 -callback => sub { ... });
41 =head1 DESCRIPTION
43 Bio::Das::SegmentI is a simplified alternative interface to sequence
44 annotation databases used by the distributed annotation system. In
45 this scheme, the genome is represented as a series of landmarks. Each
46 Bio::Das::SegmentI object ("segment") corresponds to a genomic region
47 defined by a landmark and a start and end position relative to that
48 landmark. A segment is created using the Bio::DasI segment() method.
50 Features can be filtered by the following attributes:
52 1) their location relative to the segment (whether overlapping,
53 contained within, or completely containing)
55 2) their type
57 3) other attributes using tag/value semantics
59 Access to the feature list uses three distinct APIs:
61 1) fetching entire list of features at a time
63 2) fetching an iterator across features
65 3) a callback
67 =head1 FEEDBACK
69 =head2 Mailing Lists
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to one
73 of the Bioperl mailing lists. Your participation is much appreciated.
75 bioperl-l@bio.perl.org
77 =head2 Reporting Bugs
79 Report bugs to the Bioperl bug tracking system to help us keep track
80 of the bugs and their resolution. Bug reports can be submitted via the
81 web:
83 http://bugzilla.open-bio.org/
85 =head1 AUTHOR - Lincoln Stein
87 Email lstein@cshl.org
89 =head1 APPENDIX
91 The rest of the documentation details each of the object
92 methods. Internal methods are usually preceded with a _
94 =cut
97 # Let the code begin...
99 package Bio::Das::SegmentI;
100 use strict;
103 # Object preamble - inherits from Bio::Root::RootI;
104 use base qw(Bio::Root::RootI);
106 =head2 seq_id
108 Title : seq_id
109 Usage : $ref = $s->seq_id
110 Function: return the ID of the landmark
111 Returns : a string
112 Args : none
113 Status : Public
115 =cut
# Return the ID of the landmark.
# This interface supplies no implementation: the call is forwarded to
# throw_not_implemented() on the invocant, so implementing classes are
# expected to override this method.
sub seq_id {
    my $self = shift;
    return $self->throw_not_implemented;
}
119 =head2 display_name
121 Title : display_name
122 Usage : $ref = $s->display_name
123 Function: return the human-readable name for the landmark
124 Returns : a string
125 Args : none
126 Status : Public
128 This defaults to the same as seq_id.
130 =cut
# Return the human-readable name for the landmark.
# The default simply returns whatever seq_id() returns for the invocant.
sub display_name {
    my $self = shift;
    return $self->seq_id;
}
134 =head2 start
136 Title : start
137 Usage : $s->start
138 Function: start of segment
139 Returns : integer
140 Args : none
141 Status : Public
143 This is a read-only accessor for the start of the segment. low() is
144 provided as an alias for Gadfly compatibility.
146 =cut
# Return the start of the segment.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant.
sub start {
    my $self = shift;
    return $self->throw_not_implemented;
}

# Alias for start(); dispatches through the invocant so that an overriding
# start() in an implementing class is the one that gets called.
sub low {
    my $self = shift;
    return $self->start;
}
151 =head2 end
153 Title : end
154 Usage : $s->end
155 Function: end of segment
156 Returns : integer
157 Args : none
158 Status : Public
160 This is a read-only accessor for the end of the segment. stop() and
161 high() are provided as aliases (high() for Gadfly compatibility).
163 =cut
# Return the end of the segment.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant.
sub end {
    my $self = shift;
    return $self->throw_not_implemented;
}

# Alias for end(); dispatches through the invocant so that an overriding
# end() in an implementing class is honoured.
sub stop {
    my $self = shift;
    return $self->end;
}

# Second alias for end(), dispatched the same way as stop().
sub high {
    my $self = shift;
    return $self->end;
}
169 =head2 length
171 Title : length
172 Usage : $s->length
173 Function: length of segment
174 Returns : integer
175 Args : none
176 Status : Public
178 Returns the length of the segment. Always a positive number.
180 =cut
# Return the length of the segment.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant.
sub length {
    my $self = shift;
    return $self->throw_not_implemented;
}
184 =head2 seq
186 Title : seq
187 Usage : $s->seq
188 Function: get the sequence string for this segment
189 Returns : a string
190 Args : none
191 Status : Public
193 Returns the sequence for this segment as a simple string.
195 =cut
# Return the sequence for this segment.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant.
sub seq {
    my $self = shift;
    return $self->throw_not_implemented;
}
199 =head2 ref
201 Title : ref
202 Usage : $ref = $s->ref([$newlandmark])
203 Function: get/set the reference landmark for addressing
204 Returns : a string
205 Args : new reference landmark (optional)
206 Status : Public
208 This method is used to examine/change the reference landmark used to
209 establish the coordinate system. By default, the landmark cannot be
210 changed and therefore this has the same effect as seq_id(). The new
211 landmark might be an ID, or another Das::SegmentI object.
213 =cut
# Return the reference landmark used for addressing.
# This default ignores any argument and returns whatever seq_id() returns
# for the invocant, i.e. the landmark cannot be changed through it.
sub ref {
    my $self = shift;
    return $self->seq_id;
}

# refseq() is installed as a second name for the very same subroutine.
*refseq = \&ref;
218 =head2 absolute
220 Title : absolute
221 Usage : $s->absolute([$new_value])
222 Function: get/set absolute addressing mode
223 Returns : flag
224 Args : new flag (optional)
225 Status : Public
227 Turn on and off absolute-addressing mode. In absolute addressing
228 mode, coordinates are relative to some underlying "top level"
229 coordinate system (such as a chromosome). ref() returns the identity
230 of the top level landmark, and start() and end() return locations
231 relative to that landmark. In relative addressing mode, coordinates
232 are relative to the landmark sequence specified at the time of segment
233 creation or later modified by the ref() method.
235 The default is to return false and to do nothing in response to
236 attempts to set absolute addressing mode.
238 =cut
# Get/set absolute addressing mode.
# This default does nothing with its arguments and returns nothing: an empty
# list in list context, undef in scalar context (i.e. always false).
sub absolute {
    return;
}
242 =head2 features
244 Title : features
245 Usage : @features = $s->features(@args)
246 Function: get features that overlap this segment
247 Returns : a list of Bio::SeqFeatureI objects
248 Args : see below
249 Status : Public
251 This method will find all features that intersect the segment in a
252 variety of ways and return a list of Bio::SeqFeatureI objects. The
253 feature locations will use coordinates relative to the reference
254 sequence in effect at the time that features() was called.
256 The returned list can be limited to certain types, attributes or
257 range intersection modes. Types of range intersection are one of:
259 "overlaps" the default
260 "contains" return features completely contained within the segment
261 "contained_in" return features that completely contain the segment
263 Two types of argument lists are accepted. In the positional argument
264 form, the arguments are treated as a list of feature types. In the
265 named parameter form, the arguments are a series of -name=E<gt>value
266 pairs.
268 Argument Description
269 -------- ------------
271 -types An array reference to type names in the format
272 "method:source"
274 -attributes A hashref containing a set of attributes to match
276 -rangetype One of "overlaps", "contains", or "contained_in".
278 -iterator Return an iterator across the features.
280 -callback A callback to invoke on each feature
282 The -attributes argument is a hashref containing one or more
283 attributes to match against:
285 -attributes => { Gene => 'abc-1',
286 Note => 'confirmed' }
288 Attribute matching is simple string matching, and multiple attributes
289 are ANDed together. More complex filtering can be performed using the
290 -callback option (see below).
292 If -iterator is true, then the method returns an object reference that
293 implements the next_seq() method. Each call to next_seq() returns a
294 new Bio::SeqFeatureI object.
296 If -callback is passed a code reference, the code reference will be
297 invoked on each feature returned. The code will be passed two
298 arguments consisting of the current feature and the segment object
299 itself, and must return a true value. If the code returns a false
300 value, feature retrieval will be aborted.
302 -callback and -iterator are mutually exclusive options. If -iterator
303 is defined, then -callback is ignored.
305 NOTE: the following methods all build on top of features(), and do not
306 need to be explicitly implemented.
308 overlapping_features()
309 contained_features()
310 contained_in()
311 get_feature_stream()
313 =cut
# Return the features that intersect this segment.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant. The other feature-fetching
# methods in this file all delegate to this one.
sub features {
    my $self = shift;
    return $self->throw_not_implemented;
}
317 =head2 overlapping_features
319 Title : overlapping_features
320 Usage : @features = $s->overlapping_features(@args)
321 Function: get features that overlap this segment
322 Returns : a list of Bio::SeqFeatureI objects
323 Args : see below
324 Status : Public
326 This method is identical to features() except that it defaults to
327 finding overlapping features.
329 =cut
# Fetch the features that overlap this segment.
#
# Accepts either calling convention understood by features():
#   * named parameters - first argument starts with '-'; the arguments are
#     passed through with -rangetype => 'overlaps' appended;
#   * positional       - the arguments are taken as a list of feature types
#     and passed as -types => [...], again with -rangetype => 'overlaps'.
#
# Returns whatever $self->features(...) returns.
sub overlapping_features {
    my $self = shift;

    # The defined() guard keeps a call with no arguments (or an undef first
    # argument) from raising an "uninitialized value" warning in the pattern
    # match; such calls fall through to the positional form, as before.
    my $is_named = defined $_[0] && $_[0] =~ /^-/;

    my @args = $is_named ? (@_, -rangetype => 'overlaps')
                         : (-types => \@_, -rangetype => 'overlaps');
    return $self->features(@args);
}
338 =head2 contained_features
340 Title : contained_features
341 Usage : @features = $s->contained_features(@args)
342 Function: get features that are contained in this segment
343 Returns : a list of Bio::SeqFeatureI objects
344 Args : see below
345 Status : Public
347 This method is identical to features() except that it defaults to
348 a range type of 'contained'.
350 =cut
# Fetch the features that are contained in this segment.
#
# Accepts either calling convention understood by features():
#   * named parameters - first argument starts with '-'; the arguments are
#     passed through with -rangetype => 'contained' appended;
#   * positional       - the arguments are taken as a list of feature types
#     and passed as -types => [...], again with -rangetype => 'contained'.
#
# Returns whatever $self->features(...) returns.
#
# NOTE(review): the features() documentation lists the range types as
# "overlaps", "contains" and "contained_in", whereas this method passes
# 'contained'. The value is left unchanged here; confirm which spelling
# implementations of features() actually expect.
sub contained_features {
    my $self = shift;

    # The defined() guard keeps a call with no arguments (or an undef first
    # argument) from raising an "uninitialized value" warning in the pattern
    # match; such calls fall through to the positional form, as before.
    my $is_named = defined $_[0] && $_[0] =~ /^-/;

    my @args = $is_named ? (@_, -rangetype => 'contained')
                         : (-types => \@_, -rangetype => 'contained');
    return $self->features(@args);
}
359 =head2 contained_in
361 Title : contained_in
362 Usage : @features = $s->contained_in(@args)
363 Function: get features that contain this segment
364 Returns : a list of Bio::SeqFeatureI objects
365 Args : see below
366 Status : Public
368 This method is identical to features() except that it defaults to
369 a range type of 'contained_in'.
371 =cut
# Fetch the features that completely contain this segment.
#
# Accepts either calling convention understood by features():
#   * named parameters - first argument starts with '-'; the arguments are
#     passed through with -rangetype => 'contained_in' appended;
#   * positional       - the arguments are taken as a list of feature types
#     and passed as -types => [...], again with -rangetype => 'contained_in'.
#
# Returns whatever $self->features(...) returns.
sub contained_in {
    my $self = shift;

    # The defined() guard keeps a call with no arguments (or an undef first
    # argument) from raising an "uninitialized value" warning in the pattern
    # match; such calls fall through to the positional form, as before.
    my $is_named = defined $_[0] && $_[0] =~ /^-/;

    my @args = $is_named ? (@_, -rangetype => 'contained_in')
                         : (-types => \@_, -rangetype => 'contained_in');
    return $self->features(@args);
}
380 =head2 get_feature_stream
382 Title : get_feature_stream
383 Usage : $iterator = $s->get_feature_stream(@args)
384 Function: get an iterator across the segment
385 Returns : an object that implements next_seq()
386 Args : see below
387 Status : Public
389 This method is identical to features() except that it always generates
390 an iterator.
392 NOTE: This is defined in the interface in terms of features(). You do not
393 have to implement it.
395 =cut
# Fetch an iterator across the features of this segment.
#
# Accepts either calling convention understood by features():
#   * named parameters - first argument is defined and starts with '-'; the
#     arguments are passed through with -iterator => 1 appended;
#   * positional       - the arguments are taken as a list of feature types
#     and passed as -types => [...], again with -iterator => 1.
#
# Returns whatever $self->features(...) returns.
sub get_feature_stream {
    my $self = shift;

    my @args;
    if (defined $_[0] && $_[0] =~ /^-/) {
        @args = (@_, -iterator => 1);
    }
    else {
        @args = (-types => \@_, -iterator => 1);
    }

    return $self->features(@args);
}
404 =head2 factory
406 Title : factory
407 Usage : $factory = $s->factory
408 Function: return the segment factory
409 Returns : a Bio::DasI object
410 Args : none
411 Status : Public
413 This method returns a Bio::DasI object that can be used to fetch
414 more segments. This is typically the Bio::DasI object from which
415 the segment was originally generated.
417 =cut
# Return the segment factory.
# No implementation is provided here: the call is forwarded to
# throw_not_implemented() on the invocant.
sub factory {
    my $self = shift;
    return $self->throw_not_implemented;
}
423 =head2 primary_tag
425 Title : primary_tag
426 Usage : $tag = $s->primary_tag
427 Function: identifies the segment as type "DasSegment"
428 Returns : a string named "DasSegment"
429 Args : none
430 Status : Public, but see below
432 This method provides Bio::Das::Segment objects with a primary_tag()
433 field that identifies them as being of type "DasSegment". This allows
434 the Bio::Graphics engine to render segments just like a feature,
435 which can be useful.
437 This does not need to be implemented. It is defined by the interface.
439 =cut
# Identify the segment as being of type "DasSegment".
# Takes no arguments into account and always returns that constant string.
sub primary_tag {
    return "DasSegment";
}
445 =head2 strand
447 Title : strand
448 Usage : $strand = $s->strand
449 Function: identifies the segment strand as 0
450 Returns : the number 0
451 Args : none
452 Status : Public, but see below
454 This method provides Bio::Das::Segment objects with a strand() field
455 that identifies it as being strandless. This allows the Bio::Graphics
456 engine to render segments just like a feature, which can be useful.
458 This does not need to be implemented. It is defined by the interface.
460 =cut
# Identify the segment as strandless.
# Takes no arguments into account and always returns the number 0.
sub strand {
    return 0;
}