1 # $Id: TagTree.pm 11693 2007-09-17 20:54:04Z cjfields $
3 # BioPerl module for Bio::Annotation::TagTree
5 # Cared for Chris Fields
7 # You may distribute this module under the same terms as perl itself.
8 # Refer to the Perl Artistic License (see the license accompanying this
9 # software package, or see http://www.perl.com/language/misc/Artistic.html)
10 # for the terms under which you may use, modify, and redistribute this module.
12 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
13 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 # POD documentation - main docs before the code
20 Bio::Annotation::TagTree - AnnotationI with tree-like hierarchal key-value
21 relationships ('structured tags') that can be represented as simple text.
25 use Bio::Annotation::TagTree;
26 use Bio::Annotation::Collection;
28 my $col = Bio::Annotation::Collection->new();
30 # data structure can be an array reference with a data structure
31 # corresponding to that defined by Data::Stag:
33 my $sv = Bio::Annotation::TagTree->new(-tagname => 'mytag1',
34 -value => $data_structure);
35 $col->add_Annotation($sv);
37 # regular text passed is parsed based on the tagformat().
38 my $sv2 = Bio::Annotation::TagTree->new(-tagname => 'mytag2',
41 $col->add_Annotation($sv2);
45 This takes tagged data values and stores them in a hierarchal structured
46 element-value hierarchy (complements of Chris Mungall's Data::Stag module). Data
47 can then be represented as text using a variety of output formats (indention,
48 itext, xml, spxr). Furthermore, the data structure can be queried using various
49 means. See L<Data::Stag> for details.
51 Data passed in using value() or the '-value' parameter upon instantiation
54 1) an array reference corresponding to the data structure for Data::Stag;
56 2) a text string in 'xml', 'itext', 'spxr', or 'indent' format. The default
57 format is 'xml'; this can be changed using tagformat() prior to using value() or
58 by passing in the proper format using '-tagformat' upon instantiation;
60 3) another Bio::Annotation::TagTree or Data::Stag node instance. In both cases
61 a deep copy (duplicate) of the instance is generated.
63 Beyond checking for an array reference no format guessing occurs (so, for
64 roundtrip tests ensure that the IO formats correspond). For now, we recommend
65 when using text input to set tagformat() to one of these formats prior to data
66 loading to ensure the proper Data::Stag parser is selected. After data loading,
67 the tagformat() can be changed to change the text string format returned by
68 value(). (this may be rectified in the future)
70 This Annotation type is fully BioSQL compatible and could be considered a
71 temporary replacement for nested Bio::Annotation::Collections, at least until
72 BioSQL and bioperl-db can support nested annotation collections.
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to one
80 of the Bioperl mailing lists. Your participation is much appreciated.
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87 Please direct usage questions or support issues to the mailing list:
89 I<bioperl-l@bioperl.org>
91 rather than to the module maintainer directly. Many experienced and
92 reponsive experts will be able look at the problem and quickly
93 address it. Please include a thorough description of the problem
94 with code and data examples if at all possible.
98 Report bugs to the Bioperl bug tracking system to help us keep track
99 the bugs and their resolution. Bug reports can be submitted via
102 https://redmine.open-bio.org/projects/bioperl/
110 The rest of the documentation details each of the object methods. Internal
111 methods are usually preceded with a _
115 # Let the code begin...
117 package Bio
::Annotation
::TagTree
;
120 # Object preamble - inherits from Bio::Root::Root
122 use base
qw(Bio::Annotation::SimpleValue);
128 Usage : my $sv = Bio::Annotation::TagTree->new();
129 Function: Instantiate a new TagTree object
130 Returns : Bio::Annotation::TagTree object
131 Args : -value => $value to initialize the object data field [optional]
132 -tagname => $tag to initialize the tagname [optional]
133 -tagformat => format for output [optional]
134 (types 'xml', 'itext', 'sxpr', 'indent', default = 'itext')
135 -node => Data::Stag node or Bio::Annotation::TagTree instance
140 my ( $class, @args ) = @_;
141 my $self = $class->SUPER::new
();
142 my ( $node, $value, $tag, $format, $verbose ) = $self->_rearrange(
153 $self->throw("Cant use both node and value; mutually exclusive")
154 if defined $node && defined $value;
155 defined $tag && $self->tagname($tag);
157 $self->tagformat($format);
158 defined $value && $self->value($value);
159 defined $node && $self->node($node);
160 defined $verbose && $self->verbose($verbose);
164 =head1 AnnotationI implementing functions
171 Usage : my $text = $obj->as_text
172 Function: return the string "Value: $v" where $v is the value
180 return "TagTree: " . $self->value;
186 Usage : my $str = $ann->display_text();
187 Function: returns a string. Unlike as_text(), this method returns a string
188 formatted as would be expected for the specific implementation.
190 One can pass a callback as an argument which allows custom text
191 generation; the callback is passed the current instance and any text
195 Args : [optional] callback
200 my $DEFAULT_CB = sub { $_[0]->value || '' };
203 my ( $self, $cb ) = @_;
205 $self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
214 Usage : my $hashtree = $value->hash_tree
215 Function: For supporting the AnnotationI interface just returns the value
216 as a hashref with the key 'value' pointing to the value
217 Maybe reimplement using Data::Stag::hash()?
226 $h->{'value'} = $self->value;
232 Usage : $obj->tagname($newval)
233 Function: Get/set the tagname for this annotation value.
235 Setting this is optional. If set, it obviates the need to provide
236 a tag to AnnotationCollection when adding this object.
238 Returns : value of tagname (a scalar)
239 Args : new value (a scalar, optional)
244 my ( $self, $value ) = @_;
245 if ( defined $value ) {
246 $self->{'tagname'} = $value;
248 return $self->{'tagname'};
251 =head1 Specific accessors for TagTree
258 Usage : $obj->value($newval)
259 Function: Get/set the value for this annotation.
260 Returns : value of value
261 Args : newvalue (optional)
266 my ( $self, $value ) = @_;
268 # set mode? This resets the entire tagged database
269 my $format = $self->tagformat;
272 if ( ref $value eq 'ARRAY' ) {
274 # note the tagname() is not used here; it is only used for
275 # storing this AnnotationI in the annotation collection
276 eval { $self->{db
} = Data
::Stag
->nodify($value) };
280 # assuming this is blessed; passing on to node() and copy
281 $self->node( $value, 'copy' );
286 # not trying to guess here for now; we go by the tagformat() setting
287 my $h = Data
::Stag
->getformathandler($format);
288 eval { $self->{db
} = Data
::Stag
->from( $format . 'str', $value ) };
290 $self->throw("Data::Stag error:\n$@") if $@
;
294 # How do we return a data structure?
295 # for now, we use the output (if there is a Data::Stag node present)
296 # may need to run an eval {} to catch Data::Stag output errors
297 $self->node->$format;
303 Usage : $obj->tagformat($newval)
304 Function: Get/set the output tag format for this annotation.
305 Returns : value of tagformat
306 Args : newvalue (optional) - format for the data passed into value
307 must be of values 'xml', 'indent', 'sxpr', 'itext', 'perl'
311 my %IS_VALID_FORMAT = map { $_ => 1 } qw(xml indent sxpr itext);
314 my ( $self, $value ) = @_;
315 if ( defined $value ) {
316 $self->throw( "$value is not a valid format; valid format types:\n"
317 . join( ',', map { "'$_'" } keys %IS_VALID_FORMAT ) )
318 if !exists $IS_VALID_FORMAT{$value};
319 $self->{'tagformat'} = $value;
321 return $self->{'tagformat'};
328 Function: Get/set the topmost Data::Stag node used for this annotation.
329 Returns : Data::Stag node implementation
330 (default is Data::Stag::StagImpl)
331 Args : (optional) Data::Stag node implementation
332 (optional)'copy' => flag to create a copy of the node
337 my ( $self, $value, $copy ) = @_;
338 if ( defined $value && ref $value ) {
340 $value->isa('Data::Stag::StagI')
341 ?
( $copy && $copy eq 'copy' ?
$value->duplicate : $value )
342 : $value->isa('Bio::Annotation::TagTree') ?
( $copy
343 && $copy eq 'copy' ?
$value->node->duplicate : $value->node )
345 'Object must be Data::Stag::StagI or Bio::Annotation::TagTree');
348 # lazily create Data::Stag instance if not present
349 if (!$self->{'db'}) {
350 $self->{'db'} = Data
::Stag
->new();
352 return $self->{'db'};
355 =head2 Data::Stag convenience methods
357 Because Data::Stag uses blessed arrays and the core Bioperl class uses blessed
358 hashes, TagTree uses an internal instance of a Data::Stag node for data storage.
359 Therefore the following methods actually delegate to the Data:::Stag internal
362 For consistency (since one could recursively check child nodes), methods retain
363 the same names as Data::Stag. Also, no 'magic' (AUTOLOAD'ed) methods are
364 employed, simply b/c full-fledged Data::Stag functionality can be attained by
365 grabbing the Data::Stag instance using node().
371 Function: Returns the element name (key name) for this node
380 return $self->node->element;
387 Function: Returns the data structure (array ref) for this node
396 return $self->node->data;
403 Function: Get the top-level array of Data::Stag nodes or (if the top level is
404 a terminal node) a scalar value.
406 This is similar to StructuredValue's get_values() method, with the
407 key difference being instead of array refs and scalars you get either
408 Data::Stag nodes or the value for this particular node.
410 For consistency (since one could recursively check nodes),
411 we use the same method name as Data::Stag children().
420 return $self->node->children;
427 Function: Get the top-level array of Data::Stag nodes. Unlike children(),
428 this only returns an array of nodes (if this is a terminal node,
429 no value is returned)
431 Returns : an array of nodes
438 return $self->node->subnodes;
445 Function: Returns the nodes or value for the named element or path
447 Returns : returns array of nodes or a scalar (if node is terminal)
448 dependent on wantarray
454 my ( $self, @vals ) = @_;
455 return $self->node->get(@vals);
462 Function: Recursively searches for and returns the nodes or values for the
463 named element or path
465 Returns : returns array of nodes or scalars (for terminal nodes)
471 my ( $self, @vals ) = @_;
472 return $self->node->find(@vals);
479 Function: Recursively searches for and returns a list of nodes
480 of the given element path
482 Returns : returns array of nodes
488 my ( $self, @vals ) = @_;
489 return $self->node->findnode(@vals);
498 Returns : returns array of nodes or values
504 my ( $self, @vals ) = @_;
505 return $self->node->findval(@vals);
511 Usage : $struct->addchild(['name' => [['foo'=> 'bar1']]]);
512 Function: add new child node to the current node. One can pass in a node, TagTree,
513 or data structure; for instance, in the above, this would translate
521 Args : first arg = element name
522 all other args are added as tag-value pairs
527 my ( $self, @vals ) = @_;
529 # check for element tag first (if no element, must be empty Data::Stag node)
530 if ( !$self->element ) {
532 # try to do the right thing; if more than one element, wrap in array ref
533 @vals > 1 ?
$self->value( \
@vals ) : $self->value( $vals[0] );
536 elsif ( !$self->node->ntnodes ) {
538 # if this is a terminal node, can't add to it (use set?)
539 $self->throw("Can't add child to node; only terminal node is present!");
542 return $self->node->addchild(@vals);
549 Usage : $struct->add('foo', 'bar1', 'bar2', 'bar3');
550 Function: add tag-value nodes to the current node. In the above, this would
551 translate to (in XML):
556 Args : first arg = element name
557 all other args are added as tag-value pairs
562 my ( $self, @vals ) = @_;
564 # check for empty object and die for now
565 if ( !$self->node->element ) {
566 $self->throw("Can't add to terminal element!");
568 return $self->node->add(@vals);
574 Usage : $struct->set('foo','bar');
575 Function: sets a single tag-value pair in the current node. Note this
576 differs from add() in that this replaces any data already present
578 Args : first arg = element name
579 all other args are added as tag-value pairs
584 my ( $self, @vals ) = @_;
586 # check for empty object
587 if ( !$self->node->element ) {
588 $self->throw("Can't add to tree; empty tree!");
590 return $self->node->set(@vals);
596 Usage : $struct->unset('foo');
597 Function: unsets all key-value pairs of the passed element from the
605 my ( $self, @vals ) = @_;
606 return $self->node->unset(@vals);
612 Usage : $struct->free
613 Function: removes all data from the current node
621 return $self->node->free;
627 Usage : $struct->hash;
628 Function: turns the tag-value tree into a hash, all data values are array refs
630 Args : first arg = element name
631 all other args are added as tag-value pairs
637 return $self->node->hash;
643 Usage : $struct->pairs;
644 Function: turns the tag-value tree into a hash, all data values are scalar
646 Args : first arg = element name
647 all other args are added as tag-value pairs, note that duplicates
654 return $self->node->pairs;
660 Usage : @persons = $s->qmatch('person', ('name'=>'fred'));
661 Function : returns all elements in the node tree which match the
662 element name and the key-value pair
663 Returns : Array of nodes
664 Args : return-element str, match-element str, match-value str
669 my ( $self, @vals ) = @_;
670 return $self->node->qmatch(@vals);
676 Usage : @termini = $s->tnodes;
677 Function : returns all terminal nodes below this node
678 Returns : Array of nodes
679 Args : return-element str, match-element str, match-value str
685 return $self->node->tnodes;
691 Usage : @termini = $s->ntnodes;
692 Function : returns all nonterminal nodes below this node
693 Returns : Array of nodes
694 Args : return-element str, match-element str, match-value str
700 return $self->node->ntnodes;
703 =head2 StructureValue-like methods
707 =head2 get_all_values
709 Title : get_all_values
710 Usage : @termini = $s->get_all_values;
711 Function : returns all terminal node values
712 Returns : Array of values
713 Args : return-element str, match-element str, match-value str
715 This is meant to emulate the values one would get from StructureValue's
716 get_all_values() method. Note, however, using this method dissociates the
717 tag-value relationship (i.e. you only get the value list, no elements)
723 my @kids = $self->children;
725 while ( my $val = shift @kids ) {
726 ( ref $val ) ?
push @kids, $val->children : push @vals, $val;