remove comment
[bioperl-live.git] / Bio / Annotation / TagTree.pm
blobbc4b4e665492497882431fbbcf4c62b8f9646268
1 # $Id: TagTree.pm 11693 2007-09-17 20:54:04Z cjfields $
3 # BioPerl module for Bio::Annotation::TagTree
5 # Cared for Chris Fields
7 # You may distribute this module under the same terms as perl itself.
8 # Refer to the Perl Artistic License (see the license accompanying this
9 # software package, or see http://www.perl.com/language/misc/Artistic.html)
10 # for the terms under which you may use, modify, and redistribute this module.
12 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
13 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 # POD documentation - main docs before the code
18 =head1 NAME
20 Bio::Annotation::TagTree - AnnotationI with tree-like hierarchal key-value
21 relationships ('structured tags') that can be represented as simple text.
23 =head1 SYNOPSIS
25 use Bio::Annotation::TagTree;
26 use Bio::Annotation::Collection;
28 my $col = Bio::Annotation::Collection->new();
30 # data structure can be an array reference with a data structure
31 # corresponding to that defined by Data::Stag:
33 my $sv = Bio::Annotation::TagTree->new(-tagname => 'mytag1',
34 -value => $data_structure);
35 $col->add_Annotation($sv);
37 # regular text passed is parsed based on the tagformat().
38 my $sv2 = Bio::Annotation::TagTree->new(-tagname => 'mytag2',
39 -tagformat => 'xml',
40 -value => $xmltext);
41 $col->add_Annotation($sv2);
43 =head1 DESCRIPTION
45 This takes tagged data values and stores them in a hierarchal structured
46 element-value hierarchy (complements of Chris Mungall's Data::Stag module). Data
47 can then be represented as text using a variety of output formats (indention,
48 itext, xml, spxr). Furthermore, the data structure can be queried using various
49 means. See L<Data::Stag> for details.
51 Data passed in using value() or the '-value' parameter upon instantiation
52 can either be:
54 1) an array reference corresponding to the data structure for Data::Stag;
56 2) a text string in 'xml', 'itext', 'spxr', or 'indent' format. The default
57 format is 'xml'; this can be changed using tagformat() prior to using value() or
58 by passing in the proper format using '-tagformat' upon instantiation;
60 3) another Bio::Annotation::TagTree or Data::Stag node instance. In both cases
61 a deep copy (duplicate) of the instance is generated.
63 Beyond checking for an array reference no format guessing occurs (so, for
64 roundtrip tests ensure that the IO formats correspond). For now, we recommend
65 when using text input to set tagformat() to one of these formats prior to data
66 loading to ensure the proper Data::Stag parser is selected. After data loading,
67 the tagformat() can be changed to change the text string format returned by
68 value(). (this may be rectified in the future)
70 This Annotation type is fully BioSQL compatible and could be considered a
71 temporary replacement for nested Bio::Annotation::Collections, at least until
72 BioSQL and bioperl-db can support nested annotation collections.
74 =head1 FEEDBACK
76 =head2 Mailing Lists
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to one
80 of the Bioperl mailing lists. Your participation is much appreciated.
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85 =head2 Support
87 Please direct usage questions or support issues to the mailing list:
89 I<bioperl-l@bioperl.org>
91 rather than to the module maintainer directly. Many experienced and
92 reponsive experts will be able look at the problem and quickly
93 address it. Please include a thorough description of the problem
94 with code and data examples if at all possible.
96 =head2 Reporting Bugs
98 Report bugs to the Bioperl bug tracking system to help us keep track
99 the bugs and their resolution. Bug reports can be submitted via
100 or the web:
102 https://redmine.open-bio.org/projects/bioperl/
104 =head1 AUTHOR
106 Chris Fields
108 =head1 APPENDIX
110 The rest of the documentation details each of the object methods. Internal
111 methods are usually preceded with a _
113 =cut
115 # Let the code begin...
117 package Bio::Annotation::TagTree;
118 use strict;
120 # Object preamble - inherits from Bio::Root::Root
122 use base qw(Bio::Annotation::SimpleValue);
123 use Data::Stag;
125 =head2 new
127 Title : new
128 Usage : my $sv = Bio::Annotation::TagTree->new();
129 Function: Instantiate a new TagTree object
130 Returns : Bio::Annotation::TagTree object
131 Args : -value => $value to initialize the object data field [optional]
132 -tagname => $tag to initialize the tagname [optional]
133 -tagformat => format for output [optional]
134 (types 'xml', 'itext', 'sxpr', 'indent', default = 'itext')
135 -node => Data::Stag node or Bio::Annotation::TagTree instance
137 =cut
139 sub new {
140 my ( $class, @args ) = @_;
141 my $self = $class->SUPER::new();
142 my ( $node, $value, $tag, $format, $verbose ) = $self->_rearrange(
145 NODE
146 VALUE
147 TAGNAME
148 TAGFORMAT
149 VERBOSE)
151 @args
153 $self->throw("Cant use both node and value; mutually exclusive")
154 if defined $node && defined $value;
155 defined $tag && $self->tagname($tag);
156 $format ||= 'itext';
157 $self->tagformat($format);
158 defined $value && $self->value($value);
159 defined $node && $self->node($node);
160 defined $verbose && $self->verbose($verbose);
161 return $self;
164 =head1 AnnotationI implementing functions
166 =cut
168 =head2 as_text
170 Title : as_text
171 Usage : my $text = $obj->as_text
172 Function: return the string "Value: $v" where $v is the value
173 Returns : string
174 Args : none
176 =cut
178 sub as_text {
179 my ($self) = @_;
180 return "TagTree: " . $self->value;
183 =head2 display_text
185 Title : display_text
186 Usage : my $str = $ann->display_text();
187 Function: returns a string. Unlike as_text(), this method returns a string
188 formatted as would be expected for the specific implementation.
190 One can pass a callback as an argument which allows custom text
191 generation; the callback is passed the current instance and any text
192 returned
193 Example :
194 Returns : a string
195 Args : [optional] callback
197 =cut
200 my $DEFAULT_CB = sub { $_[0]->value || '' };
202 sub display_text {
203 my ( $self, $cb ) = @_;
204 $cb ||= $DEFAULT_CB;
205 $self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
206 return $cb->($self);
211 =head2 hash_tree
213 Title : hash_tree
214 Usage : my $hashtree = $value->hash_tree
215 Function: For supporting the AnnotationI interface just returns the value
216 as a hashref with the key 'value' pointing to the value
217 Maybe reimplement using Data::Stag::hash()?
218 Returns : hashrf
219 Args : none
221 =cut
223 sub hash_tree {
224 my ($self) = @_;
225 my $h = {};
226 $h->{'value'} = $self->value;
229 =head2 tagname
231 Title : tagname
232 Usage : $obj->tagname($newval)
233 Function: Get/set the tagname for this annotation value.
235 Setting this is optional. If set, it obviates the need to provide
236 a tag to AnnotationCollection when adding this object.
237 Example :
238 Returns : value of tagname (a scalar)
239 Args : new value (a scalar, optional)
241 =cut
243 sub tagname {
244 my ( $self, $value ) = @_;
245 if ( defined $value ) {
246 $self->{'tagname'} = $value;
248 return $self->{'tagname'};
251 =head1 Specific accessors for TagTree
253 =cut
255 =head2 value
257 Title : value
258 Usage : $obj->value($newval)
259 Function: Get/set the value for this annotation.
260 Returns : value of value
261 Args : newvalue (optional)
263 =cut
265 sub value {
266 my ( $self, $value ) = @_;
268 # set mode? This resets the entire tagged database
269 my $format = $self->tagformat;
270 if ($value) {
271 if ( ref $value ) {
272 if ( ref $value eq 'ARRAY' ) {
274 # note the tagname() is not used here; it is only used for
275 # storing this AnnotationI in the annotation collection
276 eval { $self->{db} = Data::Stag->nodify($value) };
278 else {
280 # assuming this is blessed; passing on to node() and copy
281 $self->node( $value, 'copy' );
284 else {
286 # not trying to guess here for now; we go by the tagformat() setting
287 my $h = Data::Stag->getformathandler($format);
288 eval { $self->{db} = Data::Stag->from( $format . 'str', $value ) };
290 $self->throw("Data::Stag error:\n$@") if $@;
293 # get mode?
294 # How do we return a data structure?
295 # for now, we use the output (if there is a Data::Stag node present)
296 # may need to run an eval {} to catch Data::Stag output errors
297 $self->node->$format;
300 =head2 tagformat
302 Title : tagformat
303 Usage : $obj->tagformat($newval)
304 Function: Get/set the output tag format for this annotation.
305 Returns : value of tagformat
306 Args : newvalue (optional) - format for the data passed into value
307 must be of values 'xml', 'indent', 'sxpr', 'itext', 'perl'
309 =cut
311 my %IS_VALID_FORMAT = map { $_ => 1 } qw(xml indent sxpr itext);
313 sub tagformat {
314 my ( $self, $value ) = @_;
315 if ( defined $value ) {
316 $self->throw( "$value is not a valid format; valid format types:\n"
317 . join( ',', map { "'$_'" } keys %IS_VALID_FORMAT ) )
318 if !exists $IS_VALID_FORMAT{$value};
319 $self->{'tagformat'} = $value;
321 return $self->{'tagformat'};
324 =head2 node
326 Title : node
327 Usage : $obj->node()
328 Function: Get/set the topmost Data::Stag node used for this annotation.
329 Returns : Data::Stag node implementation
330 (default is Data::Stag::StagImpl)
331 Args : (optional) Data::Stag node implementation
332 (optional)'copy' => flag to create a copy of the node
334 =cut
336 sub node {
337 my ( $self, $value, $copy ) = @_;
338 if ( defined $value && ref $value ) {
339 $self->{'db'} =
340 $value->isa('Data::Stag::StagI')
341 ? ( $copy && $copy eq 'copy' ? $value->duplicate : $value )
342 : $value->isa('Bio::Annotation::TagTree') ? ( $copy
343 && $copy eq 'copy' ? $value->node->duplicate : $value->node )
344 : $self->throw(
345 'Object must be Data::Stag::StagI or Bio::Annotation::TagTree');
348 # lazily create Data::Stag instance if not present
349 if (!$self->{'db'}) {
350 $self->{'db'} = Data::Stag->new();
352 return $self->{'db'};
355 =head2 Data::Stag convenience methods
357 Because Data::Stag uses blessed arrays and the core Bioperl class uses blessed
358 hashes, TagTree uses an internal instance of a Data::Stag node for data storage.
359 Therefore the following methods actually delegate to the Data:::Stag internal
360 instance.
362 For consistency (since one could recursively check child nodes), methods retain
363 the same names as Data::Stag. Also, no 'magic' (AUTOLOAD'ed) methods are
364 employed, simply b/c full-fledged Data::Stag functionality can be attained by
365 grabbing the Data::Stag instance using node().
367 =head2 element
369 Title : element
370 Usage :
371 Function: Returns the element name (key name) for this node
372 Example :
373 Returns : scalar
374 Args : none
376 =cut
378 sub element {
379 my $self = shift;
380 return $self->node->element;
383 =head2 data
385 Title : data
386 Usage :
387 Function: Returns the data structure (array ref) for this node
388 Example :
389 Returns : array ref
390 Args : none
392 =cut
394 sub data {
395 my $self = shift;
396 return $self->node->data;
399 =head2 children
401 Title : children
402 Usage :
403 Function: Get the top-level array of Data::Stag nodes or (if the top level is
404 a terminal node) a scalar value.
406 This is similar to StructuredValue's get_values() method, with the
407 key difference being instead of array refs and scalars you get either
408 Data::Stag nodes or the value for this particular node.
410 For consistency (since one could recursively check nodes),
411 we use the same method name as Data::Stag children().
412 Example :
413 Returns : an array
414 Args : none
416 =cut
418 sub children {
419 my $self = shift;
420 return $self->node->children;
423 =head2 subnodes
425 Title : subnodes
426 Usage :
427 Function: Get the top-level array of Data::Stag nodes. Unlike children(),
428 this only returns an array of nodes (if this is a terminal node,
429 no value is returned)
430 Example :
431 Returns : an array of nodes
432 Args : none
434 =cut
436 sub subnodes {
437 my $self = shift;
438 return $self->node->subnodes;
441 =head2 get
443 Title : get
444 Usage :
445 Function: Returns the nodes or value for the named element or path
446 Example :
447 Returns : returns array of nodes or a scalar (if node is terminal)
448 dependent on wantarray
449 Args : none
451 =cut
453 sub get {
454 my ( $self, @vals ) = @_;
455 return $self->node->get(@vals);
458 =head2 find
460 Title : find
461 Usage :
462 Function: Recursively searches for and returns the nodes or values for the
463 named element or path
464 Example :
465 Returns : returns array of nodes or scalars (for terminal nodes)
466 Args : none
468 =cut
470 sub find {
471 my ( $self, @vals ) = @_;
472 return $self->node->find(@vals);
475 =head2 findnode
477 Title : findnode
478 Usage :
479 Function: Recursively searches for and returns a list of nodes
480 of the given element path
481 Example :
482 Returns : returns array of nodes
483 Args : none
485 =cut
487 sub findnode {
488 my ( $self, @vals ) = @_;
489 return $self->node->findnode(@vals);
492 =head2 findval
494 Title : findval
495 Usage :
496 Function:
497 Example :
498 Returns : returns array of nodes or values
499 Args : none
501 =cut
503 sub findval {
504 my ( $self, @vals ) = @_;
505 return $self->node->findval(@vals);
508 =head2 addchild
510 Title : addchild
511 Usage : $struct->addchild(['name' => [['foo'=> 'bar1']]]);
512 Function: add new child node to the current node. One can pass in a node, TagTree,
513 or data structure; for instance, in the above, this would translate
514 to (in XML):
516 <name>
517 <foo>bar1</foo>
518 </name>
520 Returns : node
521 Args : first arg = element name
522 all other args are added as tag-value pairs
524 =cut
526 sub addchild {
527 my ( $self, @vals ) = @_;
529 # check for element tag first (if no element, must be empty Data::Stag node)
530 if ( !$self->element ) {
532 # try to do the right thing; if more than one element, wrap in array ref
533 @vals > 1 ? $self->value( \@vals ) : $self->value( $vals[0] );
534 return $self->{db};
536 elsif ( !$self->node->ntnodes ) {
538 # if this is a terminal node, can't add to it (use set?)
539 $self->throw("Can't add child to node; only terminal node is present!");
541 else {
542 return $self->node->addchild(@vals);
546 =head2 add
548 Title : add
549 Usage : $struct->add('foo', 'bar1', 'bar2', 'bar3');
550 Function: add tag-value nodes to the current node. In the above, this would
551 translate to (in XML):
552 <foo>bar1</foo>
553 <foo>bar2</foo>
554 <foo>bar3</foo>
555 Returns :
556 Args : first arg = element name
557 all other args are added as tag-value pairs
559 =cut
561 sub add {
562 my ( $self, @vals ) = @_;
564 # check for empty object and die for now
565 if ( !$self->node->element ) {
566 $self->throw("Can't add to terminal element!");
568 return $self->node->add(@vals);
571 =head2 set
573 Title : set
574 Usage : $struct->set('foo','bar');
575 Function: sets a single tag-value pair in the current node. Note this
576 differs from add() in that this replaces any data already present
577 Returns : node
578 Args : first arg = element name
579 all other args are added as tag-value pairs
581 =cut
583 sub set {
584 my ( $self, @vals ) = @_;
586 # check for empty object
587 if ( !$self->node->element ) {
588 $self->throw("Can't add to tree; empty tree!");
590 return $self->node->set(@vals);
593 =head2 unset
595 Title : unset
596 Usage : $struct->unset('foo');
597 Function: unsets all key-value pairs of the passed element from the
598 current node
599 Returns : node
600 Args : element name
602 =cut
604 sub unset {
605 my ( $self, @vals ) = @_;
606 return $self->node->unset(@vals);
609 =head2 free
611 Title : free
612 Usage : $struct->free
613 Function: removes all data from the current node
614 Returns :
615 Args :
617 =cut
619 sub free {
620 my ($self) = @_;
621 return $self->node->free;
624 =head2 hash
626 Title : hash
627 Usage : $struct->hash;
628 Function: turns the tag-value tree into a hash, all data values are array refs
629 Returns : hash
630 Args : first arg = element name
631 all other args are added as tag-value pairs
633 =cut
635 sub hash {
636 my ($self) = @_;
637 return $self->node->hash;
640 =head2 pairs
642 Title : pairs
643 Usage : $struct->pairs;
644 Function: turns the tag-value tree into a hash, all data values are scalar
645 Returns : hash
646 Args : first arg = element name
647 all other args are added as tag-value pairs, note that duplicates
648 will be lost
650 =cut
652 sub pairs {
653 my ($self) = @_;
654 return $self->node->pairs;
657 =head2 qmatch
659 Title : qmatch
660 Usage : @persons = $s->qmatch('person', ('name'=>'fred'));
661 Function : returns all elements in the node tree which match the
662 element name and the key-value pair
663 Returns : Array of nodes
664 Args : return-element str, match-element str, match-value str
666 =cut
668 sub qmatch {
669 my ( $self, @vals ) = @_;
670 return $self->node->qmatch(@vals);
673 =head2 tnodes
675 Title : tnodes
676 Usage : @termini = $s->tnodes;
677 Function : returns all terminal nodes below this node
678 Returns : Array of nodes
679 Args : return-element str, match-element str, match-value str
681 =cut
683 sub tnodes {
684 my ($self) = @_;
685 return $self->node->tnodes;
688 =head2 ntnodes
690 Title : ntnodes
691 Usage : @termini = $s->ntnodes;
692 Function : returns all nonterminal nodes below this node
693 Returns : Array of nodes
694 Args : return-element str, match-element str, match-value str
696 =cut
698 sub ntnodes {
699 my ($self) = @_;
700 return $self->node->ntnodes;
703 =head2 StructureValue-like methods
705 =cut
707 =head2 get_all_values
709 Title : get_all_values
710 Usage : @termini = $s->get_all_values;
711 Function : returns all terminal node values
712 Returns : Array of values
713 Args : return-element str, match-element str, match-value str
715 This is meant to emulate the values one would get from StructureValue's
716 get_all_values() method. Note, however, using this method dissociates the
717 tag-value relationship (i.e. you only get the value list, no elements)
719 =cut
721 sub get_all_values {
722 my $self = shift;
723 my @kids = $self->children;
724 my @vals;
725 while ( my $val = shift @kids ) {
726 ( ref $val ) ? push @kids, $val->children : push @vals, $val;
728 return @vals;