1 # some of the following code was pillaged from the CPAN module
4 # Copyright (C) 1999 Ken MacLeod
5 # XML::Handler::XMLWriter is free software; you can redistribute it and/or
6 # modify it under the same terms as Perl itself.
9 # BioPerl module for Bio::SeqIO::game::gameSubs
11 # Please direct questions and support issues to <bioperl-l@bioperl.org>
13 # Cared for by Sheldon McKay <mckays@cshl.edu>
15 # You may distribute this module under the same terms as perl itself
18 # POD documentation - main docs before the code
22 Bio::SeqIO::game::gameSubs -- a base class for game-XML parsing
30 A bag of tricks for game-XML parsing. The PerlSAX handler methods were
31 stolen from Chris Mungall's XML base class, which he stole from Ken MacLeod's
38 User feedback is an integral part of the evolution of this
39 and other Bioperl modules. Send your comments and suggestions preferably
40 to one of the Bioperl mailing lists.
42 Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via the
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Sheldon McKay
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
77 package Bio
::SeqIO
::game
::gameSubs
;
78 use XML
::Parser
::PerlSAX
;
83 use base
qw(Bio::Root::Root);
89 Usage : not used directly
90 Returns : a gameHandler object
91 Args : an XML filename
97 my $file = shift || "";
98 my $self = (@_ == 1) ?
{ %{ (shift) } } : { @_ };
100 $self->{file
} = $file;
103 return bless $self, $type;
110 Usage : not used directly
111 Function: starts PerlSAX XML parsing
117 XML
::Parser
::PerlSAX
->new->parse(Source
=> { SystemId
=> "$self->{file}" },
121 =head2 start_document
123 Title : start_document
124 Usage : not used directly
129 my ($self, $document) = @_;
140 Usage : not used directly
145 my ($self, $document) = @_;
147 delete $self->{Names
};
148 delete $self->{Nodes
};
155 Title : start_element
156 Usage : not used directly
161 my ($self, $element) = @_;
163 $element->{Children
} = [];
165 $element->{Name
} =~ tr/A-Z/a-z/;
166 push @
{$self->{Names
}}, $element->{Name
};
167 push @
{$self->{Nodes
}}, $element;
169 my $el_name = "s_" . $element->{Name
};
170 $el_name =~ s/[^a-zA-Z0-9_]/_/g;
171 if ($ENV{DEBUG_XML_SUBS
}) {
172 print STDERR
"xml_subs:$el_name\n";
174 if ($self->can($el_name)) {
175 $self->$el_name($element);
185 Usage : not used directly
190 my ($self, $element) = @_;
194 $element->{Name
} =~ tr/A-Z/a-z/;
196 my $el_name = "e_" . $element->{Name
};
197 $el_name =~ s/[^a-zA-Z0-9_]/_/g;
200 if ($ENV{DEBUG_XML_SUBS
}) {
201 print STDERR
"xml_subs:$el_name\n";
203 if ($self->can($ {el_name
})) {
204 $rval = $self->$el_name($element) || 0;
207 my $curr_element = $self->{Nodes
}->[$#{$self->{Nodes}}];
209 pop @
{$self->{Names
}};
210 pop @
{$self->{Nodes
}};
212 if ($rval eq -1 || !$called_sub) {
213 if (@
{$self->{Nodes
}}) {
214 my $parent = $self->{Nodes
}->[$#{$self->{Nodes}}];
215 push(@
{$parent->{Children
}}, $curr_element);
216 $parent->{"_".$curr_element->{Name
}} = $curr_element;
228 Usage : not used directly
233 my ($self, $characters) = @_;
235 my $str = $self->strip_characters($characters->{Data
});
236 my $curr_element = $self->curr_element();
237 $curr_element->{Characters
} .= $str;
241 =head2 strip_characters
243 Title : strip_characters
244 Usage : not used directly
245 Function: cleans up XML element contents
249 sub strip_characters
{
250 my ($self, $str) = @_;
251 $str =~ s/^[ \n\t]* *//g;
252 $str =~ s/ *[\n\t]*$//g;
259 Usage : not used directly
260 Function: returns the currently open element
266 return $self->{Nodes
}->[-1];
272 Usage : $self->flush($element) # or $element->flush
273 Function: prune a branch from the XML tree
274 Returns : true if successful
275 Args : an element object (optional)
281 my $victim = shift || $self->curr_element;
286 # throw a non-fatal warning
291 Usage : $self->complain("This is terrible; I am not happy")
292 Function: throw a non-fatal warning, formats message for pretty-printing
294 Args : a list of strings
300 return 0 unless $self->{verbose
};
301 my $msg = join '', @_;
303 my @msg = split /\s+/, $msg;
307 my ($last_chunk) = $new_msg =~ /\n?(.+)$/;
308 my $l = $last_chunk ?
length $last_chunk : 0;
309 if ( (length $_) + $l > 45 ) {
313 $new_msg .= $_ . ' ';
317 $self->warn($new_msg);
323 Usage : $self->db_xref($el, $tags)
324 Function: an internal method to flatten dbxref elements
325 Returns : the db_xref (eg wormbase:C02D5.1)
326 Args : an element object (reqd) and a hash ref of tag/values (optional)
331 my ($self, $el, $tags) = @_;
332 $tags ||= $self->{curr_tags
};
333 my $db = $el->{_xref_db
}->{Characters
};
334 my $acc = $el->{_unique_id
} ||
335 $el->{_db_xref_id
} ||
337 my $id = $acc->{Characters
} or return 0;
340 # capture both the database and accession number
341 $id= $id =~ /^\w+$/ ?
"$db:$id" : $id;
342 $tags->{dbxref
} ||= [];
343 push @
{$tags->{dbxref
}}, $id;
351 Usage : $self->comment($comment_element)
352 Function: a method to flatten comment elements
354 Args : an comment element (reqd) and a hash ref of tag/values (optional)
355 Note : The hope here is that we can unflatten structured comments
356 in game-derived annotations happen to make a return trip
361 my ($self, $el, $tags) = @_;
363 $tags ||= $self->{curr_tags
};
364 my $text = $el->{_text
}->{Characters
};
365 my $pers = $el->{_person
}->{Characters
};
366 my $date = $el->{_date
}->{Characters
};
367 my $int = $el->{_internal
}->{Characters
};
370 my $comment = "person=$pers; " if $pers;
371 $comment .= "date=$date; " if $date;
372 $comment .= "internal=$int; " if $int;
373 $comment .= "text=$text" if $text;
375 $tags->{comment
} ||= [];
376 push @
{$tags->{comment
}}, $comment;
383 Usage : $self->property($property_element)
384 Function: an internal method to flatten property elements
385 Returns : a hash reference
386 Args : an property/output element (reqd) and a hash ref of tag/values (optional)
387 Note: This method is aliased to 'output' to handle structurally identical output elements
391 *output
= \
&property
;
393 my ($self, $el, $tags) = @_;
395 $tags ||= $self->{curr_tags
};
396 my $key = $el->{_type
}->{Characters
};
397 my $value = $el->{_value
}->{Characters
};
400 $tags->{$key} ||= [];
401 push @
{$tags->{$key}}, $value;
408 Usage : $self->evidence($evidence_element)
409 Function: a method to flatten evidence elements
411 Args : an evidence element
416 my ($self, $el) = @_;
417 my $tags = $self->{curr_tags
};
418 my $text = $el->{Characters
} or return 0;
419 my $type = $el->{Attributes
}->{type
};
420 my $res = $el->{Attributes
}->{result
};
423 my $evidence = "type=$type; " if $type;
424 $evidence .= "result=$res; " if $res;
425 $evidence .= "evidence=$text";
427 $tags->{evidence
}||= [];
428 push @
{$tags->{evidence
}}, $evidence;
435 Usage : $self->date($date_element)
436 Function: a method to flatten date elements
437 Returns : true if successful
438 Args : a date element
443 my ($self, $el) = @_;
444 my $tags = $self->{curr_tags
};
445 my $date = $el->{Characters
} or return 0;
446 my $stamp = $el->{Attributes
}->{timestamp
};
449 $tags->{date
} ||= [];
450 push @
{$tags->{date
}}, $date;
451 $tags->{timestamp
} ||= [];
452 push @
{$tags->{timestamp
}}, $stamp;
460 Usage : $pid = $self->protein_id($cds, $standard_name)
461 Function: a method to search for a protein name
463 Args : the CDS object plus the transcript\'s 'standard_name'
468 my ($self, $cds, $sn) = @_;
470 if ( $cds->has_tag('protein_id') ) {
471 ($psn) = $cds->get_tag_values('protein_id');
473 elsif ( $cds->has_tag('product') ) {
474 ($psn) = $cds->get_tag_values('product');
475 $psn =~ s/.+?(\S+)$/$1/;
477 elsif ( $cds->has_tag('gene') ) {
478 ($psn) = $cds->get_tag_values('gene');
484 $self->complain("Could not find an ID for the protein");