t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / SeqIO / game / gameSubs.pm
blob198ff728db2db2efd0b38041198797adc607010d
1 # some of the following code was pillaged from the CPAN module
2 # XML::Handler::Subs
4 # Copyright (C) 1999 Ken MacLeod
5 # XML::Handler::XMLWriter is free software; you can redistribute it and/or
6 # modify it under the same terms as Perl itself.
9 # BioPerl module for Bio::SeqIO::game::gameSubs
11 # Please direct questions and support issues to <bioperl-l@bioperl.org>
13 # Cared for by Sheldon McKay <mckays@cshl.edu>
15 # You may distribute this module under the same terms as perl itself
18 # POD documentation - main docs before the code
20 =head1 NAME
22 Bio::SeqIO::game::gameSubs -- a base class for game-XML parsing
24 =head1 SYNOPSIS
26 Not used directly
28 =head1 DESCRIPTION
30 A bag of tricks for game-XML parsing. The PerlSAX handler methods were
31 stolen from Chris Mungall's XML base class, which he stole from Ken MacLeod's
32 XML::Handler::Subs
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this
39 and other Bioperl modules. Send your comments and suggestions preferably
40 to one of the Bioperl mailing lists.
42 Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via the
62 web:
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Sheldon McKay
68 Email mckays@cshl.edu
70 =head1 APPENDIX
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
75 =cut
77 package Bio::SeqIO::game::gameSubs;
78 use XML::Parser::PerlSAX;
79 use UNIVERSAL;
80 use strict;
82 use vars qw {};
83 use base qw(Bio::Root::Root);
86 =head2 new
88 Title : new
89 Usage : not used directly
90 Returns : a gameHandler object
91 Args : an XML filename
93 =cut
95 sub new {
96 my $type = shift;
97 my $file = shift || "";
98 my $self = (@_ == 1) ? { %{ (shift) } } : { @_ };
99 if ($file) {
100 $self->{file} = $file;
103 return bless $self, $type;
107 =head2 go
109 Title : go
110 Usage : not used directly
111 Function: starts PerlSAX XML parsing
113 =cut
115 sub go {
116 my $self = shift;
117 XML::Parser::PerlSAX->new->parse(Source => { SystemId => "$self->{file}" },
118 Handler => $self);
121 =head2 start_document
123 Title : start_document
124 Usage : not used directly
126 =cut
128 sub start_document {
129 my ($self, $document) = @_;
131 $self->{Names} = [];
132 $self->{Nodes} = [];
137 =head2 end_document
139 Title : end_document
140 Usage : not used directly
142 =cut
144 sub end_document {
145 my ($self, $document) = @_;
147 delete $self->{Names};
148 delete $self->{Nodes};
150 return();
153 =head2 start_element
155 Title : start_element
156 Usage : not used directly
158 =cut
160 sub start_element {
161 my ($self, $element) = @_;
163 $element->{Children} = [];
165 $element->{Name} =~ tr/A-Z/a-z/;
166 push @{$self->{Names}}, $element->{Name};
167 push @{$self->{Nodes}}, $element;
169 my $el_name = "s_" . $element->{Name};
170 $el_name =~ s/[^a-zA-Z0-9_]/_/g;
171 if ($ENV{DEBUG_XML_SUBS}) {
172 print STDERR "xml_subs:$el_name\n";
174 if ($self->can($el_name)) {
175 $self->$el_name($element);
176 return 1;
179 return 0;
182 =head2 end_element
184 Title : end_element
185 Usage : not used directly
187 =cut
189 sub end_element {
190 my ($self, $element) = @_;
192 my $called_sub = 0;
194 $element->{Name} =~ tr/A-Z/a-z/;
196 my $el_name = "e_" . $element->{Name};
197 $el_name =~ s/[^a-zA-Z0-9_]/_/g;
199 my $rval = 0;
200 if ($ENV{DEBUG_XML_SUBS}) {
201 print STDERR "xml_subs:$el_name\n";
203 if ($self->can($ {el_name})) {
204 $rval = $self->$el_name($element) || 0;
205 $called_sub = 1;
207 my $curr_element = $self->{Nodes}->[$#{$self->{Nodes}}];
209 pop @{$self->{Names}};
210 pop @{$self->{Nodes}};
212 if ($rval eq -1 || !$called_sub) {
213 if (@{$self->{Nodes}}) {
214 my $parent = $self->{Nodes}->[$#{$self->{Nodes}}];
215 push(@{$parent->{Children}}, $curr_element);
216 $parent->{"_".$curr_element->{Name}} = $curr_element;
220 return $called_sub;
225 =head2 characters
227 Title : characters
228 Usage : not used directly
230 =cut
232 sub characters {
233 my ($self, $characters) = @_;
235 my $str = $self->strip_characters($characters->{Data});
236 my $curr_element = $self->curr_element();
237 $curr_element->{Characters} .= $str;
241 =head2 strip_characters
243 Title : strip_characters
244 Usage : not used directly
245 Function: cleans up XML element contents
247 =cut
249 sub strip_characters {
250 my ($self, $str) = @_;
251 $str =~ s/^[ \n\t]* *//g;
252 $str =~ s/ *[\n\t]*$//g;
253 $str;
256 =head2 curr_element
258 Title : curr_element
259 Usage : not used directly
260 Function: returns the currently open element
262 =cut
264 sub curr_element {
265 my $self = shift;
266 return $self->{Nodes}->[-1];
269 =head2 flush
271 Title : flush
272 Usage : $self->flush($element) # or $element->flush
273 Function: prune a branch from the XML tree
274 Returns : true if successful
275 Args : an element object (optional)
277 =cut
279 sub flush {
280 my $self = shift;
281 my $victim = shift || $self->curr_element;
282 $victim = {};
283 return 1;
286 # throw a non-fatal warning
288 =head2 complain
290 Title : complain
291 Usage : $self->complain("This is terrible; I am not happy")
292 Function: throw a non-fatal warning, formats message for pretty-printing
293 Returns : nothing
294 Args : a list of strings
296 =cut
298 sub complain {
299 my $self = shift;
300 return 0 unless $self->{verbose};
301 my $msg = join '', @_;
302 $msg =~ s/\n/ /g;
303 my @msg = split /\s+/, $msg;
304 my $new_msg = '';
306 for ( @msg ) {
307 my ($last_chunk) = $new_msg =~ /\n?(.+)$/;
308 my $l = $last_chunk ? length $last_chunk : 0;
309 if ( (length $_) + $l > 45 ) {
310 $new_msg .= "\n$_ ";
312 else {
313 $new_msg .= $_ . ' ';
317 $self->warn($new_msg);
320 =head2 dbxref
322 Title : dbxref
323 Usage : $self->db_xref($el, $tags)
324 Function: an internal method to flatten dbxref elements
325 Returns : the db_xref (eg wormbase:C02D5.1)
326 Args : an element object (reqd) and a hash ref of tag/values (optional)
328 =cut
330 sub dbxref {
331 my ($self, $el, $tags) = @_;
332 $tags ||= $self->{curr_tags};
333 my $db = $el->{_xref_db}->{Characters};
334 my $acc = $el->{_unique_id} ||
335 $el->{_db_xref_id} ||
336 $el->{_xref_db_id};
337 my $id = $acc->{Characters} or return 0;
338 $self->flush( $el );
340 # capture both the database and accession number
341 $id= $id =~ /^\w+$/ ? "$db:$id" : $id;
342 $tags->{dbxref} ||= [];
343 push @{$tags->{dbxref}}, $id;
344 $id;
348 =head2 comment
350 Title : comment
351 Usage : $self->comment($comment_element)
352 Function: a method to flatten comment elements
353 Returns : a string
354 Args : an comment element (reqd) and a hash ref of tag/values (optional)
355 Note : The hope here is that we can unflatten structured comments
356 in game-derived annotations happen to make a return trip
358 =cut
360 sub comment {
361 my ($self, $el, $tags) = @_;
363 $tags ||= $self->{curr_tags};
364 my $text = $el->{_text}->{Characters};
365 my $pers = $el->{_person}->{Characters};
366 my $date = $el->{_date}->{Characters};
367 my $int = $el->{_internal}->{Characters};
368 $self->flush( $el );
370 my $comment = "person=$pers; " if $pers;
371 $comment .= "date=$date; " if $date;
372 $comment .= "internal=$int; " if $int;
373 $comment .= "text=$text" if $text;
375 $tags->{comment} ||= [];
376 push @{$tags->{comment}}, $comment;
377 $comment;
380 =head2 property
382 Title : property
383 Usage : $self->property($property_element)
384 Function: an internal method to flatten property elements
385 Returns : a hash reference
386 Args : an property/output element (reqd) and a hash ref of tag/values (optional)
387 Note: This method is aliased to 'output' to handle structurally identical output elements
389 =cut
391 *output = \&property;
392 sub property {
393 my ($self, $el, $tags) = @_;
395 $tags ||= $self->{curr_tags};
396 my $key = $el->{_type}->{Characters};
397 my $value = $el->{_value}->{Characters};
398 $self->flush( $el );
400 $tags->{$key} ||= [];
401 push @{$tags->{$key}}, $value;
402 $tags;
405 =head2 evidence
407 Title : evidence
408 Usage : $self->evidence($evidence_element)
409 Function: a method to flatten evidence elements
410 Returns : a string
411 Args : an evidence element
413 =cut
415 sub evidence {
416 my ($self, $el) = @_;
417 my $tags = $self->{curr_tags};
418 my $text = $el->{Characters} or return 0;
419 my $type = $el->{Attributes}->{type};
420 my $res = $el->{Attributes}->{result};
421 $self->flush( $el );
423 my $evidence = "type=$type; " if $type;
424 $evidence .= "result=$res; " if $res;
425 $evidence .= "evidence=$text";
427 $tags->{evidence}||= [];
428 push @{$tags->{evidence}}, $evidence;
429 $evidence;
432 =head2 date
434 Title : date
435 Usage : $self->date($date_element)
436 Function: a method to flatten date elements
437 Returns : true if successful
438 Args : a date element
440 =cut
442 sub date {
443 my ($self, $el) = @_;
444 my $tags = $self->{curr_tags};
445 my $date = $el->{Characters} or return 0;
446 my $stamp = $el->{Attributes}->{timestamp};
447 $self->flush( $el );
449 $tags->{date} ||= [];
450 push @{$tags->{date}}, $date;
451 $tags->{timestamp} ||= [];
452 push @{$tags->{timestamp}}, $stamp;
457 =head2 protein_id
459 Title : protein_id
460 Usage : $pid = $self->protein_id($cds, $standard_name)
461 Function: a method to search for a protein name
462 Returns : a string
463 Args : the CDS object plus the transcript\'s 'standard_name'
465 =cut
467 sub protein_id {
468 my ($self, $cds, $sn) = @_;
469 my $psn;
470 if ( $cds->has_tag('protein_id') ) {
471 ($psn) = $cds->get_tag_values('protein_id');
473 elsif ( $cds->has_tag('product') ) {
474 ($psn) = $cds->get_tag_values('product');
475 $psn =~ s/.+?(\S+)$/$1/;
477 elsif ( $cds->has_tag('gene') ) {
478 ($psn) = $cds->get_tag_values('gene');
480 elsif ( $sn ) {
481 $psn = $sn;
483 else {
484 $self->complain("Could not find an ID for the protein");
485 return '';
488 $psn =~ s/-R/-P/;
489 return $psn;