t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / SeqIO / game / gameWriter.pm
blob152d85134850807a939c06cea3449a1852ae0f88
2 # BioPerl module for Bio::SeqIO::game::gameWriter
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sheldon McKay <mckays@cshl.edu>
8 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SeqIO::game::gameWriter -- a class for writing game-XML
17 =head1 SYNOPSIS
19 use Bio::SeqIO;
21 my $in = Bio::SeqIO->new( -format => 'genbank',
22 -file => 'myfile.gbk' );
23 my $out = Bio::SeqIO->new( -format => 'game',
24 -file => 'myfile.xml' );
26 # get a sequence object
27 my $seq = $in->next_seq;
29 #write it in GAME format
30 $out->write_seq($seq);
32 =head1 DESCRIPTION
34 Bio::SeqIO::game::gameWriter writes GAME-XML (v. 1.2) that is readable
35 by Apollo. It is best not used directly. It is accessed via
36 Bio::SeqIO.
38 =head1 FEEDBACK
40 =head2 Mailing Lists
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to one
44 of the Bioperl mailing lists.
46 Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 =head2 Support
53 Please direct usage questions or support issues to the mailing list:
55 I<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
62 =head2 Reporting Bugs
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via the
66 web:
68 https://github.com/bioperl/bioperl-live/issues
70 =head1 AUTHOR - Sheldon McKay
72 Email mckays@cshl.edu
74 =head1 APPENDIX
76 The rest of the documentation details each of the object
77 methods. Internal methods are usually preceded with a _
79 =cut
81 package Bio::SeqIO::game::gameWriter;
83 use strict;
84 use IO::String;
85 use XML::Writer;
86 use Bio::SeqFeature::Generic;
87 use Bio::SeqFeature::Tools::Unflattener;
89 use base qw(Bio::SeqIO::game::gameSubs);
91 =head2 new
93 Title : new
94 Usage : my $writer = Bio::SeqIO::game::gameWriter->new($seq);
95 Function: constructor method for gameWriter
96 Returns : a game writer object
97 Args : a Bio::SeqI implementing object
98 optionally, an argument to set map_position to on.
99 ( map => 1 ). This will create a map_position elemant
100 that will cause the feature coordinates to be remapped to
101 a parent seqeunce. A sequence name in the format seq:xxx-xxx
102 is expected to determine the offset for the map_position.
103 The default behavior is to have features mapped relative to
104 the sequence contained in the GAME-XML file
106 =cut
108 sub new {
109 my ($caller, $seq, %arg) = @_;
110 my $class = ref($caller) || $caller;
111 my $self = bless ( { seq => $seq }, $class );
113 # make a <map_position> element only if requested
114 $self->{map} = 1 if $arg{map};
115 $self->{anon_set_counters} = {}; #counters for numbering anonymous result and feature sets
116 return $self;
119 =head2 write_to_game
121 Title : write_to_game
122 Usage : $writer->write_to_game
123 Function: writes the sequence object to game-XML
124 Returns : xml as a multiline string
125 Args : none
127 =cut
129 sub write_to_game {
130 my $self = shift;
131 my $seq = $self->{seq};
132 my @feats = $seq->remove_SeqFeatures;
134 # intercept nested features
135 my @nested_feats = grep { $_->get_SeqFeatures } @feats;
136 @feats = grep { !$_->get_SeqFeatures } @feats;
137 map { $seq->add_SeqFeature($_) } @feats;
139 # NB -- Maybe this belongs in Bio::SeqFeatute::Tools::Unflattener
141 # # intercept non-coding RNAs and transposons with contained genes
142 # # GAME-XML has these features as top level annotations which contain
143 # # gene elements
144 # my @gene_containers = ();
146 # for ( @feats ) {
147 # if ( $_->primary_tag =~ /[^m]RNA|repeat_region|transpos/ &&
148 # $_->has_tag('gene') ) {
149 # my @genes = $_->get_tag_values('gene');
150 # my ($min, $max) = (10000000000000,-10000000000000);
151 # for my $g ( @genes ) {
152 # my $gene;
153 # for my $item ( @feats ) {
154 # next unless $item->primary_tag eq 'gene';
155 # my ($n) = $item->get_tag_values('gene');
156 # next unless $n =~ /$g/;
157 # $gene = $item;
158 # last;
160 # next unless $gene && ref $gene;
161 # $max = $gene->end if $gene->end > $max;
162 # $min = $gene->start if $gene->start < $min;
165 # push @gene_containers, $_ if $_->length >= ($max - $min);
167 # else {
168 # $seq->add_SeqFeature($_);
172 # unflatten
173 my $uf = Bio::SeqFeature::Tools::Unflattener->new;
174 $uf->unflatten_seq( -seq => $seq, use_magic => 1 );
176 # rearrange snRNA and transposon hierarchies
177 # $self->_rearrange_hierarchies($seq, @gene_containers);
179 # add back nested feats
180 $seq->add_SeqFeature( $_ ) foreach @nested_feats;
182 my $atts = {};
183 my $xml = '';
185 # write the XML to a string
186 my $xml_handle = IO::String->new($xml);
187 my $writer = XML::Writer->new(OUTPUT => $xml_handle,
188 DATA_MODE => 1,
189 DATA_INDENT => 2,
190 NEWLINE => 1
192 $self->{writer} = $writer;
193 # $writer->xmlDecl("UTF-8");
194 # $writer->doctype("game", 'game', "http://www.fruitfly.org/annot/gamexml.dtd.txt");
195 $writer->comment("GAME-XML generated by Bio::SeqIO::game::gameWriter");
196 $writer->comment("Created " . localtime);
197 $writer->comment('Questions: mckays@cshl.edu');
198 $writer->startTag('game', version => 1.2);
200 my @sources = grep { $_->primary_tag =~ /source|origin|region/i } $seq->get_SeqFeatures;
202 for my $source ( @sources ) {
203 next unless $source->length == $seq->length;
204 for ( qw{ name description db_xref organism md5checksum } ) {
205 if ( $source->has_tag($_) ) {
206 $self->{has_organism} = 1 if /organism/;
207 ($atts->{$_}) = $source->get_tag_values($_);
213 #set a name in the attributes if none was given
214 $atts->{name} ||= $seq->accession_number ne 'unknown'
215 ? $seq->accession_number : $seq->display_name;
217 $self->_seq($seq, $atts);
219 # make a map_position element if req'd
220 if ( $self->{map} ) {
221 my $seqtype;
222 if ( $atts->{mol_type} || $seq->alphabet ) {
223 $seqtype = $atts->{mol_type} || $seq->alphabet;
225 else {
226 $seqtype = 'unknown';
229 $writer->startTag(
230 'map_position',
231 seq => $atts->{name},
232 type => $seqtype
235 my ($arm, $start, undef, $end) = $atts->{name} =~ /(\S+):(-?\d+)(\.\.|-)(-?\d+)/;
236 $self->_element('arm', $arm) if $arm;
237 $self->_span($start, $end);
238 $writer->endTag('map_position');
241 for ( $seq->top_SeqFeatures ) {
243 if($_->isa('Bio::SeqFeature::Computation')) {
244 $self->_comp_analysis($_);
246 else {
247 # if the feature has subfeatures, we will assume it is a gene
248 # (hope this is safe!)
249 if ( $_->get_SeqFeatures ) {
250 $self->_write_gene($_);
251 } else {
252 # non-gene stuff only
253 next if $_->primary_tag =~ /CDS|mRNA|exon|UTR/;
254 $self->_write_feature($_);
259 $writer->endTag('game');
260 $writer->end;
261 $xml;
264 =head2 _rearrange_hierarchies
266 Title : _rearrange_hierarchies
267 Usage : $self->_rearrange_hierarchies($seq)
268 Function: internal method to rearrange gene containment hierarchies
269 so that snRNA or transposon features contain their genes
270 rather than the other way around
271 Returns : nothing
272 Args : a Bio::RichSeq object
273 Note : Not currently used, may be removed
275 =cut
277 sub _rearrange_hierarchies { #renamed to not conflict with Bio::Root::_rearrange
278 my ($self, $seq, @containers) = @_;
279 my @feats = $seq->remove_SeqFeatures;
280 my @genes = grep { $_->primary_tag eq 'gene' } @feats;
281 my @addback = grep { $_->primary_tag ne 'gene' } @feats;
283 for ( @containers ) {
284 my @has_genes = $_->get_tag_values('gene');
285 for my $has_gene ( @has_genes ) {
286 for my $gene ( @genes ) {
287 next unless $gene;
288 my ($gname) = $gene->get_tag_values('gene');
289 if ( $gname eq $has_gene ) {
290 $_->add_SeqFeature($gene);
291 undef $gene;
297 push @addback, (@containers, grep { defined $_ } @genes );
298 $seq->add_SeqFeature($_) foreach @addback;
302 =head2 _write_feature
304 Title : _write_feature
305 Usage : $seld->_write_feature($feat, 1)
306 Function: internal method for writing generic features as <annotation> elements
307 Returns : nothing
308 Args : a Bio::SeqFeature::Generic object and an optional flag to write a
309 bare feature set with no annotation wrapper
311 =cut
313 sub _write_feature {
314 my ($self, $feat, $bare) = @_;
315 my $writer = $self->{writer};
316 my $id;
318 for ( 'standard_name', $feat->primary_tag, 'ID' ) {
319 $id = $self->_find_name($feat, $_ );
320 last if $id;
323 $id ||= $feat->primary_tag . '_' . ++$self->{$feat->primary_tag}->{id};
325 unless ( $bare ) {
326 $writer->startTag('annotation', id => $id);
327 $self->_element('name', $id);
328 $self->_element('type', $feat->primary_tag);
331 $writer->startTag('feature_set', id => $id);
332 $self->_element('name', $id);
333 $self->_element('type', $feat->primary_tag);
334 $self->_render_tags( $feat,
335 \&_render_date_tags,
336 \&_render_comment_tags,
337 \&_render_tags_as_properties
339 $self->_feature_span($id, $feat);
340 $writer->endTag('feature_set');
341 $writer->endTag('annotation') unless $bare;
344 =head2 _write_gene
346 Title : _write_gene
347 Usage : $self->_write_gene($feature)
348 Function: internal method for rendering gene containment hierarchies into
349 a nested <annotation> element
350 Returns : nothing
351 Args : a nested Bio::SeqFeature::Generic gene feature
352 Note : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected. If other gene
353 subfeatures occur as level one subfeatures (same level as mRNA subfeats)
354 an attempt will be made to link them to transcripts via the 'standard_name'
355 qualifier
357 =cut
359 sub _write_gene {
360 my ($self, $feat) = @_;
361 my $writer = $self->{writer};
362 my $str = $feat->strand;
363 my $id = $self->_find_name($feat, 'standard_name')
364 || $self->_find_name($feat, 'gene')
365 || $self->_find_name($feat, $feat->primary_tag)
366 || $self->_find_name($feat, 'locus_tag')
367 || $self->_find_name($feat, 'symbol')
368 || $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
369 Could not find a gene/feature ID, feature must have a primary tag or a tag
370 with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
372 my $gid = $self->_find_name($feat, 'gene') || $id;
374 $writer->startTag('annotation', id => $id);
375 $self->_element('name', $gid);
376 $self->_element('type', $feat->primary_tag);
377 $self->_render_tags( $feat,
378 \&_render_date_tags,
379 \&_render_dbxref_tags,
380 \&_render_comment_tags,
381 \&_render_tags_as_properties,
384 my @genes;
386 if ( $feat->primary_tag eq 'gene' ) {
387 @genes = ($feat);
389 else {
390 # we are in a gene container; gene must then be one level down
391 @genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
394 for my $g ( @genes ) {
395 my $id ||= $self->_find_name($g, 'standard_name')
396 || $self->_find_name($g, 'gene')
397 || $self->_find_name($feat, 'locus_tag')
398 || $self->_find_name($feat, 'symbol')
399 || $self->throw("Could not find a gene ID");
400 my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
402 $writer->startTag('gene', association => 'IS');
403 $self->_element('name', $gid);
404 $writer->endTag('gene');
406 my $proteins;
407 my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
408 my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
409 my @variants = ('A' .. 'Z');
411 for my $mRNA (@mRNAs) {
412 my ($sn, @units);
413 # if the mRNA is a generic transcript, it must be a non-spliced RNA gene
414 # Make a synthetic exon to help build a hierarchy. We have to assume that
415 # the location is not segmented (otherwise it should be a mRNA)
416 if ( $mRNA->primary_tag eq 'transcript') {
417 my $exon = Bio::SeqFeature::Generic->new ( -primary => 'exon' );
418 $exon->location($mRNA->location);
419 $mRNA->add_SeqFeature($exon);
422 # no subfeats? Huh? revert to generic feature
423 unless ( $mRNA->get_SeqFeatures ) {
424 $self->_write_feature($mRNA, 1); # 1 flag writes the bare feature
425 # with no annotation wrapper
426 next;
429 my $name = $self->_find_name($mRNA, $mRNA->primary_tag)
430 || $self->_find_name($mRNA, 'standard_name');
432 my %attributes;
433 my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;
435 # make sure we have the right CDS for alternatively spliced genes
436 # This is meant to deal with sequences from flattened game annotations,
437 # where both the mRNA and CDS have split locations
438 if ( $cds && @mRNAs > 1 && $name ) {
439 $cds = $self->_check_cds($cds, $name);
441 elsif ( $cds && @mRNAs == 1 ) {
442 # The mRNA/CDS pairing must be right. Get the transcript name from the CDS
443 if ( $cds->has_tag('standard_name') ) {
444 ($name) = $cds->get_tag_values('standard_name');
448 if ( !$name ) {
449 # assign a name to the transcript if it has no 'standard_name' binder
450 $name = $id . '-R' . (shift @variants);
453 my $pname;
455 if ( $cds ) {
456 ($sn) = $cds->get_tag_values('standard_name')
457 if $cds->has_tag('standard_name');
458 ($sn) ||= $cds->get_tag_values('mRNA')
459 if $cds->has_tag('mRNA');
461 # the protein needs a name
462 my $psn = $self->protein_id($cds, $sn);
463 $self->{curr_pname} = $psn;
465 # the mRNA need to know the name of its protein
466 unless ( $feat->has_tag('protein_id') ) {
467 $feat->add_tag_value('protein_id', $psn);
470 # define the translation offset
471 my ($c_start, $c_end);
472 if ( $cds->has_tag('codon_start') ){
473 ($c_start) = $cds->get_tag_values('codon_start');
474 $cds->remove_tag('codon_start');
476 else {
477 $c_start = 1;
479 my $cs = Bio::SeqFeature::Generic->new;
480 if ( $c_start == 1 ) {
481 $c_start = $cds->strand > 0 ? $cds->start : $cds->end;
483 if ( $cds->strand < 1 ) {
484 $c_end = $c_start;
485 $c_start = $c_start - 2;
487 else {
488 $c_end = $c_start + 2;
490 $cs->start($c_start);
491 $cs->end($c_end);
492 $cs->strand($cds->strand);
493 $cs->primary_tag('start_codon');
494 $cs->add_tag_value( 'standard_name' => $name );
495 push @units, $cs;
498 if ( $cds->has_tag('problem') ) {
499 my ($val) = $cds->get_tag_values('problem');
500 $cds->remove_tag('problem');
501 $attributes{problem} = $val;
504 my ($aa) = $cds->get_tag_values('translation')
505 if $cds->has_tag('translation');
507 if ( $aa && $psn ) {
508 $cds->remove_tag('translation');
509 my %add_seq = ();
510 $add_seq{residues} = $aa;
511 $add_seq{header} = ['seq',
512 id => $psn,
513 length => length $aa,
514 type => 'aa' ];
516 if ( $cds->has_tag('product_desc') ) {
517 ($add_seq{desc}) = $cds->get_tag_values('product_desc');
518 $cds->remove_tag('product_desc');
521 unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
522 my $start = $cds->start;
523 my $end = $cds->end;
524 my $str = $cds->strand;
525 my $acc = $self->{seq}->accession || $self->{seq}->display_id;
526 $str = $str < 0 ? '[-]' : '';
527 $add_seq{desc} = "translation from_gene[$gid] " .
528 "cds_boundaries:(" . $acc .
529 ":$start..$end$str) transcript_info:[$name]";
531 $self->{add_seqs} ||= [];
532 push @{$self->{add_seqs}}, \%add_seq;
537 $writer->startTag('feature_set', id => $name);
538 $self->_element('name', $name);
539 $self->_element('type', 'transcript');
540 $self->_render_tags($_,
541 \&_render_date_tags,
542 \&_render_comment_tags,
543 \&_render_tags_as_properties,
544 ) for ( $mRNA, ($cds) || () );
546 # any UTR's, etc associated with this transcript?
547 for my $thing ( @other_stuff ) {
548 if ( $thing->has_tag('standard_name') ) {
549 my ($v) = $thing->get_tag_values('standard_name');
550 if ( $v eq $sn ) {
551 push @units, $thing;
556 # add the exons
557 push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
558 @units = sort { $a->start <=> $b->start } @units;
560 my $count = 0;
562 if ( $str < 0 ) {
563 @units = reverse @units;
566 for my $unit ( @units ) {
567 if ( $unit->primary_tag eq 'exon' ) {
568 my $ename = $id;
569 $ename .= ':' . ++$count;
570 $self->_feature_span($ename, $unit);
572 elsif ( $unit->primary_tag eq 'start_codon' ) {
573 $self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
575 else {
576 my $uname = $unit->primary_tag . ":$id";
577 $self->_feature_span($uname, $unit);
580 $self->{curr_pname} = '';
581 $writer->endTag('feature_set');
584 $self->{other_stuff} = \@other_stuff;
587 $writer->endTag('annotation');
589 # add the protein sequences
590 for ( @{$self->{add_seqs}} ) {
591 my %h = %$_;
592 $writer->startTag(@{$h{header}});
593 my @desc = split /\s+/, $h{desc};
594 my $desc = '';
595 for my $word (@desc) {
596 my ($lastline) = $desc =~ /.*^(.+)$/sm;
597 $lastline ||= '';
598 $desc .= length $lastline < 50 ? " $word " : "\n $word ";
600 $self->_element('description', "\n $desc\n ");
602 my $aa = $h{residues};
603 $aa =~ s/(\w{60})/$1\n /g;
604 $aa =~ s/\n\s+$//m;
605 $aa = "\n " . $aa . "\n ";
606 $self->_element('residues', $aa);
607 $writer->endTag('seq');
608 $self->{add_seqs} = [];
611 # Is there anything else associated with the gene? We have to write other
612 # features as stand-alone annotations or apollo will assume they are
613 # transcripts
614 for my $thing ( @{$self->{other_stuff}} ) {
615 next if $thing->has_tag('standard_name');
616 $self->_write_feature($thing);
618 $self->{other_stuff} = [];
622 =head2 _check_cds
624 Title : _check_cds
625 Usage : $self->_check_cds($cds, $name)
626 Function: internal method to check if the CDS associated with an mRNA is
627 the correct alternative splice variant
628 Returns : a Bio::SeqFeature::Generic CDS object
629 Args : the CDS object plus the transcript\'s 'standard_name'
630 Note : this method only works if alternatively spliced transcripts are bound
631 together by a 'standard_name' or 'mRNA' qualifier. If none is present,
632 we will hope that the exons were derived from a segmented RNA or a CDS
633 with no associated mRNA feature. Neither of these two cases would be
634 confounded by alternative splice variants.
636 =cut
639 sub _check_cds {
640 my ($self, $cds, $name) = @_;
641 my $cname = $self->_find_name( $cds, 'standard_name' )
642 || $self->_find_name( $cds, 'mRNA');
644 if ( $cname ) {
645 if ( $cname eq $name ) {
646 return $cds;
648 else {
649 my @CDS = grep { $_->primary_tag eq 'CDS' } @{$self->{feats}};
650 for ( @CDS ) {
651 my ($sname) = $_->_find_name( $_, 'standard_name' )
652 || $_->_find_name( $_, $_->primary_tag );
653 return $_ if $sname eq $name;
655 return '';
658 else {
659 return $cds;
664 =head2 _comp_analysis
666 Usage:
667 Desc :
668 Ret :
669 Args :
670 Side Effects:
671 Example:
673 =cut
675 sub _comp_analysis {
676 my ($self, $feat) = @_;
677 my $writer = $self->{writer};
679 $writer->startTag('computational_analysis');
680 $self->_element('program', $feat->program_name || 'unknown program');
681 $self->_element('database', $feat->database_name) if $feat->database_name;
682 $self->_element('version', $feat->program_version) if $feat->program_version;
683 $self->_element('type', $feat->primary_tag) if $feat->primary_tag;
684 $self->_render_tags($feat,
685 \&_render_date_tags,
686 \&_render_tags_as_properties,
688 $self->_comp_result($feat);
689 $writer->endTag('computational_analysis');
692 =head2 _comp_result
694 Usage:
695 Desc : recursively render a feature and its subfeatures as
696 <result_set> and <result_span> elements
697 Ret : nothing meaningful
698 Args : a feature
700 =cut
703 sub _comp_result {
704 my ($self,$feat) = @_;
706 #check that all our subfeatures have the same strand
709 #write result sets for things that have subfeatures, or things
710 #that have some tags
711 if( my @subfeats = $feat->get_SeqFeatures or $feat->get_all_tags ) {
712 my $writer = $self->{writer};
713 $writer->startTag('result_set',
714 ($feat->can('computation_id') && defined($feat->computation_id))
715 ? (id => $feat->computation_id) : ()
717 my $fakename = $feat->primary_tag || 'no_name';
718 $self->_element('name', $feat->display_name || ($fakename).'_'.++$self->{anon_result_set_counters}{$fakename} );
719 $self->_seq_relationship('query', $feat);
720 $self->_render_tags($feat,
721 \&_render_output_tags
723 for (@subfeats) { #render the subfeats, if any
724 $self->_comp_result($_);
726 $self->_comp_result_span($feat); #also have a span to hold this info
727 $writer->endTag('result_set');
728 } else {
729 #just write result spans for simple things
730 $self->_comp_result_span($feat);
734 =head2 _comp_result_span
736 Usage: _comp_result_span('foo12',$feature);
737 Desc : write GAME XML for a Bio::SeqFeature::Computation feature
738 that has no subfeatures
739 Ret : nothing meaningful
740 Args : name for this span (some kind of identifier),
741 SeqFeature object to put into this span
742 Side Effects:
743 Example:
745 =cut
747 sub _comp_result_span {
749 my ($self, $feat) = @_;
750 my $writer = $self->{writer};
752 $writer->startTag('result_span',
753 ($feat->can('computation_id') && defined($feat->computation_id) ? (id => $feat->computation_id) : ())
755 $self->_element('name', $feat->display_name) if $feat->display_name;
756 $self->_element('type', $feat->primary_tag) if $feat->primary_tag;
757 my $has_score = $feat->can('has_score') ? $feat->has_score : defined($feat->score);
758 $self->_element('score', $feat->score) if $has_score;
759 $self->_render_tags($feat,
760 \&_render_output_tags
762 $self->_seq_relationship('query', $feat);
763 $self->_render_tags($feat,
764 \&_render_target_tags,
766 $writer->endTag('result_span');
769 =head2 _render_tags
771 Usage:
772 Desc :
773 Ret :
774 Args :
775 Side Effects:
776 Example:
778 =cut
780 sub _render_tags {
781 my ($self,$feat,@render_funcs) = @_;
783 my @tagnames = $feat->get_all_tags;
785 #do a chain-of-responsibility down the allowed
786 #tag handlers types for the context in which this is
787 #called
788 foreach my $func (@render_funcs) {
789 @tagnames = $self->$func($feat,@tagnames);
793 =head2 _render_output_tags
795 Usage:
796 Desc : print out <output> elements, with contents
797 taken from the SeqFeature::Computation's 'output' tag
798 Ret : array of tag names this did not render
799 Args : feature object, list of tag names to maybe render
801 In game xml, only <result_span> and <result_set> elements can
802 have <output> elements.
804 =cut
806 sub _render_output_tags {
807 my ($self, $feat, @tagnames) = @_;
808 my $writer = $self->{writer};
809 my @passed_up;
811 for my $tag (@tagnames) {
812 if(lc($tag) eq 'output') {
813 my @outputs = $feat->get_tag_values($tag);
814 while(my($type,$val) = splice @outputs,0,2) {
815 $writer->startTag('output');
816 $self->_element('type',$type);
817 $self->_element('value',$val);
818 $writer->endTag('output');
821 else {
822 push @passed_up,$tag;
825 return @passed_up;
828 =head2 _render_tags_as_properties
830 Usage:
831 Desc :
832 Ret : empty array
833 Args : feature object, array of tag names
834 Side Effects:
835 Example:
837 In game xml, <annotation>, <computational_analysis>,
838 and <feature_set> elements can have properties.
840 =cut
842 sub _render_tags_as_properties {
843 my ($self,$feat,@tagnames) = @_;
845 foreach my $tag (@tagnames) {
846 if( $tag ne $feat->primary_tag ) {
847 $self->_property($tag,$_) for $feat->get_tag_values($tag);
850 return ();
853 =head2 _render_comment_tags
855 Usage:
856 Desc :
857 Ret : names of tags that were not comment tags
858 Args : feature object, tag names available for us to render
859 Side Effects: writes XML
860 Example:
862 In game xml, <annotation> and <feature_set> elements can
863 have comments.
865 =cut
867 sub _render_comment_tags {
868 my ($self,$feat,@tagnames) = @_;
869 my $writer = $self->{writer};
870 my @passed_up;
871 for my $tag ( @tagnames ) {
872 if( lc($tag) eq 'comment' ) {
873 for my $val ($feat->get_tag_values($tag)) {
874 if ( $val =~ /=.+?;.+=/ ) {
875 $self->_unflatten_attribute('comment', $val);
876 } else {
877 $writer->startTag('comment');
878 $self->_element('text', $val);
879 $writer->endTag('comment');
882 } else {
883 push @passed_up,$tag;
886 return @passed_up;
889 =head2 _render_date_tags
891 Usage:
892 Desc :
893 Ret : names of tags that were not date tags
894 Args : feature, list of tag names available for us to render
895 Side Effects: writes XML for <date> elements
896 Example:
898 In game xml, <annotation>, <computational_analysis>,
899 <transaction>, <comment>, and <feature_set> elements
900 can have <date>s.
902 =cut
904 sub _render_date_tags {
905 my ($self,$feat,@tagnames) = @_;
906 my @passed_up;
907 my $date;
908 my %timestamp;
909 foreach my $tag (@tagnames) {
910 if ( lc($tag) eq 'date' ) {
911 ($date) = $feat->get_tag_values($tag);
912 } elsif ( lc($tag) eq 'timestamp' ) {
913 ($timestamp{'timestamp'}) = $feat->get_tag_values($tag);
914 #ignore timestamps, they are folded in with date elem above
915 } else {
916 push @passed_up,$tag;
919 $self->_element('date', $date, \%timestamp) if defined($date);
920 return @passed_up;
923 =head2 _render_dbxref_tags
925 Desc : look for xref tags and render them if they are there
926 Ret : tag names that we didn't render
927 Args : feature object, list of tag names to render
928 Side Effects: writes a <dbxref> element if a tag with name
929 matching /xref$/i is present
932 In game xml, <annotation> and <seq> elements can have dbxrefs.
934 =cut
936 #TODO: can't sequences also have database xrefs? how to find those?
937 sub _render_dbxref_tags {
938 my ($self, $feat, @tagnames) = @_;
939 my @passed_up;
940 for my $tag ( @tagnames ) { #look through all the tags
941 if( $tag =~ /xref$/i ) { #if they are xref tags
942 my $writer = $self->{writer};
943 for my $val ( $feat->get_all_tag_values($tag) ) { #get all their values
944 if( my ($db,$dbid) = $val =~ /(\S+):(\S+)/ ) { #and render them as xrefs
945 $writer->startTag('dbxref');
946 $self->_element('xref_db', $db);
947 $dbid = $val if $db =~ /^[A-Z]O$/; # -> ontology, like GO
948 $self->_element('db_xref_id', $dbid);
949 $writer->endTag('dbxref');
952 } else {
953 push @passed_up,$tag;
956 return @passed_up;
960 =head2 _render_target_tags
962 Usage:
963 Desc : process any 'Target' tags that would indicate a sequence alignment subject
964 Ret : array of tag names that we didn't render
965 Args : feature object
966 Side Effects: writes a <seq_relationship> of type 'subject' if it finds
967 any properly formed tags named 'Target'
968 Example:
970 In game xml, <result_span>, <feature_span>, and <result_set> can have
971 <seq_relationship>s. <result_set> can only have one, a 'query' relation.
973 =cut
975 sub _render_target_tags {
976 my ($self,$feat,@tagnames) = @_;
977 my @passed_up;
978 foreach my $tag (@tagnames) {
979 if($tag eq 'Target' && (my @alignment = $feat->get_tag_values('Target')) >= 3) {
980 $self->_seq_relationship('subject',
981 Bio::Location::Simple->new( -start => $alignment[1],
982 -end => $alignment[2],
984 $alignment[0],
985 $alignment[3],
987 } else {
988 push @passed_up, $tag;
991 return @passed_up;
995 =head2 _property
997 Title : _property
998 Usage : $self->_property($tag => $value);
999 Function: an internal method to write property XML elements
1000 Returns : nothing
1001 Args : a tag/value pair
1003 =cut
1005 sub _property {
1006 my ($self, $tag, $val) = @_;
1007 my $writer = $self->{writer};
1009 if ( length $val > 45 ) {
1010 my @val = split /\s+/, $val;
1011 $val = '';
1013 for my $word (@val) {
1014 my ($lastline) = $val =~ /.*^(.+)$/sm;
1015 $lastline ||= '';
1016 $val .= length $lastline < 45 ? " $word " : "\n $word";
1018 $val = "\n $val\n ";
1019 $val =~ s/(\S)\s{2}(\S)/$1 $2/g;
1021 $writer->startTag('property');
1022 $self->_element('type', $tag);
1023 $self->_element('value', $val);
1024 $writer->endTag('property');
1027 =head2 _unflatten_attribute
1029 Title : _unflatten_attribute
1030 Usage : $self->_unflatten_attribute($name, $value)
1031 Function: an internal method to unflatten and write comment or evidence elements
1032 Returns : nothing
1033 Args : a list of strings
1035 =cut
1037 sub _unflatten_attribute {
1038 my ($self, $name, $val) = @_;
1039 my $writer = $self->{writer};
1040 my %pair;
1041 my @pairs = split ';', $val;
1042 for my $p ( @pairs ) {
1043 my @pair = split '=', $p;
1044 $pair[0] =~ s/^\s+|\s+$//g;
1045 $pair[1] =~ s/^\s+|\s+$//g;
1046 $pair{$pair[0]} = $pair[1];
1048 $writer->startTag($name);
1049 for ( keys %pair ) {
1050 $self->_element($_, $pair{$_});
1052 $writer->endTag($name);
1057 =head2 _xref
1059 Title : _xref
1060 Usage : $self->_xref($value)
1061 Function: an internal method to write db_xref elements
1062 Returns : nothing
1063 Args : a list of strings
1065 =cut
1067 sub _xref {
1068 my ($self, @xrefs) = @_;
1069 my $writer = $self->{writer};
1070 for my $xref ( @xrefs ) {
1071 my ($db, $acc) = $xref =~ /(\S+):(\S+)/;
1072 $writer->startTag('dbxref');
1073 $self->_element('xref_db', $db);
1074 $acc = $xref if $db eq 'GO';
1075 $self->_element('db_xref_id', $acc);
1076 $writer->endTag('dbxref');
1080 =head2 _feature_span
1082 Title : _feature_span
1083 Usage : $self->_feature_span($name, $type, $loc)
1084 Function: an internal method to write a feature_span element
1085 (the actual feature with coordinates)
1086 Returns : nothing
1087 Args : a feature name and Bio::SeqFeatureI-compliant object
1089 =cut
1091 sub _feature_span {
1092 my ($self, $name, $feat, $pname) = @_;
1093 my $type = $feat->primary_tag;
1094 my $writer = $self->{writer};
1095 my %atts = ( id => $name );
1097 if ( $pname ) {
1098 $pname =~ s/-R/-P/;
1099 $atts{produces_seq} = $pname;
1102 $writer->startTag('feature_span', %atts );
1103 $self->_element('name', $name);
1104 $self->_element('type', $type);
1105 $self->_seq_relationship('query', $feat);
1106 $writer->endTag('feature_span');
1109 =head2 _seq_relationship
1111 Title : _seq_relationship
1112 Usage : $self->_seq_relationship($type, $loc)
1113 Function: an internal method to handle feature_span sequence relationships
1114 Returns : nothing
1115 Args : feature type, a Bio::LocationI-compliant object,
1116 (optional) sequence name (defaults to the query seq)
1117 and (optional) alignment string
1119 =cut
1121 sub _seq_relationship {
1122 my ($self, $type, $loc, $seqname, $alignment) = @_;
1123 my $writer = $self->{'writer'};
1125 $seqname ||= #if no seqname passed in, use the name of our annotating seq
1126 $self->{seq}->accession_number ne 'unknown' && $self->{seq}->accession_number
1127 || $self->{seq}->display_id || 'unknown';
1128 $writer->startTag(
1129 'seq_relationship',
1130 type => $type,
1131 seq => $seqname,
1133 $self->_span($loc);
1134 $writer->_element('alignment',$alignment) if $alignment;
1135 $writer->endTag('seq_relationship');
1138 =head2 _element
1140 Title : _element
1141 Usage : $self->_element($name, $chars, $atts)
1142 Function: an internal method to generate 'generic' XML elements
1143 Example :
1144 my $name = 'foo';
1145 my $content = 'bar';
1146 my $attributes = { baz => 1 };
1147 # print the element
1148 $self->_element($name, $content, $attributes);
1149 Returns : nothing
1150 Args : the element name and content plus a ref to an attribute hash
1152 =cut
1154 sub _element {
1155 my ($self, $name, $chars, $atts) = @_;
1156 my $writer = $self->{writer};
1157 my %atts = $atts ? %$atts : ();
1159 $writer->startTag($name, %atts);
1160 $writer->characters($chars);
1161 $writer->endTag($name);
1164 =head2 _span
1166 Title : _span
1167 Usage : $self->_span($loc)
1168 Function: an internal method to write the 'span' element
1169 Returns : nothing
1170 Args : a Bio::LocationI-compliant object
1172 =cut
1174 sub _span {
1175 my ($self, @loc) = @_;
1176 my ($loc, $start, $end);
1178 if ( @loc == 1 ) {
1179 $loc = $loc[0];
1181 elsif ( @loc == 2 ) {
1182 ($start, $end) = @loc;
1185 if ( $loc ) {
1186 ($start, $end) = ($loc->start, $loc->end);
1187 ($start, $end) = ($end, $start) if $loc->strand < 0;
1189 elsif ( !$start ) {
1190 ($start, $end) = (1, $self->{seq}->length);
1193 my $writer = $self->{writer};
1194 $writer->startTag('span');
1195 $self->_element('start', $start);
1196 $self->_element('end', $end);
1197 $writer->endTag('span');
1200 =head2 _seq
1202 Title : _seq
1203 Usage : $self->_seq($seq, $dna)
1204 Function: an internal method to print the 'sequence' element
1205 Returns : nothing
1206 Args : and Bio::SeqI-compliant object and a reference to an attribute hash
1208 =cut
1210 sub _seq {
1211 my ($self, $seq, $atts) = @_;
1213 my $writer = $self->{'writer'};
1216 # game moltypes
1217 my $alphabet = $seq->alphabet;
1218 $alphabet ||= $seq->mol_type if $seq->can('mol_type');
1219 $alphabet =~ s/protein/aa/;
1220 $alphabet =~ s/rna/cdna/;
1222 my @seq = ( 'seq',
1223 id => $atts->{name},
1224 length => $seq->length,
1225 type => $alphabet,
1226 focus => "true"
1229 if ( $atts->{md5checksum} ) {
1230 push @seq, (md5checksum => $atts->{md5checksum});
1231 delete $atts->{md5checksum};
1233 $writer->startTag(@seq);
1235 for my $k ( keys %{$atts} ) {
1236 if ( $k =~ /xref/ ) {
1237 $self->_xref($atts->{$k});
1239 else {
1240 $self->_element($k, $atts->{$k});
1244 # add leading spaces and line breaks for
1245 # nicer xml formatting/indentation
1246 my $sp = (' ' x 6);
1247 my $dna = $seq->seq;
1248 $dna =~ s/(\w{60})/$1\n$sp/g;
1249 $dna = "\n$sp" . $dna . "\n ";
1251 if ( $seq->species && !$self->{has_organism}) {
1252 my $species = $seq->species->binomial;
1253 $self->_element('organism', $species);
1256 $self->_element('residues', $dna);
1257 $writer->endTag('seq');
1260 =head2 _find_name
1262 Title : _find_name
1263 Usage : my $name = $self->_find_name($feature)
1264 Function: an internal method to look for a gene name
1265 Returns : a string
1266 Args : a Bio::SeqFeatureI-compliant object
1268 =cut
1270 sub _find_name {
1271 my ($self, $feat, $key) = @_;
1272 my $name;
1274 if ( $key && $feat->has_tag($key) ) {
1275 ($name) = $feat->get_tag_values($key);
1276 return $name;
1278 else {
1279 # warn "Could not find name '$key'\n";
1280 return '';