maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / Variation / IO / xml.pm
blobc7c3e3b8a55f08a785236fc66cbfd710ad659b6d
1 # BioPerl module for Bio::Variation::IO::xml
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Variation::IO::xml - XML sequence variation input/output stream
18 =head1 SYNOPSIS
20 Do not use this module directly. Use it via the Bio::Variation::IO class.
22 =head1 DESCRIPTION
24 This object can transform L<Bio::Variation::SeqDiff> objects to and from XML
25 file databases.
27 The XML format, although consistent, is still evolving. The current
28 DTD for it is at L<http://www.ebi.ac.uk/mutations/DTDE/seqDiff.dtd>.
30 =head1 REQUIREMENTS
32 To use this code you need the module L<XML::Twig> which creates an
33 interface to L<XML::Parser> to read XML and modules L<XML::Writer> and
34 L<IO::String> to write XML out.
36 =head1 FEEDBACK
38 =head2 Mailing Lists
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to the
42 Bioperl mailing lists Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 report bugs to the Bioperl bug tracking system to help us keep track
61 the bugs and their resolution. Bug reports can be submitted via the
62 web:
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Heikki Lehvaslaiho
68 Email: heikki-at-bioperl-dot-org
70 =head1 APPENDIX
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
75 =cut
77 # Let the code begin...
79 package Bio::Variation::IO::xml;
81 use vars qw($seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
82 use strict;
84 use XML::Twig;
85 use XML::Writer 0.4;
86 use IO::String;
87 use Bio::Variation::SeqDiff;
88 use Bio::Variation::DNAMutation;
89 use Bio::Variation::RNAChange;
90 use Bio::Variation::AAChange;
91 use Bio::Variation::Allele;
93 use base qw(Bio::Variation::IO);
95 # _initialize is where the heavy stuff will happen when new is called
97 sub new {
98 my ($class,@args) = @_;
99 my $self = bless {}, $class;
100 $self->_initialize(@args);
101 return $self;
104 sub _initialize {
105 my($self,@args) = @_;
106 return unless $self->SUPER::_initialize(@args);
109 =head2 next
111 Title : next
112 Usage : $haplo = $stream->next()
113 Function: returns the next seqDiff in the stream
114 Returns : Bio::Variation::SeqDiff object
115 Args : NONE
117 =cut
120 sub _seqDiff {
121 my ($t, $term)= @_;
122 $seqdiff->id( $term->att('id') );
123 $seqdiff->alphabet( $term->att('moltype') );
124 $seqdiff->offset( $term->att('offset') );
126 foreach my $child ($term->children) {
127 _variant($t, $child);
131 sub _variant {
132 my ($t, $term)= @_;
133 my $var;
134 my $att = $term->atts();
135 my ($variation_number, $change_number) = split /\./, $att->{number};
137 # if more than two alleles
138 if ($variation_number and $change_number and $change_number > 1 ) {
139 my $a3 = Bio::Variation::Allele->new;
140 $a3->seq( $term->first_child_text('allele_mut') )
141 if $term->first_child_text('allele_mut');
142 if ($term->gi eq 'DNA') {
143 $prevdnaobj->add_Allele($a3);
145 elsif ($term->gi eq 'RNA') {
146 $prevrnaobj->add_Allele($a3);
147 } else { # AA
148 $prevaaobj->add_Allele($a3);
150 } else { # create new variants
151 if ($term->gi eq 'DNA') {
152 $var = Bio::Variation::DNAMutation->new();
154 elsif ($term->gi eq 'RNA') {
155 $var = Bio::Variation::RNAChange->new();
156 } else { # AA
157 $var = Bio::Variation::AAChange->new();
160 # these are always present
161 $var->start( $att->{start} );
162 $var->end( $att->{end});
163 $var->length($att->{len});
164 $var->mut_number( $att->{number});
165 $var->upStreamSeq($term->first_child_text('upFlank'));
166 $var->dnStreamSeq($term->first_child_text('dnFlank'));
167 $var->proof($term->first_child_text('proof'));
169 # region
170 my $region = $term->first_child('region');
171 if ($region) {
172 $var->region($region->text);
173 my $region_atts = $region->atts;
174 $var->region_value( $region_atts->{value} )
175 if $region_atts->{value};
176 $var->region_dist( $region_atts->{dist} )
177 if $region_atts->{dist};
180 # alleles
181 my $a1 = Bio::Variation::Allele->new;
182 $a1->seq($term->first_child_text('allele_ori') )
183 if $term->first_child_text('allele_ori');
184 $var->allele_ori($a1);
185 my $a2 = Bio::Variation::Allele->new;
186 $a2->seq($term->first_child_text('allele_mut') )
187 if $term->first_child_text('allele_mut');
188 $var->isMutation(1) if $term->att('isMutation');
189 $var->allele_mut($a2);
190 $var->add_Allele($a2);
191 $var->length( $term->att('length') );
192 $seqdiff->add_Variant($var);
194 # variant specific code
195 if ($term->gi eq 'DNA') {
196 $prevdnaobj = $var;
198 elsif ($term->gi eq 'RNA') {
199 my $codon = $term->first_child('codon');
200 if ($codon) {
201 my $codon_atts = $codon->atts;
202 $var->codon_table( $codon->att('codon_table') )
203 if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1;
204 $var->codon_pos( $codon->att('codon_pos') )
205 if $codon_atts->{codon_pos};
207 $prevdnaobj->RNAChange($var);
208 $var->DNAMutation($prevdnaobj);
209 $prevrnaobj = $var;
210 } else {
211 $prevrnaobj->AAChange($var);
212 $var->RNAChange($prevrnaobj);
213 $prevaaobj = $var;
218 sub next {
219 my( $self ) = @_;
221 local $/ = "</seqDiff>\n";
222 return unless my $entry = $self->_readline;
223 # print STDERR "|$entry|";
224 return unless $entry =~ /^\W*<seqDiff/;
226 $seqdiff = Bio::Variation::SeqDiff->new;
228 # create new parser object
229 my $twig_handlers = {'seqDiff' => \&_seqDiff };
230 my $t = XML::Twig->new ( TwigHandlers => $twig_handlers,
231 KeepEncoding => 1 );
232 $t->parse($entry);
234 return $seqdiff;
237 =head2 write
239 Title : write
240 Usage : $stream->write(@haplos)
241 Function: writes the $seqDiff objects into the stream
242 Returns : 1 for success and 0 for error
243 Args : Bio::Variation::SeqDiff object
245 =cut
247 sub write {
248 my ($self,@h) = @_;
250 if( !defined $h[0] ) {
251 $self->throw("Attempting to write with no information!");
253 my $str;
254 my $output = IO::String->new($str);
255 my $w = XML::Writer->new(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
257 foreach my $h (@h) {
259 # seqDiff
261 $h->alphabet || $self->throw("Moltype of the reference sequence is not set!");
262 my $hasAA = 0;
263 foreach my $mut ($h->each_Variant) {
264 $hasAA = 1 if $mut->isa('Bio::Variation::AAChange');
266 if ($hasAA) {
267 $w->startTag("seqDiff",
268 "id" => $h->id,
269 "moltype" => $h->alphabet,
270 "offset" => $h->offset,
271 "sysname" => $h->sysname,
272 "trivname" => $h->trivname
274 } else {
275 $w->startTag("seqDiff",
276 "id" => $h->id,
277 "moltype" => $h->alphabet,
278 "offset" => $h->offset,
279 "sysname" => $h->sysname
282 my @allvariants = $h->each_Variant;
283 #print "allvars:", scalar @allvariants, "\n";
284 my %variants = ();
285 foreach my $mut ($h->each_Variant) {
286 #print STDERR $mut->mut_number, "\t", $mut, "\t",
287 #$mut->proof, "\t", scalar $mut->each_Allele, "\n";
288 push @{$variants{$mut->mut_number} }, $mut;
290 foreach my $var (sort keys %variants) {
291 foreach my $mut (@{$variants{$var}}) {
293 # DNA
295 if( $mut->isa('Bio::Variation::DNAMutation') ) {
296 $mut->isMutation(0) if not $mut->isMutation;
297 my @alleles = $mut->each_Allele;
298 my $count = 0;
299 foreach my $allele (@alleles) {
300 $count++;
301 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
302 if ($change_number and $change_number != $count){
303 $mut->mut_number("$change_number.$count");
305 $mut->allele_mut($allele);
306 $w->startTag("DNA",
307 "number" => $mut->mut_number,
308 "start" => $mut->start,
309 "end" => $mut->end,
310 "length" => $mut->length,
311 "isMutation" => $mut->isMutation
313 if ($mut->label) {
314 foreach my $label (split ', ', $mut->label) {
315 $w->startTag("label");
316 $w->characters($label);
317 $w->endTag;
320 if ($mut->proof) {
321 $w->startTag("proof");
322 $w->characters($mut->proof );
323 $w->endTag;
325 if ($mut->upStreamSeq) {
326 $w->startTag("upFlank");
327 $w->characters($mut->upStreamSeq );
328 $w->endTag;
330 #if ( $mut->isMutation) {
331 #if ($mut->allele_ori) {
332 $w->startTag("allele_ori");
333 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
334 $w->endTag;
336 #if ($mut->allele_mut) {
337 $w->startTag("allele_mut");
338 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
339 $w->endTag;
342 if ($mut->dnStreamSeq) {
343 $w->startTag("dnFlank");
344 $w->characters($mut->dnStreamSeq );
345 $w->endTag;
347 if ($mut->restriction_changes) {
348 $w->startTag("restriction_changes");
349 $w->characters($mut->restriction_changes);
350 $w->endTag;
352 if ($mut->region) {
353 if($mut->region_value and $mut->region_dist) {
354 $w->startTag("region",
355 "value" => $mut->region_value,
356 "dist" => $mut->region_dist
359 elsif($mut->region_value) {
360 $w->startTag("region",
361 "value" => $mut->region_value
364 elsif($mut->region_dist) {
365 $w->startTag("region",
366 "dist" => $mut->region_dist
368 } else {
369 $w->startTag("region");
371 $w->characters($mut->region );
372 $w->endTag;
374 $w->endTag; #DNA
378 # RNA
380 elsif( $mut->isa('Bio::Variation::RNAChange') ) {
381 $mut->isMutation(0) if not $mut->isMutation;
382 my @alleles = $mut->each_Allele;
383 my $count = 0;
384 foreach my $allele (@alleles) {
385 $count++;
386 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
387 if ($change_number and $change_number != $count){
388 $mut->mut_number("$change_number.$count");
390 $mut->allele_mut($allele);
391 $w->startTag("RNA",
392 "number" => $mut->mut_number,
393 "start" => $mut->start,
394 "end" => $mut->end,
395 "length" => $mut->length,
396 "isMutation" => $mut->isMutation
399 if ($mut->label) {
400 foreach my $label (split ', ', $mut->label) {
401 $w->startTag("label");
402 $w->characters($label );
403 $w->endTag;
406 if ($mut->proof) {
407 $w->startTag("proof");
408 $w->characters($mut->proof );
409 $w->endTag;
411 if ($mut->upStreamSeq) {
412 $w->startTag("upFlank");
413 $w->characters($mut->upStreamSeq );
414 $w->endTag;
416 #if ( $mut->isMutation) {
417 if ($mut->allele_ori) {
418 $w->startTag("allele_ori");
419 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
420 $w->endTag;
422 if ($mut->allele_mut) {
423 $w->startTag("allele_mut");
424 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
425 $w->endTag;
428 if ($mut->dnStreamSeq) {
429 $w->startTag("dnFlank");
430 $w->characters($mut->dnStreamSeq );
431 $w->endTag;
433 if ($mut->region eq 'coding') {
434 if (! $mut->codon_mut) {
435 $w->startTag("codon",
436 "codon_ori" => $mut->codon_ori,
437 "codon_pos" => $mut->codon_pos
439 } else {
440 $w->startTag("codon",
441 "codon_ori" => $mut->codon_ori,
442 "codon_mut" => $mut->codon_mut,
443 "codon_pos" => $mut->codon_pos
446 $w->endTag;
448 if ($mut->codon_table != 1) {
449 $w->startTag("codon_table");
450 $w->characters($mut->codon_table);
451 $w->endTag;
454 if ($mut->restriction_changes) {
455 $w->startTag("restriction_changes");
456 $w->characters($mut->restriction_changes);
457 $w->endTag;
459 if ($mut->region) {
460 if($mut->region_value and $mut->region_dist) {
461 $w->startTag("region",
462 "value" => $mut->region_value,
463 "dist" => $mut->region_dist
466 elsif($mut->region_value) {
467 $w->startTag("region",
468 "value" => $mut->region_value
471 elsif($mut->region_dist) {
472 $w->startTag("region",
473 "dist" => $mut->region_dist
475 } else {
476 $w->startTag("region");
478 $w->characters($mut->region );
479 $w->endTag;
481 $w->endTag; #RNA
485 # AA
487 elsif( $mut->isa('Bio::Variation::AAChange') ) {
488 $mut->isMutation(0) if not $mut->isMutation;
489 my @alleles = $mut->each_Allele;
490 my $count = 0;
491 foreach my $allele (@alleles) {
492 $count++;
493 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
494 if ($change_number and $change_number != $count){
495 $mut->mut_number("$change_number.$count");
497 $mut->allele_mut($allele);
498 $w->startTag("AA",
499 "number" => $mut->mut_number,
500 "start" => $mut->start,
501 "end" => $mut->end,
502 "length" => $mut->length,
503 "isMutation" => $mut->isMutation
506 if ($mut->label) {
507 foreach my $label (split ', ', $mut->label) {
508 $w->startTag("label");
509 $w->characters($label );
510 $w->endTag;
513 if ($mut->proof) {
514 $w->startTag("proof");
515 $w->characters($mut->proof );
516 $w->endTag;
518 #if ( $mut->isMutation) {
519 if ($mut->allele_ori) {
520 $w->startTag("allele_ori");
521 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
522 $w->endTag;
524 if ($mut->allele_mut) {
525 $w->startTag("allele_mut");
526 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
527 $w->endTag;
530 if ($mut->region) {
531 if($mut->region_value and $mut->region_dist) {
532 $w->startTag("region",
533 "value" => $mut->region_value,
534 "dist" => $mut->region_dist
537 elsif($mut->region_value) {
538 $w->startTag("region",
539 "value" => $mut->region_value
542 elsif($mut->region_dist) {
543 $w->startTag("region",
544 "dist" => $mut->region_dist
546 } else {
547 $w->startTag("region");
549 $w->characters($mut->region );
550 $w->endTag;
552 $w->endTag; #AA
558 $w->endTag;
561 $w->end;
562 $self->_print($str);
563 $output = undef;
564 return 1;