1 # BioPerl module for Bio::Variation::IO::xml
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Variation::IO::xml - XML sequence variation input/output stream
20 Do not use this module directly. Use it via the Bio::Variation::IO class.
24 This object can transform L<Bio::Variation::SeqDiff> objects to and from XML
27 The XML format, although consistent, is still evolving. The current
28 DTD for it is at L<http://www.ebi.ac.uk/mutations/DTDE/seqDiff.dtd>.
32 To use this code you need the module L<XML::Twig> which creates an
33 interface to L<XML::Parser> to read XML and modules L<XML::Writer> and
34 L<IO::String> to write XML out.
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to the
42 Bioperl mailing lists Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
60 report bugs to the Bioperl bug tracking system to help us keep track
61 the bugs and their resolution. Bug reports can be submitted via the
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Heikki Lehvaslaiho
68 Email: heikki-at-bioperl-dot-org
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
77 # Let the code begin...
79 package Bio
::Variation
::IO
::xml
;
81 use vars
qw($seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
87 use Bio::Variation::SeqDiff;
88 use Bio::Variation::DNAMutation;
89 use Bio::Variation::RNAChange;
90 use Bio::Variation::AAChange;
91 use Bio::Variation::Allele;
93 use base qw(Bio::Variation::IO);
95 # _initialize is where the heavy stuff will happen when new is called
98 my ($class,@args) = @_;
99 my $self = bless {}, $class;
100 $self->_initialize(@args);
105 my($self,@args) = @_;
106 return unless $self->SUPER::_initialize
(@args);
112 Usage : $haplo = $stream->next()
113 Function: returns the next seqDiff in the stream
114 Returns : Bio::Variation::SeqDiff object
122 $seqdiff->id( $term->att('id') );
123 $seqdiff->alphabet( $term->att('moltype') );
124 $seqdiff->offset( $term->att('offset') );
126 foreach my $child ($term->children) {
127 _variant
($t, $child);
134 my $att = $term->atts();
135 my ($variation_number, $change_number) = split /\./, $att->{number
};
137 # if more than two alleles
138 if ($variation_number and $change_number and $change_number > 1 ) {
139 my $a3 = Bio
::Variation
::Allele
->new;
140 $a3->seq( $term->first_child_text('allele_mut') )
141 if $term->first_child_text('allele_mut');
142 if ($term->gi eq 'DNA') {
143 $prevdnaobj->add_Allele($a3);
145 elsif ($term->gi eq 'RNA') {
146 $prevrnaobj->add_Allele($a3);
148 $prevaaobj->add_Allele($a3);
150 } else { # create new variants
151 if ($term->gi eq 'DNA') {
152 $var = Bio
::Variation
::DNAMutation
->new();
154 elsif ($term->gi eq 'RNA') {
155 $var = Bio
::Variation
::RNAChange
->new();
157 $var = Bio
::Variation
::AAChange
->new();
160 # these are always present
161 $var->start( $att->{start
} );
162 $var->end( $att->{end
});
163 $var->length($att->{len
});
164 $var->mut_number( $att->{number
});
165 $var->upStreamSeq($term->first_child_text('upFlank'));
166 $var->dnStreamSeq($term->first_child_text('dnFlank'));
167 $var->proof($term->first_child_text('proof'));
170 my $region = $term->first_child('region');
172 $var->region($region->text);
173 my $region_atts = $region->atts;
174 $var->region_value( $region_atts->{value
} )
175 if $region_atts->{value
};
176 $var->region_dist( $region_atts->{dist
} )
177 if $region_atts->{dist
};
181 my $a1 = Bio
::Variation
::Allele
->new;
182 $a1->seq($term->first_child_text('allele_ori') )
183 if $term->first_child_text('allele_ori');
184 $var->allele_ori($a1);
185 my $a2 = Bio
::Variation
::Allele
->new;
186 $a2->seq($term->first_child_text('allele_mut') )
187 if $term->first_child_text('allele_mut');
188 $var->isMutation(1) if $term->att('isMutation');
189 $var->allele_mut($a2);
190 $var->add_Allele($a2);
191 $var->length( $term->att('length') );
192 $seqdiff->add_Variant($var);
194 # variant specific code
195 if ($term->gi eq 'DNA') {
198 elsif ($term->gi eq 'RNA') {
199 my $codon = $term->first_child('codon');
201 my $codon_atts = $codon->atts;
202 $var->codon_table( $codon->att('codon_table') )
203 if $codon_atts->{codon_table
} and $codon_atts->{codon_table
} != 1;
204 $var->codon_pos( $codon->att('codon_pos') )
205 if $codon_atts->{codon_pos
};
207 $prevdnaobj->RNAChange($var);
208 $var->DNAMutation($prevdnaobj);
211 $prevrnaobj->AAChange($var);
212 $var->RNAChange($prevrnaobj);
221 local $/ = "</seqDiff
>\n";
222 return unless my $entry = $self->_readline;
223 # print STDERR "|$entry|";
224 return unless $entry =~ /^\W*<seqDiff/;
226 $seqdiff = Bio::Variation::SeqDiff->new;
228 # create new parser object
229 my $twig_handlers = {'seqDiff' => \&_seqDiff };
230 my $t = XML::Twig->new ( TwigHandlers => $twig_handlers,
240 Usage : $stream->write(@haplos)
241 Function: writes the $seqDiff objects into the stream
242 Returns : 1 for success and 0 for error
243 Args : Bio::Variation::SeqDiff object
250 if( !defined $h[0] ) {
251 $self->throw("Attempting to
write with
no information
!");
254 my $output = IO::String->new($str);
255 my $w = XML::Writer->new(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
261 $h->alphabet || $self->throw("Moltype of the reference sequence is
not set
!");
263 foreach my $mut ($h->each_Variant) {
264 $hasAA = 1 if $mut->isa('Bio::Variation::AAChange');
267 $w->startTag("seqDiff
",
269 "moltype
" => $h->alphabet,
270 "offset
" => $h->offset,
271 "sysname
" => $h->sysname,
272 "trivname
" => $h->trivname
275 $w->startTag("seqDiff
",
277 "moltype
" => $h->alphabet,
278 "offset
" => $h->offset,
279 "sysname
" => $h->sysname
282 my @allvariants = $h->each_Variant;
283 #print "allvars
:", scalar @allvariants, "\n";
285 foreach my $mut ($h->each_Variant) {
286 #print STDERR $mut->mut_number, "\t", $mut, "\t",
287 #$mut->proof, "\t", scalar $mut->each_Allele, "\n";
288 push @{$variants{$mut->mut_number} }, $mut;
290 foreach my $var (sort keys %variants) {
291 foreach my $mut (@{$variants{$var}}) {
295 if( $mut->isa('Bio::Variation::DNAMutation') ) {
296 $mut->isMutation(0) if not $mut->isMutation;
297 my @alleles = $mut->each_Allele;
299 foreach my $allele (@alleles) {
301 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
302 if ($change_number and $change_number != $count){
303 $mut->mut_number("$change_number.$count");
305 $mut->allele_mut($allele);
307 "number
" => $mut->mut_number,
308 "start
" => $mut->start,
310 "length" => $mut->length,
311 "isMutation
" => $mut->isMutation
314 foreach my $label (split ', ', $mut->label) {
315 $w->startTag("label
");
316 $w->characters($label);
321 $w->startTag("proof
");
322 $w->characters($mut->proof );
325 if ($mut->upStreamSeq) {
326 $w->startTag("upFlank
");
327 $w->characters($mut->upStreamSeq );
330 #if ( $mut->isMutation) {
331 #if ($mut->allele_ori) {
332 $w->startTag("allele_ori
");
333 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
336 #if ($mut->allele_mut) {
337 $w->startTag("allele_mut
");
338 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
342 if ($mut->dnStreamSeq) {
343 $w->startTag("dnFlank
");
344 $w->characters($mut->dnStreamSeq );
347 if ($mut->restriction_changes) {
348 $w->startTag("restriction_changes
");
349 $w->characters($mut->restriction_changes);
353 if($mut->region_value and $mut->region_dist) {
354 $w->startTag("region
",
355 "value
" => $mut->region_value,
356 "dist
" => $mut->region_dist
359 elsif($mut->region_value) {
360 $w->startTag("region
",
361 "value
" => $mut->region_value
364 elsif($mut->region_dist) {
365 $w->startTag("region
",
366 "dist
" => $mut->region_dist
369 $w->startTag("region
");
371 $w->characters($mut->region );
380 elsif( $mut->isa('Bio::Variation::RNAChange') ) {
381 $mut->isMutation(0) if not $mut->isMutation;
382 my @alleles = $mut->each_Allele;
384 foreach my $allele (@alleles) {
386 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
387 if ($change_number and $change_number != $count){
388 $mut->mut_number("$change_number.$count");
390 $mut->allele_mut($allele);
392 "number
" => $mut->mut_number,
393 "start
" => $mut->start,
395 "length" => $mut->length,
396 "isMutation
" => $mut->isMutation
400 foreach my $label (split ', ', $mut->label) {
401 $w->startTag("label
");
402 $w->characters($label );
407 $w->startTag("proof
");
408 $w->characters($mut->proof );
411 if ($mut->upStreamSeq) {
412 $w->startTag("upFlank
");
413 $w->characters($mut->upStreamSeq );
416 #if ( $mut->isMutation) {
417 if ($mut->allele_ori) {
418 $w->startTag("allele_ori
");
419 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
422 if ($mut->allele_mut) {
423 $w->startTag("allele_mut
");
424 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
428 if ($mut->dnStreamSeq) {
429 $w->startTag("dnFlank
");
430 $w->characters($mut->dnStreamSeq );
433 if ($mut->region eq 'coding') {
434 if (! $mut->codon_mut) {
435 $w->startTag("codon
",
436 "codon_ori
" => $mut->codon_ori,
437 "codon_pos
" => $mut->codon_pos
440 $w->startTag("codon
",
441 "codon_ori
" => $mut->codon_ori,
442 "codon_mut
" => $mut->codon_mut,
443 "codon_pos
" => $mut->codon_pos
448 if ($mut->codon_table != 1) {
449 $w->startTag("codon_table
");
450 $w->characters($mut->codon_table);
454 if ($mut->restriction_changes) {
455 $w->startTag("restriction_changes
");
456 $w->characters($mut->restriction_changes);
460 if($mut->region_value and $mut->region_dist) {
461 $w->startTag("region
",
462 "value
" => $mut->region_value,
463 "dist
" => $mut->region_dist
466 elsif($mut->region_value) {
467 $w->startTag("region
",
468 "value
" => $mut->region_value
471 elsif($mut->region_dist) {
472 $w->startTag("region
",
473 "dist
" => $mut->region_dist
476 $w->startTag("region
");
478 $w->characters($mut->region );
487 elsif( $mut->isa('Bio::Variation::AAChange') ) {
488 $mut->isMutation(0) if not $mut->isMutation;
489 my @alleles = $mut->each_Allele;
491 foreach my $allele (@alleles) {
493 my ($variation_number, $change_number) = split /\./, $mut->mut_number;
494 if ($change_number and $change_number != $count){
495 $mut->mut_number("$change_number.$count");
497 $mut->allele_mut($allele);
499 "number
" => $mut->mut_number,
500 "start
" => $mut->start,
502 "length" => $mut->length,
503 "isMutation
" => $mut->isMutation
507 foreach my $label (split ', ', $mut->label) {
508 $w->startTag("label
");
509 $w->characters($label );
514 $w->startTag("proof
");
515 $w->characters($mut->proof );
518 #if ( $mut->isMutation) {
519 if ($mut->allele_ori) {
520 $w->startTag("allele_ori
");
521 $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
524 if ($mut->allele_mut) {
525 $w->startTag("allele_mut
");
526 $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
531 if($mut->region_value and $mut->region_dist) {
532 $w->startTag("region
",
533 "value
" => $mut->region_value,
534 "dist
" => $mut->region_dist
537 elsif($mut->region_value) {
538 $w->startTag("region
",
539 "value
" => $mut->region_value
542 elsif($mut->region_dist) {
543 $w->startTag("region
",
544 "dist
" => $mut->region_dist
547 $w->startTag("region
");
549 $w->characters($mut->region );