tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / SeqIO / tinyseq / tinyseqHandler.pm
blob968a0a6f6f651c6208a3d679ff06f00dd07188fc
1 # BioPerl module for Bio::SeqIO::tinyseqHandler
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Donald Jackson, donald.jackson@bms.com
7 # Copyright Bristol-Myers Squibb
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SeqIO::tinyseq::tinyseqHandler - XML event handlers to support NCBI TinySeq XML parsing
17 =head1 SYNOPSIS
19 Do not use this module directly; use the SeqIO handler system:
21 $stream = Bio::SeqIO->new( -file => $filename, -format => 'tinyseq' );
23 while ( my $seq = $stream->next_seq ) {
24 ....
27 =head1 DESCRIPTION
29 This object provides event handler methods for parsing sequence files
30 in the NCBI TinySeq XML format. A TinySeq is a lightweight XML file
31 of sequence information on one or more sequences, analgous to FASTA
32 format.
34 See L<http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.mod.dtd> for the DTD.
36 =head1 FEEDBACK
38 =head2 Mailing Lists
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via
62 the web:
64 http://bugzilla.open-bio.org/
66 =head1 SEE ALSO
68 L<Bio::SeqIO>, L<Bio::Seq>.
70 =head1 AUTHOR
72 Donald Jackson, E<lt>donald.jackson@bms.comE<gt>
74 =head1 APPENDIX
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
79 =cut
81 package Bio::SeqIO::tinyseq::tinyseqHandler;
83 use strict;
84 use warnings;
87 use vars qw(%ATTMAP);
89 use base qw(Bio::Root::Root);
91 # %ATTMAP defines correspondence between TSeq elements, PrimarySeq attributes
92 # Format: element_name => { xml_attname => pseq_attname }
93 %ATTMAP = ( TSeq_sequence => { Data => '-seq'},
94 TSeq_gi => { Data => '-primary_id' },
95 TSeq_defline => { Data => '-desc' },
96 TSeq_sid => { Data => '-sid' },
97 TSeq_accver => { Data => '-accver' },
98 TSeq_taxid => { Data => '-taxid' },
99 TSeq_orgname => { Data => '-organism' }
102 =head2 new
104 Title : new
105 Usage : $handler = Bio::SeqIO::tinyseq::tinyseqHandler->new()
106 Function : instantiates a tinyseqHandler for use by
107 XML::Parser::PerlSAX
108 Returns : Bio::SeqIO::tinyseq::tinyseqHandler object
109 Args : NONE
111 =cut
113 sub new {
114 my ($proto, @args) = @_;
115 my $class = ref($proto) || $proto;
117 my $self = bless({}, $class);
119 return $self;
122 #######################################
123 # Event hadling methods for PerlSax #
124 #######################################
126 sub doctype_decl {
127 my ($self, $doctype) = @_;
128 # make sure we have a tinyseq
129 unless ($doctype->{'SystemId'} eq 'http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd') {
130 $self->throw("This document doesn't use the NCBI TinySeq dtd; it's a ", $doctype->{'SystemId'} );
135 =head2 start_document
137 Title : start_document
138 Usage : NONE
139 Function : start_document handler for use by XML::Parser::PerlSAX
140 Returns : NONE
141 Args : NONE
143 =cut
145 sub start_document {
146 my ($self) = @_;
148 $self->{'_seqatts'} = [];
149 $self->{'_elements'} = [];
152 =head2 end_document
154 Title : end_document
155 Usage : NONE
156 Function : end_document handler for use by XML::Parser::PerlSAX
157 Returns : NONE
158 Args : NONE
160 =cut
162 sub end_document {
163 my ($self) = @_;
164 return $self->{'_seqatts'};
167 =head2 start_element
169 Title : start_element
170 Usage : NONE
171 Function : start_element handler for use by XML::Parser::PerlSAX
172 Returns : NONE
173 Args : NONE
175 =cut
177 sub start_element {
178 my ($self, $starting) = @_;
180 push(@{$self->{'_elements'}}, $starting);
183 =head2 end_element
185 Title : end_element
186 Usage : NONE
187 Function : end_element handler for use by XML::Parser::PerlSAX
188 Returns : NONE
189 Args : NONE
191 =cut
193 sub end_element {
194 my ($self, $ending) = @_;
196 # do I have a handler for this element?
197 my $ename = $ending->{'Name'};
198 $self->$ename if ($self->can($ename));
201 =head2 characters
203 Title : characters
204 Usage : NONE
205 Function : characters handler for use by XML::Parser::PerlSAX
206 Returns : NONE
207 Args : NONE
209 =cut
211 sub characters {
212 my ($self, $characters) = @_;
214 my $data = $characters->{'Data'};
216 return unless (defined($data) and $data =~ /\S/);
218 my $current = $self->_current_element;
219 $current->{'Data'} = $data;
223 ###########################################
224 # Element-specific handlers
225 # called at END of element name
226 ##########################################
228 =head2 TSeq
230 Title : TSeq
231 Usage : NONE
232 Function : event handler for END of a TSeq element
233 Returns : loh of parsed sequence atts for Bio::SeqIO::tinyseq
234 Args : NONE
236 =cut
238 sub TSeq {
239 my ($self) = @_;
241 my %seqatts;
243 # map elements onto PrimarySeq keys
244 while (my $element = pop @{ $self->{'_elements'} }) {
245 my $element_name = $element->{'Name'};
246 last if ($element_name eq 'TSeq');
248 my $conversion = $ATTMAP{$element_name} or next;
250 while(my($element_att, $pseq_att) = each %$conversion) {
251 $seqatts{$pseq_att} = $element->{$element_att};
255 push(@{ $self->{'_seqatts'} }, \%seqatts);
259 #############################################
260 # Utility method to return current element info
261 ##############################################
263 =head2 _current_element
265 Title : _current_element
266 Usage : Internal method
267 Function : Utility method to return current element info
268 Returns : XML::Parser::PerlSAX hash for current element
269 Args : NONE
271 =cut
273 sub _current_element {
274 my ($self) = @_;
275 return $self->{'_elements'}->[-1];