[bug 2714]
[bioperl-live.git] / Bio / FeatureIO / interpro.pm
blob581f0dce5013f63814fafd3606628daf89f6d22b
2 =head1 NAME
4 Bio::FeatureIO::interpro - read features from InterPro XML
6 =head1 SYNOPSIS
8 my $in = Bio::FeatureIO(-format=>'interpro');
9 while (my $feat = $in->next_feature) {
10 # do something with the Bio::SeqFeatureI object
13 =head1 DESCRIPTION
15 See L<http://www.ebi.ac.uk/interpro/documentation.html>.
17 =head1 FEEDBACK
19 =head2 Mailing Lists
21 User feedback is an integral part of the evolution of this and other
22 Bioperl modules. Send your comments and suggestions preferably to
23 the Bioperl mailing list. Your participation is much appreciated.
25 bioperl-l@bioperl.org - General discussion
26 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
28 =head2 Reporting Bugs
30 Report bugs to the Bioperl bug tracking system to help us keep track
31 of the bugs and their resolution. Bug reports can be submitted via
32 the web:
34 http://bugzilla.open-bio.org/
36 =head1 AUTHOR - Allen Day
38 Email allenday@ucla.edu
40 =head1 APPENDIX
42 The rest of the documentation details each of the object methods.
43 Internal methods are usually preceded with a _
45 =cut
48 # Let the code begin...
50 package Bio::FeatureIO::interpro;
52 use strict;
53 use base qw(Bio::FeatureIO);
54 use Bio::SeqFeature::Annotated;
55 use Bio::OntologyIO;
57 use Bio::Annotation::Comment;
58 use Bio::Annotation::DBLink;
59 use Bio::Annotation::OntologyTerm;
60 use Bio::Annotation::SimpleValue;
61 use Bio::Annotation::Target;
63 use URI::Escape;
64 use XML::DOM;
65 use XML::DOM::XPath;
67 sub _initialize {
68 my($self,%arg) = @_;
70 $self->SUPER::_initialize(%arg);
71 $self->xml_parser(XML::DOM::Parser->new());
72 my $buf;
73 while(($buf = $self->_readline()) && $buf !~ /<protein/){
74 next;
76 $self->_pushback($buf);
79 sub next_feature {
80 my $self =shift;
81 my $buf; #line buffer
82 my $ok = 0; #true if there is another <protein/> record in stream
83 my $record; #holds the record to be parsed and returned.
85 #try to dump buffer from last record before moving on to next record
86 my $f = $self->_shift_feature_buffer();
87 if($f){
88 return $f;
91 while(my $buf = $self->_readline()){
92 $ok = 1 if $buf =~ m!<protein!;
93 $record .= $buf;
94 last if $buf =~ m!</protein>!;
96 return unless $ok;
98 my $dom = $self->xml_parser->parse($record);
101 my ($pNode) = $dom->findnodes('/protein');
103 my @iNodes = $pNode->findnodes('/protein/interpro');
105 foreach my $iNode (@iNodes){
106 my @cNodes = $iNode->findnodes('classification');
107 my @mNodes = $iNode->findnodes('match');
109 #we don't handle these
110 #my @nNodes = $iNode->findnodes('contains');
111 #my @fNodes = $iNode->findnodes('found_in');
113 foreach my $mNode (@mNodes){
114 my @lNodes = $mNode->findnodes('location');
115 foreach my $lNode (@lNodes){
116 my $feature = Bio::SeqFeature::Annotated->new(
117 -start => $lNode->getAttribute('start'),
118 -end => $lNode->getAttribute('end'),
119 -score => $lNode->getAttribute('score'),
120 # -seq_id => $pNode->getAttribute('id'),
122 $feature->seq_id->value($pNode->getAttribute('id'));
124 #warn $pNode->getAttribute('id');
126 $feature->source( $lNode->getAttribute('evidence') );
128 my $t = Bio::Annotation::OntologyTerm->new(-identifier => 'SO:0000417', -name => 'polypeptide_domain');
129 $feature->add_Annotation('type',$t);
131 my $c = Bio::Annotation::Comment->new(-tagname => 'comment', -text => $iNode->getAttribute('name'));
132 $feature->add_Annotation($c);
134 my $d = Bio::Annotation::DBLink->new();
135 $d->database($mNode->getAttribute('dbname'));
136 $d->primary_id($mNode->getAttribute('id'));
137 $d->optional_id($mNode->getAttribute('name'));
138 $feature->annotation->add_Annotation('dblink',$d);
140 my $s = Bio::Annotation::SimpleValue->new(-tagname => 'status', -value => $lNode->getAttribute('status'));
141 $feature->annotation->add_Annotation($s);
143 foreach my $cNode (@cNodes){
144 my $o = Bio::Annotation::OntologyTerm->new(-identifier => $cNode->getAttribute('id'));
145 $feature->annotation->add_Annotation('ontology_term',$o);
148 $self->_push_feature_buffer($feature);
153 return $self->_shift_feature_buffer;
156 =head2 _push_feature_buffer()
158 Usage :
159 Function:
160 Returns :
161 Args :
164 =cut
166 sub _push_feature_buffer {
167 my ($self,$f) = @_;
169 if(ref($f)){
170 push @{ $self->{feature_buffer} }, $f;
174 =head2 _shift_feature_buffer()
176 Usage :
177 Function:
178 Returns :
179 Args :
182 =cut
184 sub _shift_feature_buffer {
185 my ($self) = @_;
186 return $self->{feature_buffer} ? shift @{ $self->{feature_buffer} } : undef;
189 =head2 xml_parser()
191 Usage : $obj->xml_parser($newval)
192 Function:
193 Example :
194 Returns : value of xml_parser (a scalar)
195 Args : on set, new value (a scalar or undef, optional)
198 =cut
200 sub xml_parser {
201 my($self,$val) = @_;
202 $self->{'xml_parser'} = $val if defined($val);
203 return $self->{'xml_parser'};