* sync with trunk
[bioperl-live.git] / Bio / OntologyIO.pm
blob7077d434ced9f6c523ee5fb5647316a70072ea50
1 # $Id$
3 # BioPerl module for Bio::OntologyIO
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
7 # Copyright Hilmar Lapp
9 # You may distribute this module under the same terms as perl itself
12 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
15 # You may distribute this module under the same terms as perl itself.
16 # Refer to the Perl Artistic License (see the license accompanying this
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
18 # for the terms under which you may use, modify, and redistribute this module.
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
22 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 # POD documentation - main docs before the code
27 =head1 NAME
29 Bio::OntologyIO - Parser factory for Ontology formats
31 =head1 SYNOPSIS
33 use Bio::OntologyIO;
35 my $parser = Bio::OntologyIO->new(-format => "go",
36 -file=> $file);
38 while(my $ont = $parser->next_ontology()) {
39 print "read ontology ",$ont->name()," with ",
40 scalar($ont->get_root_terms)," root terms, and ",
41 scalar($ont->get_leaf_terms)," leaf terms\n";
42 }
44 =head1 DESCRIPTION
46 This is the parser factory for different ontology sources and
47 formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
48 difference is that the chunk of data returned as an object is an
49 entire ontology.
51 =head1 FEEDBACK
53 =head2 Mailing Lists
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 =head2 Reporting Bugs
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via
66 the web:
68 http://bugzilla.open-bio.org/
70 =head1 AUTHOR - Hilmar Lapp
72 Email hlapp at gmx.net
74 =head1 APPENDIX
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
79 =cut
82 # Let the code begin...
85 package Bio::OntologyIO;
86 use strict;
88 # Object preamble - inherits from Bio::Root::Root
91 use base qw(Bio::Root::Root Bio::Root::IO);
# Format aliases. Keys are the lowercase names accepted for -format;
# values are the driver names that _map_format() hands back and that new()
# appends to "Bio::OntologyIO::" to obtain the parser class. A format that
# is not listed here is used as its own (lowercased) driver name.
my %format_driver_map = (
    go          => 'goflat',
    so          => 'soflat',
    interpro    => 'InterProParser',
    interprosax => 'Handlers::InterPro_BioSQL_Handler',
    evoc        => 'simplehierarchy',
    obo         => 'obo',
);
105 =head2 new
107 Title : new
108 Usage : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
109 Function: Returns a stream of ontologies opened on the specified input
110 for the specified format.
111 Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
112 for the specified format.
113 Args : Named parameters. Common parameters are
115 -format - the format of the input; the following are
116 presently supported:
117 goflat: DAG-Edit Gene Ontology flat files
118 go : synonymous to goflat
119 soflat: DAG-Edit Sequence Ontology flat files
120 so : synonymous to soflat
121 simplehierarchy: text format with one term per line
122 and indentation giving the hierarchy
123 evoc : synonymous to simplehierarchy
124 interpro: InterPro XML
125 interprosax: InterPro XML - this is actually not a
126 Bio::OntologyIO compliant parser; instead it
127 persists terms as they are encountered.
128 L<Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler>
129 obo : OBO format style from Gene Ontology Consortium
130 -file - the file holding the data
131 -fh - the stream providing the data (-file and -fh are
132 mutually exclusive)
133 -ontology_name - the name of the ontology
134 -engine - the L<Bio::Ontology::OntologyEngineI> object
135 to be reused (will be created otherwise); note
136 that every L<Bio::Ontology::OntologyI> will
137 qualify as well since that one inherits from the
138 former.
139 -term_factory - the ontology term factory to use. Provide a
140 value only if you know what you are doing.
142 DAG-Edit flat file parsers will usually also accept the
143 following parameters.
145 -defs_file - the name of the file holding the term
146 definitions
147 -files - an array ref holding the file names (for GO,
148 there will usually be 3 files: component.ontology,
149 function.ontology, process.ontology)
151 Other parameters are specific to the parsers.
153 =cut
# Factory constructor.
#
# Called on Bio::OntologyIO itself, it resolves -format to a driver name,
# loads Bio::OntologyIO::<driver> on demand and delegates construction to
# that class. Called on a class below Bio::OntologyIO::, it builds the
# object through the parent constructor and runs _initialize() on it.
#
# Args    : named parameters; -format (any capitalization of the key) is
#           required on the factory path, everything is passed through.
# Returns : the parser object, or nothing if the driver module could not
#           be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Factory path: we were not invoked on a concrete driver class.
    if ($class !~ /Bio::OntologyIO::(\S+)/) {
        my %param = @args;

        # Add a lowercased copy of every key so that -FORMAT, -Format, ...
        # are all found under '-format'. Names and values are snapshotted
        # first so the copies are taken from the unmodified hash.
        my @names  = keys %param;
        my @values = @param{@names};
        @param{ map { lc $_ } @names } = @values;

        my $format = $class->_map_format($param{'-format'});

        $class->_load_format_module($format) or return;

        my $driver_class = "Bio::OntologyIO::$format";
        return $driver_class->new(@args);
    }

    # Driver path: construct via the parent class, then initialize.
    my ($self) = $class->SUPER::new(@args);
    $self->_initialize(@args);
    return $self;
}
# Common initialization for parser instances; called from new().
#
# Args    : the named parameters given to new(). Only -term_factory is
#           consumed here; the complete list is also forwarded to
#           _initialize_io().
# Returns : whatever _initialize_io() returns.
sub _initialize {
    my ($self, @args) = @_;

    # Exactly one key is requested from _rearrange(), so exactly one
    # variable receives the result. Unpacking into a longer list put the
    # factory into the first (unused) variable, which left the factory
    # variable undefined and caused -term_factory to be ignored.
    my ($fact) = $self->_rearrange([qw(TERM_FACTORY)], @args);

    # term object factory, if the caller supplied one
    $self->term_factory($fact) if $fact;

    # initialize the Bio::Root::IO part
    return $self->_initialize_io(@args);
}
190 =head2 next_ontology
192 Title : next_ontology
193 Usage : $ont = $stream->next_ontology()
194 Function: Reads the next ontology object from the stream and returns it.
195 Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
196 end of the stream
197 Args : none
200 =cut
# Placeholder for the stream-reading method: this base implementation only
# calls throw_not_implemented() on the invocant, so a concrete parser has
# to supply its own next_ontology().
sub next_ontology {
    my $self = shift;
    return $self->throw_not_implemented();
}
206 =head2 term_factory
208 Title : term_factory
209 Usage : $obj->term_factory($newval)
210 Function: Get/set the ontology term factory to use.
212 As a user of this module it is not necessary to call this
213 method as there will be a default. In order to change the
214 default, the easiest way is to instantiate
215 L<Bio::Ontology::TermFactory> with the proper -type
216 argument. Most if not all parsers will actually use this
217 very implementation, so even easier than the aforementioned
218 way is to simply call
219 $ontio->term_factory->type("Bio::Ontology::MyTerm").
221 Example :
222 Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
223 Args : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
226 =cut
# Combined getter/setter for the term factory stored under the
# 'term_factory' key of the object hash.
#
# Args    : optional new value; when given it replaces the stored one.
# Returns : the (possibly just updated) stored value.
sub term_factory {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'term_factory'} = $new_value[0];
    }
    return $self->{'term_factory'};
}
235 =head1 Private Methods
237 Some of these are actually 'protected' in OO speak, which means you
238 may or will want to utilize them in a derived ontology parser, but
239 you should not call them from outside.
241 =cut
243 =head2 _load_format_module
245 Title : _load_format_module
246 Usage : *INTERNAL OntologyIO stuff*
247 Function: Loads up (like use) a module at run time on demand
248 Example :
249 Returns :
250 Args :
252 =cut
# Loads the driver module for a format at run time.
#
# Args    : the driver name; it is appended to "Bio::OntologyIO::" and the
#           resulting module name is handed to $self->_load_module().
# Returns : the value returned by _load_module(), or undef if that call
#           died. In the latter case a diagnostic containing the exception
#           text is written to STDERR instead of rethrowing.
sub _load_format_module {
    my ($self, $format) = @_;
    my $driver_class = "Bio::OntologyIO::" . $format;

    # A die inside the block leaves $loaded undefined and $@ set.
    my $loaded = eval { $self->_load_module($driver_class) };
    return $loaded unless $@;

    print STDERR <<END;
$self: $format cannot be found
Exception $@
For more information about the OntologyIO system please see the docs.
This includes ways of checking for formats at compile time, not run time
END
    return $loaded;
}
# Destructor: calls close() on the object when it is destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
# Translates a user-supplied format name into a driver name.
#
# Args    : the value given for -format.
# Returns : the entry of %format_driver_map for the lowercased name, or the
#           lowercased name itself when the map has no (true) entry.
# Throws  : via $self->throw() when no format was given.
sub _map_format {
    my ($self, $format) = @_;
    my $driver;

    if (!$format) {
        $self->throw("unable to guess ontology format, specify -format");
    }
    else {
        my $key = lc($format);
        $driver = $format_driver_map{$key} || $key;
    }

    return $driver;
}
# Reverses the escaping used in ontology text.
#
# The sequences "&lt\;", "&gt\;" and "&pct\;" (each with a literal backslash
# before the semicolon) become "<", ">" and "%"; a backslash followed by
# "n" or "t" becomes a real newline or tab. Rules are applied in that order.
#
# Args    : the string to process (a copy is modified, not the caller's).
# Returns : the unescaped string.
sub unescape {
    my ($self, $text) = @_;

    my @rules = (
        [ qr/&lt\\;/,  '<'  ],
        [ qr/&gt\\;/,  '>'  ],
        [ qr/&pct\\;/, '%'  ],
        [ qr/\\n/,     "\n" ],
        [ qr/\\t/,     "\t" ],
    );

    for my $rule (@rules) {
        my ($pattern, $replacement) = @{$rule};
        $text =~ s/$pattern/$replacement/g;
    }

    return $text;
}