3 # BioPerl module for Bio::OntologyIO
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
7 # Copyright Hilmar Lapp
9 # You may distribute this module under the same terms as perl itself
12 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
15 # You may distribute this module under the same terms as perl itself.
16 # Refer to the Perl Artistic License (see the license accompanying this
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
18 # for the terms under which you may use, modify, and redistribute this module.
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
22 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 # POD documentation - main docs before the code
29 Bio::OntologyIO - Parser factory for Ontology formats
35 my $parser = Bio::OntologyIO->new(-format => "go",
38 while(my $ont = $parser->next_ontology()) {
39 print "read ontology ",$ont->name()," with ",
40 scalar($ont->get_root_terms)," root terms, and ",
41 scalar($ont->get_leaf_terms)," leaf terms\n";
46 This is the parser factory for different ontology sources and
47 formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
48 difference is that the chunk of data returned as an object is an
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via
68 http://bugzilla.open-bio.org/
70 =head1 AUTHOR - Hilmar Lapp
72 Email hlapp at gmx.net
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with an underscore (_).
82 # Let the code begin...
85 package Bio
::OntologyIO
;
88 # Object preamble - inherits from Bio::Root::Root and Bio::Root::IO
91 use base
qw(Bio::Root::Root Bio::Root::IO);
94 # Maps from format name to driver suitable for the format.
96 my %format_driver_map = (
99 "interpro" => "InterProParser",
100 "interprosax" => "Handlers::InterPro_BioSQL_Handler",
101 "evoc" => "simplehierarchy",
108 Usage : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
109 Function: Returns a stream of ontologies opened on the specified input
110 for the specified format.
111 Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
112 for the specified format.
113 Args : Named parameters. Common parameters are
115 -format - the format of the input; the following are
117 goflat: DAG-Edit Gene Ontology flat files
118 go : synonymous to goflat
119 soflat: DAG-Edit Sequence Ontology flat files
120 so : synonymous to soflat
121 simplehierarchy: text format with one term per line
122 and indentation giving the hierarchy
123 evoc : synonymous to simplehierarchy
124 interpro: InterPro XML
125 interprosax: InterPro XML - this is actually not a
126 Bio::OntologyIO compliant parser; instead it
127 persists terms as they are encountered.
128 L<Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler>
129 obo : OBO format style from Gene Ontology Consortium
130 -file - the file holding the data
131 -fh - the stream providing the data (-file and -fh are
133 -ontology_name - the name of the ontology
134 -engine - the L<Bio::Ontology::OntologyEngineI> object
135 to be reused (will be created otherwise); note
136 that every L<Bio::Ontology::OntologyI> will
137 qualify as well since that one inherits from the
139 -term_factory - the ontology term factory to use. Provide a
140 value only if you know what you are doing.
142 DAG-Edit flat file parsers will usually also accept the
143 following parameters.
145 -defs_file - the name of the file holding the term
147 -files - an array ref holding the file names (for GO,
148 there will usually be 3 files: component.ontology,
149 function.ontology, process.ontology)
151 Other parameters are specific to the parsers.
156 my ($caller,@args) = @_;
157 my $class = ref($caller) || $caller;
158 # or do we want to call SUPER on an object if $caller is an
160 if( $class =~ /Bio::OntologyIO::(\S+)/ ) {
161 my ($self) = $class->SUPER::new
(@args);
162 $self->_initialize(@args);
166 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
167 my $format = $class->_map_format($param{'-format'});
169 # normalize capitalization
170 return unless( $class->_load_format_module($format) );
171 return "Bio::OntologyIO::$format"->new(@args);
177 my($self, @args) = @_;
179 # initialize factories etc
180 my ($eng,$fact,$ontname) =
181 $self->_rearrange([qw(TERM_FACTORY)
183 # term object factory
184 $self->term_factory($fact) if $fact;
186 # initialize the Bio::Root::IO part
187 $self->_initialize_io(@args);
192 Title : next_ontology
193 Usage : $ont = $stream->next_ontology()
194 Function: Reads the next ontology object from the stream and returns it.
195 Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
203 shift->throw_not_implemented();
209 Usage : $obj->term_factory($newval)
210 Function: Get/set the ontology term factory to use.
212 As a user of this module it is not necessary to call this
213 method as there will be a default. In order to change the
214 default, the easiest way is to instantiate
215 L<Bio::Ontology::TermFactory> with the proper -type
216 argument. Most if not all parsers will actually use this
217 very implementation, so even easier than the aforementioned
218 way is to simply call
219 $ontio->term_factory->type("Bio::Ontology::MyTerm").
222 Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
223 Args : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
231 return $self->{'term_factory'} = shift if @_;
232 return $self->{'term_factory'};
235 =head1 Private Methods
237 Some of these are actually 'protected' in OO speak, which means you
238 may or will want to utilize them in a derived ontology parser, but
239 you should not call them from outside.
243 =head2 _load_format_module
245 Title : _load_format_module
246 Usage : *INTERNAL OntologyIO stuff*
247 Function: Loads up (like use) a module at run time on demand
254 sub _load_format_module
{
255 my ($self, $format) = @_;
256 my $module = "Bio::OntologyIO::" . $format;
260 $ok = $self->_load_module($module);
264 $self: $format cannot be found
266 For more information about the OntologyIO system please see the docs.
267 This includes ways of checking for formats at compile time, not run time
285 $mod = $format_driver_map{lc($format)};
286 $mod = lc($format) unless $mod;
288 $self->throw("unable to guess ontology format, specify -format");
294 my( $self, $ref ) = @_;
295 $ref =~ s/<\\;/\</g;
296 $ref =~ s/>\\;/\>/g;
297 $ref =~ s/&pct\\;/\%/g;