Bio::DB::SeqFeature::* move namespace into its own distribution.
[bioperl-live.git] / lib / Bio / DB / GFF / Adaptor / biofetch_oracle.pm
blob7f8e0ef8a80ae5598c1344b92958d63d2abcac03
1 package Bio::DB::GFF::Adaptor::biofetch_oracle;
3 #$Id$
5 =head1 NAME
7 Bio::DB::GFF::Adaptor::biofetch_oracle -- Cache BioFetch objects in a Bio::DB::GFF database
9 =head1 SYNOPSIS
11 Proof of principle. Not for production use.
13 =head1 DESCRIPTION
15 This adaptor is a proof-of-principle. It is used to fetch BioFetch
16 sequences into a Bio::DB::GFF database (currently uses a hard-coded
17 EMBL database) as needed. This allows the Generic Genome Browser to
18 be used as a Genbank/EMBL browser.
20 =head1 AUTHOR
22 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
24 Copyright 2002 Cold Spring Harbor Laboratory.
26 This library is free software; you can redistribute it and/or modify
27 it under the same terms as Perl itself.
29 =cut
31 use strict;
32 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
33 use Bio::DB::BioFetch;
34 use Bio::SeqIO;
36 use vars qw(%default_preferred_tags);
37 use base qw(Bio::DB::GFF::Adaptor::dbi::oracle);
39 # priority for choosing names of CDS tags, higher is higher priority
40 %default_preferred_tags = (
41 strain => 10,
42 organism => 20,
43 protein_id => 40,
44 locus_tag => 50,
45 locus => 60,
46 gene => 70,
47 standard_name => 80,
50 sub _preferred_tags {
51 my ($self, $tags) = @_;
52 if ($tags && (ref($tags) =~ /HASH/)){
53 $self->{preferred_tags} = $tags;
55 return $self->{preferred_tags};
59 =head2 new
61 Title : new
62 Usage : $db = Bio::DB::GFF->new(-adaptor=>'biofetch_oracle', -preferred_tags => \%preferred, @args)
63 Function: create a new adaptor
64 Returns : a Bio::DB::GFF object
65 Args : -adaptor : required. Which adaptor to use; biofetch for mysql, biofetch_oracle for Oracle
66 -preferred_tags : optional. A hash of {classname => weight,...}
67 used to determine the class and name of the feature
68 when a choice of possible feature classes is available
69 (e.g. a feature has both a 'gene' and a 'locus' tag).
70 Common defaults are provided that work well for eukaryotic
71 features (but not well for viral/prokaryotic)
72 see below for additional arguments.
73 Status : Public
75 This is the constructor for the adaptor. It is called automatically
76 by Bio::DB::GFF-E<gt>new. In addition to arguments that are common among
77 all adaptors, the following class-specific arguments are recgonized:
79 Argument Description
80 -------- -----------
82 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040'
84 -user username for authentication
86 -pass the password for authentication
88 -proxy [['http','ftp'],'http://proxy:8080']
90 -create initialize the database
92 -dsn,-user and -pass indicate the local database to cache results in,
93 and as are per Bio::DB::GFF::Adaptor::dbi. The -proxy argument allows
94 you to set the biofetch web proxy, and uses the same syntax described
95 for the proxy() method of L<Bio::DB::WebDBSeqI>, except that the
96 argument must be passed as an array reference.
98 =cut
100 sub new {
101 my $class = shift;
102 my $args = shift;
103 my $self = $class->SUPER::new($args);
104 my ($preferred) = rearrange(['PREFERRED_TAGS'],$args);
105 $self->_preferred_tags($preferred?$preferred:\%default_preferred_tags); # if the caller sent their own preferences, then use these, otherwise use defaults.
107 my ($proxy) = rearrange(['PROXY'],$args);
108 if ($proxy) {
109 my @args = ref($proxy) ? @$proxy : eval $proxy;
110 $self->{_proxy} = \@args if @args;
112 $self;
115 sub segment {
116 my $self = shift;
117 my @segments = $self->SUPER::segment(@_);
119 if (!@segments) {
120 my $refclass = $self->refclass;
122 my %args = $self->setup_segment_args(@_);
123 if ($args{-class} && $args{-class} =~ /$refclass/oi) {
124 return unless $self->load_from_embl('embl'=>$args{-name});
125 @segments = $self->SUPER::segment(@_);
126 } elsif ($args{-class} && $args{-class} =~ /refseq|swall|embl/i) { #hack to get refseq names
127 return unless $self->load_from_embl(lc($args{-class})=>$args{-name});
128 $args{-class} = $self->refclass;
129 @segments = $self->SUPER::segment(%args);
133 $self->_multiple_return_args(@segments);
136 # default is to return 'Sequence' as the class of all references
137 sub refclass {
138 my $self = shift;
139 my $refname = shift;
140 'Accession';
143 sub load_from_embl {
144 my $self = shift;
145 my $db = shift;
146 my $acc = shift or $self->throw('Must provide an accession ID');
148 my $biofetch;
149 if ($self->{_biofetch}{$db}) {
150 $biofetch = $self->{_biofetch}{$db};
151 } else {
152 $biofetch = $self->{_biofetch}{$db} = Bio::DB::BioFetch->new(-db=>$db);
153 $biofetch->retrieval_type('tempfile');
154 $biofetch->proxy(@{$self->{_proxy}}) if $self->{_proxy};
157 my $seq = eval {$biofetch->get_Seq_by_id($acc)} or return;
158 $self->_load_embl($acc,$seq);
162 sub load_from_file {
163 my $self = shift;
164 my $file = shift;
166 my $format = $file =~ /\.(gb|genbank|gbk)$/i ? 'genbank' : 'embl';
168 my $seqio = Bio::SeqIO->new( '-format' => $format, -file => $file);
169 my $seq = $seqio->next_seq;
171 $self->_load_embl($seq->accession,$seq);
175 sub _load_embl {
176 my $self = shift;
177 my $acc = shift;
178 my $seq = shift;
179 my $refclass = $self->refclass;
180 my $locus = $seq->id;
182 # begin loading
183 $self->setup_load();
185 # first synthesize the entry for the top-level feature
186 my @aliases;
187 foreach ($seq->accession,$seq->get_secondary_accessions) {
188 next if lc($_) eq lc($acc);
189 push @aliases,[Alias => $_];
191 $self->load_gff_line(
193 ref => $acc,
194 class => $refclass,
195 source => 'EMBL',
196 method => 'origin',
197 start => 1,
198 stop => $seq->length,
199 score => undef,
200 strand => '.',
201 phase => '.',
202 gclass => $self->refclass,
203 gname => $acc,
204 tstart => undef,
205 tstop => undef,
206 attributes => [[Note => $seq->desc],@aliases],
209 # now load each feature in turn
210 for my $feat ($seq->all_SeqFeatures) {
211 my $attributes = $self->get_attributes($feat);
212 my $name = $self->guess_name($attributes);
214 my $location = $feat->location;
215 my @segments = map {[$_->start,$_->end,$_->seq_id]}
216 $location->can('sub_Location') ? $location->sub_Location : $location;
218 my $type = $feat->primary_tag eq 'CDS' ? 'mRNA' : $feat->primary_tag;
219 my $parttype = $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;
221 if ($feat->primary_tag =~ /^(gene|CDS)$/) {
222 $self->load_gff_line( {
223 ref => $acc,
224 class => $refclass,
225 source => 'EMBL',
226 method => $type,
227 start => $location->start,
228 stop => $location->end,
229 score => $feat->score || undef,
230 strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
231 phase => $feat->frame || '.',
232 gclass => $name->[0],
233 gname => $name->[1],
234 tstart => undef,
235 tstop => undef,
236 attributes => $attributes,
239 @$attributes = ();
242 for my $segment (@segments) {
244 $self->load_gff_line( {
245 ref => $segment->[2] eq $locus ? $acc : $segment->[2],
246 class => $refclass,
247 source => 'EMBL',
248 method => $parttype,
249 start => $segment->[0],
250 stop => $segment->[1],
251 score => $feat->score || undef,
252 strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
253 phase => $feat->frame || '.',
254 gclass => $name->[0],
255 gname => $name->[1],
256 tstart => undef,
257 tstop => undef,
258 attributes => $attributes,
265 # finish loading
266 $self->finish_load();
268 # now load the DNA
269 $self->load_sequence_string($acc,$seq->seq);
274 sub get_attributes {
275 my $self = shift;
276 my $seq = shift;
278 my @tags = $seq->all_tags or return;
279 my @result;
280 foreach my $tag (@tags) {
281 foreach my $value ($seq->each_tag_value($tag)) {
282 push @result,[$tag=>$value];
285 \@result;
288 sub guess_name {
289 my $self = shift;
290 my $attributes = shift;
291 # remove this fix when Lincoln fixes it properly
292 return ["Misc" => "Misc"] unless ($attributes); # these are arbitrary, and possibly destructive defaults
293 my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
294 my $best = pop @ordered_attributes;
295 @$attributes = @ordered_attributes;
296 return $best;