Bio/DB/CUTG.pm

   1 #
   2 # BioPerl module for Bio::DB::CUTG
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Richard Adams (richard.adams@ed.ac.uk)
   7 #
   8 # Copyright Richard Adams
   9 #
  10 # You may distribute this module under the same terms as perl itself
  11
  12 # POD documentation - main docs before the code
  13
  14 =head1 NAME
  15
  16 Bio::DB::CUTG - for access to the Codon usage Database
  17 at http://www.kazusa.or.jp/codon.
  18
  19 =head1 SYNOPSIS
  20
  21        use Bio::CodonUsage::Table;
  22        use Bio::DB::CUTG;
  23
  24        my $db = Bio::DB::CUTG->new(-sp =>'Pan troglodytes');
  25        my $CUT = $db->get_request();
  26
  27
  28 =head1 DESCRIPTION
  29
   30 This class retrieves and objectifies codon usage tables from the
   31 CUTG web database. The idea is that you can initially retrieve a CUT from
  32 the web database, and write it to file in a way that can be read in
  33 later, using the Bio::CodonUsage::IO module.
  34
  35 For a web query, two parameters need to be specified: species(sp) and
  36 genetic code id (gc). The database is searched using regular
  37 expressions, therefore the full latin name must be given to specify
  38 the organism. If the species name is ambiguous the first CUT in the
   39 list is retrieved.  Defaults are Homo sapiens and 1 (standard genetic
  40 code).  If you are retrieving CUTs from organisms using other genetic
  41 codes this needs to be put in as a parameter. Parameters can be
   42 entered in the constructor or in the get_request()
   43 method. Allowable parameters are listed in the $QUERY_KEYS hash
  44 reference variable.
  45
  46 I intend at a later date to allow retrieval of multiple codon tables
  47 e.g., from a wildcard search.
  48
  49 =head1 SEE ALSO
  50
  51 L<Bio::Tools::CodonTable>,
  52 L<Bio::WebAgent>,
  53 L<Bio::CodonUsage::Table>,
  54 L<Bio::CodonUsage::IO>
  55
  56 =head1 FEEDBACK
  57
  58 =head2 Mailing Lists
  59
  60
  61 User feedback is an integral part of the evolution of this and other
  62 Bioperl modules. Send your comments and suggestions preferably to one
  63 of the Bioperl mailing lists.  Your participation is much appreciated.
  64
  65   bioperl-l@bioperl.org                  - General discussion
  66   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  67
  68 =head2 Support
  69
  70 Please direct usage questions or support issues to the mailing list:
  71
  72 I<bioperl-l@bioperl.org>
  73
  74 rather than to the module maintainer directly. Many experienced and
   75 responsive experts will be able to look at the problem and quickly
  76 address it. Please include a thorough description of the problem
  77 with code and data examples if at all possible.
  78
  79 =head2 Reporting Bugs
  80
  81 Report bugs to the Bioperl bug tracking system to help us keep track
   82 of the bugs and their resolution.  Bug reports can be submitted via the web:
  83
  84   https://redmine.open-bio.org/projects/bioperl/
  85
  86 =head1 AUTHORS
  87
  88 Richard Adams, Richard.Adams@ed.ac.uk
  89
  90 =head1 APPENDIX
  91
  92 The rest of the documentation details each of the object
  93 methods. Internal methods are usually preceded with a _
  94
  95 =cut
  96
  97 # Let the code begin...
  98
  99 package Bio::DB::CUTG;
 100 use Bio::CodonUsage::IO;
 101 use IO::String;
 102 use URI::Escape;
 103 use vars qw($URL $QUERY_KEYS);
 104
 105 use base qw(Bio::WebAgent);
 106
 107 $QUERY_KEYS = {
 108     sp => 'full Latin species name',
 109     gc => 'genetic code id'
 110 };
 111
 112 BEGIN {
 113     $URL = "http://www.kazusa.or.jp";
 114 }
 115
 116 =head2 new
 117
 118  Title   : new
 119  Usage   : my $db = Bio::DB::CUTG->new()
 120  Returns : a reference to a new Bio::DB::CUTG
 121  Args    : hash of optional values for db query
 122
 123 =cut
 124
 125 sub new {
 126     my ( $class, @args ) = @_;
 127     _check_args(@args);
 128     my $self = $class->SUPER::new(@args);
 129     return $self;
 130 }
 131
 132 =head2 query_keys
 133
 134  Title   : query_keys
 135  Usage   : $db->query_keys()
 136  Purpose : To determine valid keys for parameters for db query.
 137  Returns : a reference to a hash describing valid query keys
 138  Args    : none
 139
 140 =cut
 141
 142 sub query_keys {
 143     return $QUERY_KEYS;
 144 }
 145
 146 =head2  sp
 147
 148  Title  : sp
 149  Usage  : my $sp = $db->sp();
 150  Purpose: Get/set method for species name
 151  Returns: void or species name string
 152  Args   : None or species name string
 153
 154 =cut
 155
 156 sub sp {
 157     my $self = shift;
 158     if (@_) {
 159         my $name = shift;
 160         $self->{'_sp'} = $name;
 161     }
 162     return $self->{'_sp'} || "Homo sapiens";
 163
 164 }
 165
 166 =head2  gc
 167
 168  Title  : gc
 169  Usage  : my $gc = $db->gc();
 170  Purpose: Get/set method for genetic code id
 171  Returns: void or genetic code  integer
 172  Args   : None or genetic code integer
 173
 174 =cut
 175
 176 sub gc {
 177     #### genetic code id for translations ####
 178     my $self = shift;
 179     if (@_) {
 180         if (   $_[0] =~ /^\d+$/
 181             && $_[0] >= 1
 182             && $_[0] <= 15
 183             && $_[0] != 7
 184             && $_[0] != 8 )
 185         {
 186             $self->{'_gc'} = shift;
 187         }
 188         else {
 189             $self->warn(
 190                 "invalid genetic code index - setting to standard default (1)");
 191             $self->{'_gc'} = 1;
 192         }
 193     }
 194     return $self->{'_gc'} || 1;    #return 1 if not defined
 195
 196 }
 197
 198 =head2  get_request
 199
 200  Title  : get_request
 201  Usage  : my $cut = $db->get_request();
 202  Purpose: To query remote CUT with a species name
 203  Returns: a new codon usage table object
 204  Args   : species  name(mandatory), genetic code id(optional)
 205
 206 =cut
 207
 208 sub get_request {
 209     my ( $self, @args ) = @_;
 210     _check_args(@args);
 211     shift;
 212     ### can put in parameters here as well
 213     while (@_) {
 214         my $key = shift;
 215         $key =~ s/^-//;
 216         $self->$key(shift);
 217     }
 218     $self->url($URL);
 219
 220     ###1st of all search DB to check species exists and is unique
 221     my $nameparts = join "+", $self->sp =~ /(\S+)/g;
 222     my $search_url =
 223       $self->url . "/codon/cgi-bin/spsearch.cgi?species=" . $nameparts . "&c=s";
 224     my $rq = HTTP::Request->new( GET => $search_url );
 225     my $reply = $self->request($rq);
 226     if ( $reply->is_error ) {
 227         $self->throw(
 228             $reply->as_string() . "\nError getting for url $search_url!\n" );
 229     }
 230     my $content = $reply->content;
 231     return 0 unless $content;
 232     $self->debug(" reply from query is \n  $content");
 233     #####  if no matches, assign defaults - or can throw here?  ######
 234     if ( $content =~ /not found/i ) {
 235         $self->warn("organism not found -selecting human [9606] as default");
 236         $self->sp("9606");
 237         $self->_db("gbpri");
 238     }
 239
 240     else {
 241         my @names = $content =~ /species=([^"]+)/g;
 242         ### get 1st species data from report ####
 243         my @dbs = $content =~ /\[([^\]]+)\]:\s+\d+/g;
 244         ## warn if  more than 1 matching species ##
 245         ## if multiple species retrieved, choose first one by default ##
 246         $self->throw("No names returned for $nameparts") unless @names;
 247         if ( @names > 1 ) {
 248             $self->warn( "too many species - not a unique species id\n"
 249                   . "selecting $names[0] using database [$dbs[0]]" );
 250         }
 251         ### now assign species and database value
 252         $self->sp( $names[0] );
 253         $self->_db( $dbs[0] );
 254     }
 255
 256     ######## now get codon table , all defaults established now
 257
 258     ##construct URL##
 259     $nameparts = $self->sp;
 260
 261     my $CT_url =
 262         $self->url
 263       . "/codon/cgi-bin/showcodon.cgi?species="
 264       . $nameparts . "&aa="
 265       . $self->gc
 266       . "&style=GCG";
 267     $self->debug("URL : $CT_url\n");
 268     ## retrieve data in html##
 269     my $rq2 = HTTP::Request->new( GET => $CT_url );
 270     $reply = $self->request($rq2);
 271     if ( $reply->is_error ) {
 272         $self->throw(
 273             $reply->as_string() . "\nError getting for url $CT_url!\n" );
 274     }
 275     my $content2 = $reply->content;
 276
 277     ## strip html tags, basic but works here
 278     $content2 =~ s/<[^>]+>//sg;
 279     $content2 =~ s/Format.*//sg;
 280     $self->debug("raw DDB table is :\n $content2");
 281
 282     ### and pass to Bio::CodonUsage::IO for parsing
 283     my $iostr = IO::String->new($content2);
 284     my $io = Bio::CodonUsage::IO->new( -fh => $iostr );
 285
 286     ##return object ##
 287     return $io->next_data;
 288 }
 289
 290 sub _check_args {
 291
 292     ###checks parameters for matching $QUERYKEYS
 293     my @args = @_;
 294     while ( my $key = shift @args ) {
 295         $key = lc($key);
 296         $key =~ s/\-//;
 297
 298         if ( !exists( $QUERY_KEYS->{$key} ) ) {
 299             Bio::Root::Root->throw( "invalid parameter - must be one of ["
 300                   . ( join "] [", keys %$QUERY_KEYS )
 301                   . "]" );
 302         }
 303         shift @args;
 304     }
 305 }
 306
 307 #### internal URL parameter not specifiable ######
 308 sub _db {
 309     my $self = shift;
 310     if (@_) {
 311         $self->{'_db'} = shift;
 312     }
 313     return $self->{'_db'};
 314 }
 315
 316 1;