add latest changes to, um, Changes
[bioperl-live.git] / Bio / DB / Expression / geo.pm
blob20ab87f8043abaedcc5d1ea1d66a52bfdd488fcf
1 =head1 NAME
3 Bio::DB::Expression::geo - *** DESCRIPTION of Class
5 =head1 SYNOPSIS
7 *** Give standard usage here
9 =head1 DESCRIPTION
11 *** Describe the object here
13 =head1 FEEDBACK
15 =head2 Mailing Lists
17 User feedback is an integral part of the evolution of this and other
18 Bioperl modules. Send your comments and suggestions preferably to the
19 Bioperl mailing list. Your participation is much appreciated.
21 bioperl-l@bioperl.org - General discussion
22 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
24 =head2 Support
26 Please direct usage questions or support issues to the mailing list:
28 I<bioperl-l@bioperl.org>
30 rather than to the module maintainer directly. Many experienced and
31 reponsive experts will be able look at the problem and quickly
32 address it. Please include a thorough description of the problem
33 with code and data examples if at all possible.
35 =head2 Reporting Bugs
37 Report bugs to the Bioperl bug tracking system to help us keep track
38 of the bugs and their resolution. Bug reports can be submitted via the
39 web:
41 https://github.com/bioperl/bioperl-live/issues
43 =head1 AUTHOR
45 Allen Day E<lt>allenday@ucla.eduE<gt>
47 =head1 APPENDIX
49 The rest of the documentation details each of the object methods.
50 Internal methods are usually preceded with a '_'. Methods are
51 in alphabetical order for the most part.
53 =cut
56 # Let the code begin...
58 package Bio::DB::Expression::geo;
59 use strict;
60 use base qw(Bio::DB::Expression);
62 use Bio::Expression::Contact;
63 use Bio::Expression::DataSet;
64 use Bio::Expression::Platform;
65 use Bio::Expression::Sample;
67 use constant URL_PLATFORMS => 'http://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?pgsize=100000&mode=platforms&submitter=-1&filteron=0&filtervalue=-1&private=1&sorton=pub_date&sortdir=1&start=1';
68 use constant URL_PLATFORM => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
69 use constant URL_DATASET => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
70 use constant URL_SAMPLE => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
72 =head2 _initialize()
74 Usage : $obj->_initialize(%arg);
75 Function: Internal method to initialize a new Bio::DB::Expression::geo object
76 Returns : true on success
77 Args : Arguments passed to new()
79 =cut
81 sub _initialize {
82 my($self,%arg) = @_;
84 foreach my $arg (keys %arg){
85 my $marg = $arg;
86 $marg =~ s/^-//;
87 $self->$marg($arg{$arg}) if $self->can($marg);
90 return 1;
93 =head2 get_platforms()
95 Usage :
96 Function:
97 Example :
98 Returns : a list of Bio::Expression::Platform objects
99 Args :
101 =cut
103 sub get_platforms {
104 my ($self,@args) = @_;
106 my $doc = $self->_get_url( URL_PLATFORMS );
107 $doc =~ s!^.+?>Release date<.+?</tr>(.+)</table>!$1!gs;
109 my @platforms = ();
110 my @records = split m!</tr>\s+<tr>!, $doc;
112 foreach my $record ( @records ) {
113 my ($platform_acc,$name,$tax_acc,$contact_acc,$contact_name) =
114 $record =~ m!acc\.cgi\?acc=(.+?)".+?<td.+?>(.+?)<.+?<td.+?>.+?<.+?<td.+?>.+?href=".+?id=(.+?)".+?<td.+?OpenSubmitter\((\d+?)\).+?>(.+?)<!s;
115 next unless $platform_acc;
117 my $platform = Bio::Expression::Platform->new(
118 -accession => $platform_acc,
119 -name => $name,
120 -_taxon_id => $tax_acc,
121 -contact => Bio::Expression::Contact->new(
122 -source => 'geo',
123 -accession => $contact_acc,
124 -name => $contact_name,
125 -db => $self
127 -db => $self,
129 push @platforms, $platform;
132 return @platforms;
135 =head2 get_samples()
137 Usage :
138 Function:
139 Example :
140 Returns : a list of Bio::Expression::Sample objects
141 Args :
143 =cut
145 sub get_samples {
146 my ($self,@args) = @_;
147 $self->throw_not_implemented();
150 =head2 get_contacts()
152 Usage :
153 Function:
154 Example :
155 Returns : a list of Bio::Expression::Contact objects
156 Args :
158 =cut
160 sub get_contacts {
161 my ($self,@args) = @_;
162 $self->throw_not_implemented();
165 =head2 get_datasets()
167 Usage : $db->get_datasets('accession');
168 Function:
169 Example :
170 Returns : a list of Bio::Expression::DataSet objects
171 Args :
173 =cut
175 sub get_datasets {
176 my ($self,$platform) = @_;
178 my @lines = split /\n/, $self->_get_url( URL_PLATFORM . $platform->accession );
180 my @datasets = ();
182 foreach my $line ( @lines ) {
183 my ($dataset_acc) = $line =~ /^\!Platform_series_id = (\S+?)\s*$/;
184 next unless $dataset_acc;
186 my $dataset = Bio::Expression::DataSet->new(
187 -accession => $dataset_acc,
188 -platform => $platform,
189 -db => $self,
192 push @datasets, $dataset;
195 return @datasets;
198 sub fill_sample {
199 my ( $self, $sample ) = @_;
201 my @lines = split /\n/, $self->_get_url( URL_SAMPLE. $sample->accession );
203 foreach my $line ( @lines ) {
204 if ( my ($name) = $line =~ /^\!Sample_title = (.+?)\s*$/ ) {
205 $sample->name( $name );
207 elsif ( my ($desc) = $line =~ /^\!Sample_characteristics.*? = (.+?)\s*$/ ) {
208 $sample->description( $desc );
210 elsif ( my ($source_name) = $line =~ /^\!Sample_source_name.*? = (.+?)\s*$/ ) {
211 $sample->source_name( $source_name );
213 elsif ( my ($treatment_desc) = $line =~ /^\!Sample_treatment_protocol.*? = (.+?)\s*$/ ) {
214 $sample->treatment_description( $treatment_desc );
217 return 1;
220 sub fill_dataset {
221 my ( $self, $dataset ) = @_;
223 my @lines = split /\n/, $self->_get_url( URL_DATASET . $dataset->accession );
225 my @samples = ();
227 foreach my $line ( @lines ) {
228 if ( my ($sample_acc) = $line =~ /^\!Series_sample_id = (\S+?)\s*$/ ) {
229 my $sample = Bio::Expression::Sample->new(
230 -accession => $sample_acc,
231 -dataset => $dataset,
232 -db => $self,
234 push @samples, $sample;
236 elsif ( my ($pubmed_acc) = $line =~ /^\!Series_pubmed_id = (.+?)\s*$/ ) {
237 $dataset->pubmed_id( $pubmed_acc );
239 elsif ( my ($web_link) = $line =~ /^\!Series_web_link = (.+?)\s*$/ ) {
240 $dataset->web_link( $web_link );
242 elsif ( my ($contact) = $line =~ /^\!Series_contact_name = (.+?)\s*$/ ) {
243 $dataset->contact( $contact );
245 elsif ( my ($name) = $line =~ /^\!Series_title = (.+?)\s*$/ ) {
246 $dataset->name( $name );
248 elsif ( my ($desc) = $line =~ /^\!Series_summary = (.+?)\s*$/ ) {
249 $dataset->description( $desc );
251 elsif ( my ($design) = $line =~ /^\!Series_type = (.+?)\s*$/ ) {
252 $dataset->design( $design );
254 elsif ( my ($design_desc) = $line =~ /^\!Series_overall_design = (.+?)\s*$/ ) {
255 $dataset->design_description( $design_desc );
259 $dataset->samples(\@samples);
262 #################################################
264 =head2 _platforms_doc()
266 Usage :
267 Function:
268 Example :
269 Returns : an HTML document containing a table of all platforms
270 Args :
273 =cut
275 sub _get_url {
276 my ($self,$url) = @_;
278 my $response;
279 eval {
280 $response = $self->get( $url );
282 if( $@ ) {
283 $self->warn("Can't query website: $@");
284 return;
286 $self->debug( "resp is $response\n");
288 return $response;