add the two cyclic ref testers (not required for general users, but nice
[bioperl-live.git] / scripts / das / bp_das_server.pl
blob169b7df7b2ddbb7d08a79ad31361813d0a52316a
1 #!/usr/bin/perl
3 # minimal annotation server
5 use strict;
6 use warnings;
7 use Apache::DBI;
8 use Bio::DB::GFF;
9 use CGI qw/header path_info param url request_method/;
10 use Digest::MD5 'md5_hex';
11 use Carp;
13 my $VERSION = 'DAS/1.00';
14 (my $BASENAME = url(-absolute=>1)) =~ s!http://[^/]+/!!;
16 use vars qw($DB %ERRCODES %CATEGORIES $HEADER
17 %DSN %TYPE2CATEGORY %TYPEOBJECTS
18 %EXCLUDE
21 # dsn description db server map master
22 %DSN = (
23 'chr22_transcripts' => [q(EST-predicted transcripts on chr22 from Jean Thierry-Mieg),
24 'dbi:mysql:database=tm_chr22;host=brie3.cshl.org',
25 'http://servlet.sanger.ac.uk:8080/das/ensembl110']
27 ########################################################################################
29 %ERRCODES = (
30 200 => 'OK',
31 400 => 'Bad command',
32 401 => 'Bad data source',
33 402 => 'Bad command arguments',
34 403 => 'Bad reference object',
35 404 => 'Bad stylesheet',
36 405 => 'Coordinate error',
37 500 => 'Internal server error (oops)',
38 501 => 'Unimplemented feature',
41 %CATEGORIES = (
42 component => [qw(Sequence:Contig Sequence:Link Sequence:Genomic_canonical
43 static_golden_path_contig:ensembl ensembl_contig:ensembl)],
44 transcription => [qw(Sequence:curated polyA_site stop CpG misc_signal intron exon transcript CDS)],
45 homology => [qw(similarity)],
46 repeat => [qw(Alu repeat repeat_region repeat_unit misc_feature)],
47 structural => [qw(Clone cosmid OLIGO PCR_product structural compression Comment Conflict)],
48 experimental => [qw(experimental RNAi)],
51 %EXCLUDE = (
52 'static_golden_path_contig:ensembl' => 1,
53 'ensembl_contig:ensembl' => 1,
54 'Sequence:Contig' => 1,
57 while (my($c,$v) = each %CATEGORIES) { # invert nicely
58 for my $typename (@$v) {
59 my $typeobj = Bio::DB::GFF::Typename->new($typename);
60 $TYPE2CATEGORY{$typeobj} = $c;
61 $TYPEOBJECTS{$typeobj} = $typeobj;
65 $HEADER = 0;
66 my ($junk,$DSN,$OPERATION) = split '/',path_info();
68 do { error_header('invalid request',400); exit 0 } unless $DSN;
69 do { list_dsns(); exit 0 } if $DSN eq 'dsn' or $OPERATION eq 'dsn';
70 do { error_header('invalid data source, use the dsn command to get list',401); exit 0 } unless $DSN{$DSN};
72 do { error_header('Could not open database',500); exit 0 }
73 unless $DB = openDB($DSN);
75 do { entry_points(); exit 0 } if $OPERATION eq 'entry_points';
76 do { types(); exit 0 } if $OPERATION eq 'types';
77 do { features(); exit 0 } if $OPERATION eq 'features';
78 do { stylesheet(); exit 0 } if $OPERATION eq 'stylesheet';
80 error_header('invalid request',400);
81 exit 0;
83 # -----------------------------------------------------------------
84 sub openDB {
85 my $name = shift;
86 my $db = Bio::DB::GFF->new(-adaptor=>'dbi::mysqlopt',-dsn=>$DSN{$name}[1]);
87 $db->automerge(0);
88 $db->debug(0);
89 return $db;
92 # -----------------------------------------------------------------
93 sub list_dsns {
94 my $j = ' 'x3;
95 ok_header();
96 print qq(<?xml version="1.0" standalone="yes"?>\n<!DOCTYPE DASDSN SYSTEM "http://www.biodas.org/dtd/dasdsn.dtd">\n);
97 print "<DASDSN>\n";
99 for my $dsn (sort keys %DSN) {
100 print "$j<DSN>\n";
101 print qq($j$j<SOURCE id="$dsn">$DSN{$dsn}[0]</SOURCE>\n);
102 print qq($j$j<MAPMASTER>$DSN{$dsn}[2]/</MAPMASTER>\n);
103 print qq($j$j<DESCRIPTION>This is the $DSN{$dsn}[0] database</DESCRIPTION>\n);
104 print "$j</DSN>\n";
106 print "</DASDSN>\n";
109 # -----------------------------------------------------------------
110 sub entry_points {
111 my $segments = get_segments();
113 my @parts;
114 if ($segments) {
115 @parts = map { get_segment_obj(@$_) } @$segments;
116 @parts = map { $_->contained_features(-types=>['Sequence:Link','Sequence:Contig','Sequence:Genomic_canonical'],-merge=>0) } @parts;
117 } else {
118 @parts = grep {$_->name =~ /^CHR/i} $DB->features(-types=>['Sequence:Link','Sequence:Contig','Sequence:Genomic_canonical'],-merge=>0);
121 my $url = get_url();
123 ok_header();
124 print <<END;
125 <?xml version="1.0" standalone="no"?>
126 <!DOCTYPE DASEP SYSTEM "http://www.biodas.org/dtd/dasep.dtd">
127 <DASEP>
128 <ENTRY_POINTS href="$url" version="1.0">
132 for my $part (@parts) {
133 $part->absolute(1);
134 my $name = $part->name;
135 my $st = $part->start;
136 my $en = $part->stop;
137 my $class = $part->class;
138 my $length = $part->length;
139 my $orientation = $part->strand > 0 ? '+' : '-';
140 my $subparts = $part->source =~ /Link|Chromosome|Contig/ ? 'yes' : 'no';
141 print qq(<SEGMENT id="$name" size="$length" start="$st" stop="$en" class="$class" orientation="$orientation" subparts="$subparts">$name</SEGMENT>\n);
143 print "</ENTRY_POINTS>\n</DASEP>\n";
146 # -----------------------------------------------------------------
147 # get the features for the segment indicated
148 sub features {
149 my @segments = get_segments() or return;
151 my $summary = param('summary');
152 my $url = get_url();
153 my @filter = param('type');
154 my @category = param('category');
155 push @filter,make_categories(@category);
158 ok_header();
159 print <<END
160 <?xml version="1.0" standalone="yes"?>
161 <!DOCTYPE DASGFF SYSTEM "http://www.biodas.org/dtd/dasgff.dtd">
162 <DASGFF>
163 <GFF version="1.0" href="$url">
167 foreach (@segments) {
168 my ($reference,$refclass,$start,$stop) = @$_;
169 my $seq = get_segment_obj($reference,$refclass,$start,$stop);
170 unless ($seq) {
171 print qq(<SEGMENT id="$reference" start="$start" stop="$stop" version="1.0" />\n);
172 next;
175 if (lc(param('category')) eq 'component') {
176 dump_framework($seq);
177 next;
180 my $r = $seq->refseq;
181 my $s = $seq->start;
182 my $e = $seq->stop;
183 ($s,$e) = ($e,$s) if $s > $e;
185 print qq(<SEGMENT id="$r" start="$s" stop="$e" version="1.0">\n);
187 my $iterator = $seq->features(-types=>\@filter,-merge=>0,-iterator=>1);
189 while (my $f = $iterator->next_seq) {
190 my $type = $f->type;
191 next if $EXCLUDE{$type};
193 my $flabel = $f->info || $f->type;
194 my $source = $f->source;
195 my $method = $f->method;
196 my $start = $f->start;
197 my $end = $f->stop;
198 my $score = $f->score;
199 my $orientation = $f->strand;
200 my $phase = $f->phase;
201 my $group = $f->group;
202 my $id = $f->id;
204 $phase ||= 0;
205 $orientation ||= 0;
206 $score ||= '-';
207 $orientation = $orientation >= 0 ? '+' : '-';
209 # hack hack hack
210 my $category = transmute($type);
211 ($start,$end) = ($end,$start) if $start > $end;
213 # group stuff
214 my $hash = $group;
215 # my @notes = $f->notes;
216 my @notes;
217 my $info = $f->info;
218 my $group_info;
220 if (ref($info)) {
221 my $class = $info->class;
222 $id = "$class:$info";
223 if ($DSN eq 'elegans') {
224 $group_info = qq(<LINK href="http://www.wormbase.org/db/get?name=$info;class=$class">$info</LINK>);
226 } else {
227 $hash = md5_hex($group);
228 $group_info = join "\n",map {qq(<NOTE>$_</NOTE>)} @notes;
231 my ($target,$target_info);
232 if (($target = $f->target) && $target->can('start')) {
233 my $start = $target->start;
234 my $stop = $target->stop;
235 $target_info = qq(<TARGET id="$target" start="$start" stop="$stop" />);
238 if ($category eq 'component') {
239 my $strt = 1;
240 my $stp = $stop - $start + 1;
241 $target_info = qq(<TARGET id="$id" start="$strt" stop="$stp" />);
244 my $map;
245 if ($type =~ /Segment|Link|Genomic_canonical|Contig/i) { $map = qq( reference="yes") } else { $map = qq() }
246 $map .= qq( subparts="yes") if $type =~ /Segment|Link/i;
248 ## Need not print feature for map in annotation services
249 ## The next 2 lines are ucommented at Wash U:
250 # if (($DSN ne "welegans") && ($c eq "structural")) {
251 # } else {
253 print <<END;
254 <FEATURE id="$id" label="$flabel">
255 <TYPE id="$type" category="$category"$map>$type</TYPE>
256 <METHOD id="$method">$method</METHOD>
257 <START>$start</START>
258 <END>$end</END>
259 <SCORE>$score</SCORE>
260 <ORIENTATION>$orientation</ORIENTATION>
261 <PHASE>$phase</PHASE>
264 if ($hash) {
265 print qq( <GROUP id="$hash">\n);
266 print qq( $group_info\n) if $group_info;
267 print qq( $target_info\n) if $target_info;
268 print qq( </GROUP>\n);
270 print <<END;
271 </FEATURE>
274 # } # End Wash U if statement
277 print qq(</SEGMENT>\n);
280 print <<END;
281 </GFF>
282 </DASGFF>
286 sub dump_framework {
287 my $seq = shift;
288 my $start = $seq->start;
289 my $stop = $seq->stop;
291 my @parts = $seq->contained_features(-type=>['Sequence:Link','Sequence:Genomic_canonical','Sequence:Contig'],-merge=>0);
293 print qq(<SEGMENT id="$seq" start="$start" stop="$stop" version="1.0">\n);
295 for my $part (@parts) {
296 my ($st,$en) = ($part->start,$part->stop);
297 my $orientation = $part->strand >= 0 ? '+1' : '-1';
298 my $length = $part->length;
299 my $type = $part->type;
300 my $method = $type->method;
301 my $description = qq(category="component" reference="yes");
302 $description .= qq( subparts="yes") unless $part->source eq 'Genomic_canonical';
304 print <<END
305 <FEATURE id="Sequence:$part" label="$part">
306 <TYPE id="$type" $description>$part</TYPE>
307 <METHOD id="$method">$method</METHOD>
308 <START>$st</START>
309 <END>$en</END>
310 <SCORE>-</SCORE>
311 <ORIENTATION>$orientation</ORIENTATION>
312 <PHASE>-</PHASE>
313 <GROUP id="Sequence:$part">
314 <TARGET id="$part" start="1" stop="$length">$part</TARGET>
315 </GROUP>
316 </FEATURE>
320 print qq(</SEGMENT>\n);
323 sub types {
324 return all_types() unless param('ref') or param('segment');
326 my $type = param('entry_type') || 'Sequence';
327 my $summary = param('summary');
328 my $url = get_url();
329 my @filter = param('type');
331 my @segments = get_segments() or return;
333 ok_header();
335 print <<END;
336 <?xml version="1.0" standalone="yes"?>
337 <!DOCTYPE DASTYPES SYSTEM "http://www.biodas.org/dtd/dastypes.dtd">
338 <DASTYPES>
339 <GFF version="1.2" summary="yes" href="$url">
343 foreach (@segments) {
344 my ($reference,$class,$start,$stop) = @$_;
345 next unless $reference;
346 my $seq = get_segment_obj($reference,$class,$start,$stop) or next;
347 unless ($seq) { #empty section
348 print qq(<SEGMENT id="$reference" start="$start" stop="$stop" version="1.0">\n);
349 print qq(</SEGMENT>\n);
350 next;
353 my $s = $seq->start;
354 my $e = $seq->stop;
356 # use absolute coordinates -- people expect it
357 my $name = $seq->refseq;
359 print qq(<SEGMENT id="$name" start="$s" stop="$e" version="1.0">\n);
361 my @args = (-enumerate=>1);
362 push @args,(-types=>\@filter) if @filter;
363 my %histogram = $seq->types(@args);
364 foreach (keys %histogram) {
365 my ($method,$source) = split ':';
366 my $count = $histogram{$_};
367 my $category = transmute($_);
368 print qq(\t<TYPE id="$_" category="$category" method="$method" source="$source">$count</TYPE>\n)
369 unless $EXCLUDE{$_};
371 print qq(</SEGMENT>\n);
373 print <<END;
374 </GFF>
375 </DASTYPES>
379 # list of all the types
380 sub all_types {
381 my @methods = $DB->types;
383 ok_header();
384 my $url = get_url();
385 print <<END;
386 <?xml version="1.0" standalone="yes"?>
387 <!DOCTYPE DASTYPES SYSTEM "http://www.biodas.org/dtd/dastypes.dtd">
388 <DASTYPES>
389 <GFF version="1.2" summary="yes" href="$url">
390 <SEGMENT>
394 for my $id (@methods) {
395 next if $EXCLUDE{$id};
396 my $category = transmute($id);
397 my $method = $id->method;
398 my $source = $id->source;
399 print qq(\t<TYPE id="$id" category="$category" method="$method" source="$source" />\n);
402 print <<END
403 </SEGMENT>
404 </GFF>
405 </DASTYPES>
411 # Big time kludge -- just outputs the prebuilt stylesheet in this
412 # directory. Used primarily for testing.
413 sub stylesheet {
414 ok_header();
415 open my $STYLE, '<', "style.xml" or die "Could not read file 'style.xml': $!\n";
416 while(<$STYLE>) {
417 print $_;
419 close $STYLE;
422 # really, really bad shit
423 # calculate type and category from acedb type and method
424 sub transmute {
425 my $type = shift;
427 # look in $TYPE2CATEGORY first to see if we have an exact match
428 my $category = $TYPE2CATEGORY{$type};
429 return $category if $category;
431 # otherwise do a fuzzy match using the values of %TYPEOBJECTS
432 for my $typeobj (values %TYPEOBJECTS) {
433 warn "comparing $typeobj to $type";
435 if ($typeobj->match($type)) {
436 $category = $TYPE2CATEGORY{$typeobj}; # fetch category for this object
437 $TYPE2CATEGORY{$type} = $category; # remember this match for later
438 return $category;
441 return 'miscellaneous'; # no success
444 # -----------------------------------------------------------------
445 sub get_url {
446 my $url = url(-path=>1, -query=>1);
447 $url =~ tr/&/\;/;
448 return $url;
451 # -----------------------------------------------------------------
452 sub error_header {
453 my ($message,$code) = @_;
454 $code ||= 500;
455 # $code = "$code $ERRCODES{$code}";
456 print header(-type =>'text/plain',
457 -X_DAS_Version => $VERSION,
458 -X_DAS_Status => $code,
459 ) unless $HEADER++;
460 return if request_method() eq 'HEAD';
461 print $message;
464 sub ok_header {
465 print header(-type =>'text/plain',
466 -X_DAS_Version => $VERSION,
467 -X_DAS_Status => "200 OK",
468 ) unless $HEADER++;
471 # phony dtd
472 sub dtd {
473 ok_header();
474 print <<DTD;
475 <!-- phony dtd for debugging parsers -->
479 # -----------------------------------------------------------------
480 sub get_segments {
481 # extended segment argument
482 my @segments;
483 foreach (param('segment')) {
484 my ($ref,$start,$stop) = /^(\S+?)(?::(\d+),(\d+))?$/;
485 push @segments,[$ref,$start,$stop];
487 push @segments,[scalar param('ref'),scalar param('start'),scalar param('stop')] if param('ref');
488 return unless @segments;
490 foreach (@segments){
491 my ($reference,$start,$stop) = @$_;
492 my $class = param('entry_type') || 'Sequence';
493 my $name = $reference;
495 if ($reference =~ /^(\w+):(\S+)$/) {
496 $class = $1;
497 $name = $2;
499 my @values = ($name,$class,$start,$stop);
500 $_ = \@values;
503 return wantarray ? @segments : \@segments;
506 # -----------------------------------------------------------------
507 sub get_segment_obj {
508 my ($reference,$class,$start,$stop) = @_;
509 my @args = (-name=>$reference);
510 push @args,(-class=>$class) if defined $class;
511 push @args,(-start=>$start) if defined $start;
512 push @args,(-stop=>$stop) if defined $stop;
514 my $segment = $DB->segment(@args) or return;
515 return $segment;
519 # -----------------------------------------------------------------
520 sub make_categories {
521 my @filter;
522 for my $category (@_) {
523 my $c = lc $category;
524 push @filter,@{$CATEGORIES{$c}} if $CATEGORIES{$c};
525 push @filter,$category unless $CATEGORIES{$c};
527 return @filter;