maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / Tools / Analysis / DNA / ESEfinder.pm
blobabfeb8da0554aaacbbd8a5988682649cab8353fa
2 # BioPerl module for Bio::Tools::Analysis::DNA::ESEfinder
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Richard Adams
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tools::Analysis::DNA::ESEfinder - a wrapper around ESEfinder
17 server
19 =head1 SYNOPSIS
21 use Bio::Tools::Analysis::DNA::ESEfinder;
22 use strict;
24 my $seq; # a Bio::PrimarySeqI or Bio::SeqI object
26 $seq = Bio::Seq->new(
27 -primary_id => 'test',
28 -seq=>'atgcatgctaggtgtgtgttttgtgggttgtactagctagtgat'.
29 -alphabet=>'dna');
31 my $ese_finder = Bio::Tools::Analysis::DNA::ESEfinder->
32 new(-seq => $seq);
34 # run ESEfinder prediction on a DNA sequence
35 $ese_finder->run();
37 die "Could not get a result"
38 unless $ese_finder->status =~ /^COMPLETED/;
40 print $ese_finder->result; # print raw prediction to STDOUT
42 foreach my $feat ( $ese_finder->result('Bio::SeqFeatureI') ) {
44 # do something to SeqFeature
45 # e.g. print as GFF
46 print $feat->gff_string, "\n";
47 # or store within the sequence - if it is a Bio::SeqI
48 $seq->add_SeqFeature($feat)
52 =head1 DESCRIPTION
54 This class is a wrapper around the ESEfinder web server which uses
55 experimentally defined scoring matrices to identify possible exonic
56 splicing enhancers in human transcripts.
58 The results can be retrieved in 4 ways.
60 =over 4
62 =item 1.
64 C<$ese_finder-E<gt>result('')> retrieves the raw text output of the
65 program
67 =item 2.
69 C<$ese_finder-E<gt>result('all')> returns a Bio::Seq::Meta::Array object
70 with prediction scores for all residues in the sequence
73 =item 3.
75 C<$ese_finder-E<gt>result('Bio::SeqFeatureI')> returns an array of
76 Bio::SeqFeature objects for sequences with significant scores. Feature
77 tags are score, motif, SR_protein and method
79 =item 4.
81 C<$ese_finder-E<gt>result('raw')> returns an array of significant matches
82 with each element being a reference to [SR_protein, position, motif,
83 score]
85 =back
87 See L<http://rulai.cshl.edu/tools/ESE2/>
89 This the second implementation of Bio::SimpleAnalysisI which hopefully
90 will make it easier to write wrappers on various services. This class
91 uses a web resource and therefore inherits from L<Bio::WebAgent>.
93 =head1 SEE ALSO
95 L<Bio::SimpleAnalysisI>,
96 L<Bio::WebAgent>
98 =head1 FEEDBACK
100 =head2 Mailing Lists
102 User feedback is an integral part of the evolution of this and other
103 Bioperl modules. Send your comments and suggestions preferably to one
104 of the Bioperl mailing lists. Your participation is much appreciated.
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
109 =head2 Support
111 Please direct usage questions or support issues to the mailing list:
113 I<bioperl-l@bioperl.org>
115 rather than to the module maintainer directly. Many experienced and
116 reponsive experts will be able look at the problem and quickly
117 address it. Please include a thorough description of the problem
118 with code and data examples if at all possible.
120 =head2 Reporting Bugs
122 Report bugs to the Bioperl bug tracking system to help us keep track
123 the bugs and their resolution. Bug reports can be submitted via the
124 web:
126 https://github.com/bioperl/bioperl-live/issues
128 =head1 AUTHORS
130 Richard Adams, Richard.Adams@ed.ac.uk,
131 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
133 =head1 APPENDIX
135 The rest of the documentation details each of the object
136 methods. Internal methods are usually preceded with a _
138 =cut
141 # Let the code begin...
142 #should have own
144 package Bio::Tools::Analysis::DNA::ESEfinder;
146 use Data::Dumper;
147 use IO::String;
148 use Bio::SeqIO;
149 use HTTP::Request::Common qw (POST);
150 use HTML::HeadParser;
151 use Bio::SeqFeature::Generic;
152 use Bio::Seq::Meta::Array;
153 use Bio::WebAgent;
154 use strict;
156 #inherits directly from SimpleAnalysisBase
157 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
160 #global vars are now file-scoped lexicals
162 my $URL = 'http://rulai.cshl.org/cgi-bin/tools/ESE/esefinder.cgi';
164 my $ANALYSIS_NAME = 'ESEfinder';
167 my $ANALYSIS_SPEC =
169 'name' => 'ESEfinder',
170 'type' => 'DNA', #compulsory entry as is used for seq checking
171 'version' => '2.0',
172 'supplier' => 'Krainer lab, Cold Spring Harbor Laboratory, POBOX100, Bungtown Rd, COld Spring Harbor, NY, USA',
173 'description' => 'to identify exonic splicing elements in human transcripts',
176 my $INPUT_SPEC =
178 'mandatory' => 'true',
179 'type' => 'Bio::PrimarySeqI',
180 'name' => 'sequence',
183 my $RESULT_SPEC =
185 '' => 'bulk', # same as undef
186 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
187 'raw' => 'Array of [ SR_protein, position, motif, score]',
188 'all' => 'Bio::Seq::Meta::Array object'
192 ### unique to this module ##
193 sub _init {
194 ## fills in fixed data for class ##
195 my $self = shift;
196 $self->url($URL);
197 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
198 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
199 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
200 $self->{'_ANALYSIS_NAME'} =$ANALYSIS_NAME;
201 return $self;
205 sub _run {
206 my $self = shift;
207 my $seq_fasta;
208 my $stringfh = IO::String->new($seq_fasta);
209 my $seqout = Bio::SeqIO->new(-fh => $stringfh,
210 -format => 'fasta');
211 $seqout->write_seq($self->seq);
212 $self->debug($seq_fasta);
213 $self->delay(1);
214 # delay repeated calls by default by 3 sec, set delay() to change
215 $self->sleep;
216 $self->status('TERMINATED_BY_ERROR');
218 my $request = POST $self->url,
219 #Content_Type => 'x-www-form-urlencoded',
220 Content => [
221 protein1 => 1,
222 protein2 => 1,
223 protein3 => 1,
224 protein4 => 1,
225 radio_sf2 => 0,
226 radio_sc35 => 0,
227 radio_srp40 => 0,
228 radio_srp55 => 0,
229 sequence =>$seq_fasta,
231 my $content = $self->request($request);
232 if( $content->is_error ) {
233 $self->throw(ref($self)." Request Error:\n".$content->as_string);
236 my $text = $content->content; #1st reponse
237 my ($tmpfile) = $text =~ /value="(tmp\/.+txt)"/;
238 # now get data for all residues #
239 my $rq2 = POST 'http://rulai.cshl.org/cgi-bin/tools/ESE/resultfile.txt',
240 #Content_Type => 'x-www-form-urlencoded',
241 Content => [
242 fname => $tmpfile,
244 my $ua2 = Bio::WebAgent->new();
245 my $content2 = $ua2->request($rq2);
246 if( $content2->is_error ) {
247 $self->throw(ref($self)." Request Error:\n".$content2->as_string);
250 my $text2 = $content2->content;
251 $self->{'_result'} = $text2;
252 $self->status('COMPLETED') if $text2 ne '';
254 #print Dumper $response;
258 sub result {
260 #make sec feat of above threshold scores #
262 my ($self,$value) = @_;
264 my @sig_pdctns;
265 my @fts;
267 if ($value ) {
268 my $result = IO::String->new($self->{'_result'});
269 my $current_SR;
270 my $all_st_flag = 0;
271 my %all;
272 while (my $line = <$result>) {
273 #make array of all scores or threshold depending on $value
274 last if $line =~ /^All scores/ && $value ne 'all' or $line =~ /2001,/;
275 $all_st_flag++ if $line =~ /All scores/;
276 next if $value eq 'all' && $all_st_flag == 0;
278 #parse line
279 if ($line =~ /^Protein/) {
280 ($current_SR) = $line =~/:\s+(\S+)/;
281 $current_SR =~ s{/}{_}; # remove unallowed charcters from hash
283 if ( $line =~/^\d+/ && $value ne 'all') {
284 push @sig_pdctns, [$current_SR, split /\s+/, $line] ;
285 } elsif ($line =~ /^\d+/) {
287 push @{$all{$current_SR}}, [split /\s+/, $line];
291 if ($value eq 'Bio::SeqFeatureI') {
292 foreach (@sig_pdctns) {
293 #make new ese object for each row of results
294 push @fts, Bio::SeqFeature::Generic->new
296 -start => $_->[1],
297 -end => $_->[1] + length($_->[2]) -1,
298 -source => 'ESEfinder',
299 -primary => 'ESE',
300 -tag =>{
301 score =>$_->[3],
302 motif=> $_->[2],
303 SR_protein=> $_->[0],
304 method=> 'ESEfinder',
308 return @fts;
310 ## convert parsed data into a meta array format
311 elsif ($value eq 'all') {
312 bless ($self->seq, "Bio::Seq::Meta::Array");
313 $self->seq->isa("Bio::Seq::MetaI")
314 || $self->throw("$self is not a Bio::Seq::MetaI");
316 for my $prot (keys %all) {
317 my @meta;
318 my $len = scalar @{$all{$prot}} ;
319 for (my $i = 0; $i < $len; $i++ ) {
320 $meta[$i] = $all{$prot}[$i][2];
323 # assign default name here so that the
324 # Bio::Seq::Meta::Array can work for all classes
325 # implementing it and we can avoid having to make
326 # asubclass for each implementation
328 $Bio::Seq::Meta::Array::DEFAULT_NAME = "ESEfinder_SRp55";
329 my $meta_name = $self->analysis_spec->{'name'} . "_" . "$prot";
330 $self->seq->named_meta($meta_name,\@meta );
332 # return seq array object implementing meta sequence #
333 return $self->seq;
336 #return ref to array of arrays
337 return \@sig_pdctns;
339 return $self->{'_result'};