2 # BioPerl module for Bio::Tools::Analysis::DNA::ESEfinder
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Richard Adams
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::Analysis::DNA::ESEfinder - a wrapper around ESEfinder
21 use Bio::Tools::Analysis::DNA::ESEfinder;
24 my $seq; # a Bio::PrimarySeqI or Bio::SeqI object
27 -primary_id => 'test',
28 -seq=>'atgcatgctaggtgtgtgttttgtgggttgtactagctagtgat'.
31 my $ese_finder = Bio::Tools::Analysis::DNA::ESEfinder->
34 # run ESEfinder prediction on a DNA sequence
37 die "Could not get a result"
38 unless $ese_finder->status =~ /^COMPLETED/;
40 print $ese_finder->result; # print raw prediction to STDOUT
42 foreach my $feat ( $ese_finder->result('Bio::SeqFeatureI') ) {
44 # do something to SeqFeature
46 print $feat->gff_string, "\n";
47 # or store within the sequence - if it is a Bio::SeqI
48 $seq->add_SeqFeature($feat)
54 This class is a wrapper around the ESEfinder web server which uses
55 experimentally defined scoring matrices to identify possible exonic
56 splicing enhancers in human transcripts.
58 The results can be retrieved in 4 ways.
64 C<$ese_finder-E<gt>result('')> retrieves the raw text output of the
69 C<$ese_finder-E<gt>result('all')> returns a Bio::Seq::Meta::Array object
70 with prediction scores for all residues in the sequence
75 C<$ese_finder-E<gt>result('Bio::SeqFeatureI')> returns an array of
76 Bio::SeqFeature objects for sequences with significant scores. Feature
77 tags are score, motif, SR_protein and method
81 C<$ese_finder-E<gt>result('raw')> returns an array of significant matches
82 with each element being a reference to [SR_protein, position, motif,
87 See L<http://rulai.cshl.edu/tools/ESE2/>
89 This the second implementation of Bio::SimpleAnalysisI which hopefully
90 will make it easier to write wrappers on various services. This class
91 uses a web resource and therefore inherits from L<Bio::WebAgent>.
95 L<Bio::SimpleAnalysisI>,
102 User feedback is an integral part of the evolution of this and other
103 Bioperl modules. Send your comments and suggestions preferably to one
104 of the Bioperl mailing lists. Your participation is much appreciated.
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
111 Please direct usage questions or support issues to the mailing list:
113 I<bioperl-l@bioperl.org>
115 rather than to the module maintainer directly. Many experienced and
116 reponsive experts will be able look at the problem and quickly
117 address it. Please include a thorough description of the problem
118 with code and data examples if at all possible.
120 =head2 Reporting Bugs
122 Report bugs to the Bioperl bug tracking system to help us keep track
123 the bugs and their resolution. Bug reports can be submitted via the
126 https://github.com/bioperl/bioperl-live/issues
130 Richard Adams, Richard.Adams@ed.ac.uk,
131 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
135 The rest of the documentation details each of the object
136 methods. Internal methods are usually preceded with a _
141 # Let the code begin...
144 package Bio
::Tools
::Analysis
::DNA
::ESEfinder
;
149 use HTTP
::Request
::Common qw
(POST
);
150 use HTML
::HeadParser
;
151 use Bio
::SeqFeature
::Generic
;
152 use Bio
::Seq
::Meta
::Array
;
156 #inherits directly from SimpleAnalysisBase
157 use base
qw(Bio::Tools::Analysis::SimpleAnalysisBase);
160 #global vars are now file-scoped lexicals
162 my $URL = 'http://rulai.cshl.org/cgi-bin/tools/ESE/esefinder.cgi';
164 my $ANALYSIS_NAME = 'ESEfinder';
169 'name' => 'ESEfinder',
170 'type' => 'DNA', #compulsory entry as is used for seq checking
172 'supplier' => 'Krainer lab, Cold Spring Harbor Laboratory, POBOX100, Bungtown Rd, COld Spring Harbor, NY, USA',
173 'description' => 'to identify exonic splicing elements in human transcripts',
178 'mandatory' => 'true',
179 'type' => 'Bio::PrimarySeqI',
180 'name' => 'sequence',
185 '' => 'bulk', # same as undef
186 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
187 'raw' => 'Array of [ SR_protein, position, motif, score]',
188 'all' => 'Bio::Seq::Meta::Array object'
192 ### unique to this module ##
194 ## fills in fixed data for class ##
197 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
198 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
199 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
200 $self->{'_ANALYSIS_NAME'} =$ANALYSIS_NAME;
208 my $stringfh = IO
::String
->new($seq_fasta);
209 my $seqout = Bio
::SeqIO
->new(-fh
=> $stringfh,
211 $seqout->write_seq($self->seq);
212 $self->debug($seq_fasta);
214 # delay repeated calls by default by 3 sec, set delay() to change
216 $self->status('TERMINATED_BY_ERROR');
218 my $request = POST
$self->url,
219 #Content_Type => 'x-www-form-urlencoded',
229 sequence
=>$seq_fasta,
231 my $content = $self->request($request);
232 if( $content->is_error ) {
233 $self->throw(ref($self)." Request Error:\n".$content->as_string);
236 my $text = $content->content; #1st reponse
237 my ($tmpfile) = $text =~ /value="(tmp\/.+txt
)"/;
238 # now get data for all residues #
239 my $rq2 = POST 'http://rulai.cshl.org/cgi-bin/tools/ESE/resultfile.txt',
240 #Content_Type => 'x-www-form-urlencoded',
244 my $ua2 = Bio::WebAgent->new();
245 my $content2 = $ua2->request($rq2);
246 if( $content2->is_error ) {
247 $self->throw(ref($self)." Request Error
:\n".$content2->as_string);
250 my $text2 = $content2->content;
251 $self->{'_result'} = $text2;
252 $self->status('COMPLETED') if $text2 ne '';
254 #print Dumper $response;
260 #make sec feat of above threshold scores #
262 my ($self,$value) = @_;
268 my $result = IO::String->new($self->{'_result'});
272 while (my $line = <$result>) {
273 #make array of all scores or threshold depending on $value
274 last if $line =~ /^All scores/ && $value ne 'all' or $line =~ /2001,/;
275 $all_st_flag++ if $line =~ /All scores/;
276 next if $value eq 'all' && $all_st_flag == 0;
279 if ($line =~ /^Protein/) {
280 ($current_SR) = $line =~/:\s+(\S+)/;
281 $current_SR =~ s{/}{_}; # remove unallowed charcters from hash
283 if ( $line =~/^\d+/ && $value ne 'all') {
284 push @sig_pdctns, [$current_SR, split /\s+/, $line] ;
285 } elsif ($line =~ /^\d+/) {
287 push @{$all{$current_SR}}, [split /\s+/, $line];
291 if ($value eq 'Bio::SeqFeatureI') {
292 foreach (@sig_pdctns) {
293 #make new ese object for each row of results
294 push @fts, Bio::SeqFeature::Generic->new
297 -end => $_->[1] + length($_->[2]) -1,
298 -source => 'ESEfinder',
303 SR_protein=> $_->[0],
304 method=> 'ESEfinder',
310 ## convert parsed data into a meta array format
311 elsif ($value eq 'all') {
312 bless ($self->seq, "Bio
::Seq
::Meta
::Array
");
313 $self->seq->isa("Bio
::Seq
::MetaI
")
314 || $self->throw("$self is
not a Bio
::Seq
::MetaI
");
316 for my $prot (keys %all) {
318 my $len = scalar @{$all{$prot}} ;
319 for (my $i = 0; $i < $len; $i++ ) {
320 $meta[$i] = $all{$prot}[$i][2];
323 # assign default name here so that the
324 # Bio::Seq::Meta::Array can work for all classes
325 # implementing it and we can avoid having to make
326 # asubclass for each implementation
328 $Bio::Seq::Meta::Array::DEFAULT_NAME = "ESEfinder_SRp55
";
329 my $meta_name = $self->analysis_spec->{'name'} . "_
" . "$prot";
330 $self->seq->named_meta($meta_name,\@meta );
332 # return seq array object implementing meta sequence #
336 #return ref to array of arrays
339 return $self->{'_result'};