Supply TEMPLATE and SUFFIX for temporary query sequence files.
[bioperl-run.git] / scripts / papplmaker.PLS
blobc11bd11b344edbc2705c67793c953fa97d141788
1 #!/usr/bin/perl -w
3 # Usage: ./papplmaker -h
4 # or: perldoc papplmaker
6 # senger@ebi.ac.uk
7 # July 2002
9 # $Id: papplmaker.PLS,v 1.8 2006-07-04 22:23:36 mauricio Exp $
10 #-----------------------------------------------------------------------------
12 use strict;
14 sub get_usage {
15 return <<"END_OF_USAGE";
16 Usage:
17 papplmaker [options]
19 where 'options' are:
20 -a <access> access method (default 'soap')
21 -l <location> where are the analyses
22 (default: http://www.ebi.ac.uk/soaplab/services)
23 -n <name> name of an analysis,
24 (default: generate for all available, unless -r given)
25 -r <regexp> regular expression for matching analysis names
26 (default: generate for all available, unless -n given)
27 -m <module> name of generated module,
28 can be a template (\$ANALYSIS, \$CATEGORY, \$SERVICE)
29 (default: same as service name)
30 -d <directory> output directory
31 (default: current directory)
32 -f! overwrite existing files
33 (default: skip generation of the existing files)
34 -s, -S show what and where will be generated but do not do it
35 -h this help
37 Environment variables:
38 HTTPPROXY HTTP proxy server (use by some access method,
39 e.g. by the SOAP access)
41 For more details type: perldoc papplmaker.PLS
43 END_OF_USAGE
46 BEGIN {
47 # add path to the directory with this script
48 my $mylib;
49 ($mylib = $0) =~ s|/[^/]+$||;
50 unshift @INC, $mylib;
52 use vars qw($VERSION $Revision $Cmdline);
53 $Cmdline = join (' ', @ARGV);
55 # be prepare for command-line options/arguments
56 use Getopt::Std;
57 use vars qw/ $opt_h $opt_a $opt_l $opt_n $opt_r $opt_m /;
58 use vars qw/ $opt_d $opt_f $opt_v $opt_s $opt_S /;
59 my $switches = 'adflmnr'; # switches taking an argument (a value)
60 getopt ($switches);
63 # set the version for version checking
64 $VERSION = do { my @r = (q[$Revision: 1.8 $] =~ /\d+/g); sprintf "%d.%-02d", @r };
65 $Revision = q[$Id: papplmaker.PLS,v 1.8 2006-07-04 22:23:36 mauricio Exp $];
67 # help wanted?
68 if ($opt_h) {
69 print get_usage;
70 exit 0;
73 # print version of this script and exit
74 if ($opt_v) {
75 print "$0 $VERSION\n";
76 exit 0;
80 use Bio::Tools::Run::Analysis; # to access analysis tools directly
81 use Bio::Tools::Run::AnalysisFactory; # to access list/factory of analysis tools
83 # --- create a factory object;
84 my @access = ('-access', $opt_a) if defined $opt_a;
85 my @location = ('-location', $opt_l) if defined $opt_l;
86 my @httpproxy = ('-httpproxy', $ENV{'HTTPPROXY'}) if defined $ENV{'HTTPPROXY'};
87 my $factory = new Bio::Tools::Run::AnalysisFactory (@location, @httpproxy)
88 unless $opt_n;
90 # --- create an analysis (service) object;
91 my @name = ('-name', $opt_n) if defined $opt_n;
92 my $service = new Bio::Tools::Run::Analysis (@name, @location, @httpproxy);
94 # find names of services we are going to generate stubs for
95 my (@names) = $opt_n ? ($opt_n) : @{ $factory->available_analyses };
96 @names = grep /$opt_r/i, @names if $opt_r;
97 print (join ("\n", @names), "\n") and exit 0 if $opt_s;
98 $opt_f = 'no' unless $opt_f;
100 for my $name (@names) {
102 # service name can consist of category and analysis name
103 my ($category, $analysis) = split (/\./, $name, 2);
104 unless ($analysis) { # swap them if category does not exist
105 $analysis = $category;
106 $category = undef;
109 # module may be a template for a real name
110 my $module = $opt_m ? $opt_m : $name;
111 $module =~ s/\$\{?SERVICE\}?/$name/ig;
112 $module =~ s/\$\{?ANALYSIS\}?/$analysis/ig;
113 $module =~ s/\$\{?CATEGORY\}?/$category/ig if $category;
114 $module =~ s/[ -\/]/_/g; # would cause troubles in module name (?)
116 # destination for generation
117 my $file = $module;
118 $file =~ s/[ \/]/_/g;
119 $file =~ s|::|/|g;
120 if ($opt_d) {
121 $opt_d .= '/' unless $opt_d =~ m|/$|;
122 $file = "$opt_d$file";
124 $file .= '.pm';
126 # show what would happen... or do it
127 if ($opt_S) {
128 print "SERVICE: $name\n\tMODULE: $module\n\tFILE: $file\n";
129 } else {
131 # create a service object (we will ask it for service metadata)
132 $service = $factory->create_analysis ($name) unless ($opt_n);
133 # $service = $service->new (-name => $name); # an alternative if 'create_analysis' does not work
135 # get metadata from the service
136 print "Accessing service '$name'...\n";
137 my $input_spec = $service->input_spec;
138 my $result_spec = $service->result_spec;
140 # create necessary directories
141 my $dirs = $file;
142 while ($dirs =~ s|/[^/]*$||) {
143 next unless $dirs;
144 mkdir ($dirs) or die "Directory '$dirs': $!\n" unless -d $dirs;
147 # generate and write a module for the service
148 print "Creating module '$module'...\n";
149 print ("\tFile '$file' exists, skipped...\n") and next if -s $file and $opt_f ne '!';
150 open (MODULE, ">$file") or die "File '$file': $!\n";
151 print MODULE &generate ($service, $file, $module, $input_spec, $result_spec);
152 close (MODULE) or die "File '$file': $!\n";
153 print "\tFile '$file' created\n";
157 # generated method names are created from the data input and result
158 # nams which may have some strange syntax not allowed in Perl - so
159 # make them more perlish here
160 sub esc_method_name {
161 my ($name) = shift;
162 $name =~ s/\W/_/g;
163 $name = "_$name" if $name =~ /^\d/;
164 return $name;
168 sub generate {
169 my ($service, $file, $module, $input_spec, $result_spec) = @_;
171 my $code1 = <<'EOC',
172 use Bio::Tools::Run::Analysis;
173 use vars qw(@ISA $AUTOLOAD);
175 sub new {
176 my ($class, @args) = @_;
177 my $parent = new Bio::Tools::Run::Analysis (-access => '$$$ACCESS$$$',
178 -location => '$$$LOCATION$$$',
179 -name => '$$$SERVICE$$$',
180 @args
182 @ISA = (ref $parent);
183 $self->{'_inputs'} = {};
184 bless $parent, '$$$PACKAGE$$$';
187 sub create_job {
188 my ($self, $params) = @_;
189 my $parent_object;
190 if (! defined $params) {
191 $parent_object =
192 $self->SUPER::create_job ( $self->_prepare_inputs ($self->{'_inputs'}) );
193 } elsif (ref $params) {
194 $parent_object =
195 $self->SUPER::create_job ( $self->_prepare_inputs ($self->{'_inputs'}, $params) );
196 } else {
197 $parent_object =
198 $self->SUPER::create_job ($params);
200 my $parent_ref = ref $parent_object;
201 my $job = bless $parent_object, '$$$PACKAGE$$$::Job';
202 $job->_isa ($parent_ref);
203 $job;
206 sub AUTOLOAD {
207 my $method = substr ($AUTOLOAD, rindex ($AUTOLOAD, '::') + 2);
208 return if $method eq 'DESTROY';
210 die "Unrecognized method '$method'.\n" .
211 "List of available methods for setting input data:\n\t" .
212 join ("\n\t", sort keys %set_methods) . "\n";
215 { no strict 'refs';
216 foreach my $method_name ( keys %set_methods ) {
217 my %method_def = %{ $set_methods{$method_name} };
218 *$method_name = sub {
219 my $self = shift;
220 my $value = $self->_read_value (shift);
221 if (@{ $method_def{'allowed_values'} } > 0) {
222 my $found;
223 foreach my $allowed ( @{ $method_def{'allowed_values'} } ) {
224 ($found = 1, last) if ($value eq $allowed);
226 warn ("Possibly '$value' is not allowed. Allowed values are:\n\t" .
227 join ("\n\t", @{ $method_def{'allowed_values'} }) . "\n")
228 unless $found;
230 ${ $self->{'_inputs'} }{ $method_def{'input_name'} } = $value;
231 $self;
237 my $code2 = <<'EOC',
238 use vars qw(@ISA $AUTOLOAD);
240 sub _isa { @ISA = $_[1] }
242 sub AUTOLOAD {
243 my $method = substr ($AUTOLOAD, rindex ($AUTOLOAD, '::') + 2);
244 return if $method eq 'DESTROY';
246 die "Unrecognized method '$method'.\n" .
247 "List of available methods for getting result data:\n\t" .
248 join ("\n\t", sort keys %get_methods) . "\n";
251 { no strict 'refs';
252 foreach my $method_name ( keys %get_methods ) {
253 my %method_def = %{ $get_methods{$method_name} };
254 my $result_name = $method_def{'result_name'};
255 *$method_name = sub {
256 my $self = shift;
257 ${ $self->results ( { $result_name => shift } ) } {$result_name};
263 my %replace = ( '\$\$\$ACCESS\$\$\$' => $$service{'_access'},
264 '\$\$\$LOCATION\$\$\$' => $$service{'_location'},
265 '\$\$\$SERVICE\$\$\$' => $$service{'_name'},
266 '\$\$\$PACKAGE\$\$\$' => $module,
268 foreach (keys %replace) {
269 $code1 =~ s/$_/$replace{$_}/eg;
273 # --- here starts the generated output
275 join ("\n",
276 "package $module;\n",
278 "# -- generated by Bio::Tools::Run::Analysis papplmaker (v$VERSION)",
279 "# -- Copyright (C) 2003 Martin Senger and EMBL-EBI",
280 "# -- generated from location: $service->{'_location'}",
281 "# -- using command line:",
282 "# -- $0 $Cmdline",
283 "# -- [" . localtime() . "]\n",
285 'my %set_methods = (',
287 (map { my %input = %$_;
288 my $method_name = &esc_method_name ($input{'name'});
289 my @attrs = (" input_name => '$input{'name'}',");
290 push (@attrs, " mandatory => '$input{'mandatory'}',") if defined $input{'mandatory'};
291 push (@attrs, " default => '$input{'default'}',") if defined $input{'default'};
292 push (@attrs, " type => '$input{'type'}',") if $input{'type'};
293 join("\n",
294 " $method_name => {",
295 @attrs,
296 ' allowed_values => [' .
297 ($input{'allowed_values'} ? join (',', map "'$_'", @{ $input{'allowed_values'} }) : '') .
298 ']',
299 ' },'
301 } @{ $input_spec} ),
302 ");",
304 $code1,
306 "\n",
307 "package ${module}::Job;\n",
309 'my %get_methods = (',
311 (map { my %result = %$_;
312 my $method_name = &esc_method_name ($result{'name'});
313 my @attrs = (" result_name => '$result{'name'}',");
314 push (@attrs, " type => '$result{'type'}',") if $result{'type'};
315 join("\n",
316 " $method_name => {",
317 @attrs,
318 ' },'
320 } @{ $result_spec} ),
321 ");",
323 $code2,
324 <<'EOC');
329 __END__
331 =head1 NAME
333 papplmaker.PLS - Analysis tools module generator
335 =head1 SYNOPSIS
337 # get some help
338 papplmaker.PLS -h
340 # generate module for program 'seqret'
341 papplmaker.PLS -n edit.seqret
343 # ditto, but specify where to find 'seqret'
344 papplmaker.PLS -n edit::seqret
345 -l http://localhost:8080/axis/services
347 # ditto, but specify a non-default access method to 'seqret'
348 papplmaker.PLS -n edit::seqret
349 -l http://corba.ebi.ac.uk/IOR/Analyses.ref
350 -a corba
352 # generate modules for all available analyses
353 # (using default location and default access method)
354 papplmaker.PLS
356 # do not generate but see what would be generated
357 papplmaker.PLS -s
358 papplmaker.PLS -S
360 # generate module for analysis 'edit::seqret'
361 # but name it 'MySeqret'
362 papplmaker.PLS -n edit::seqret -m MySeqret
364 # ...and use it
365 use MySeqret;
366 print new MySeqret->sequence_direct_data ('tatatacccgt')
367 ->osformat ('embl')
368 ->wait_for
369 ->outseq;
371 # ditto but put the result into directory '/tmp/my'
372 # (directories do not need to exist)
373 papplmaker.PLS -n edit::seqret -m MySeqret -d /tmp/my/
375 # generate modules for all analysis whose names
376 # matches given regular expression (case insensitive)
377 papplmaker.PLS -r 'edit'
379 # ditto, but name generated module with your own names
380 # (letting papplmaker.PLS substitute parts of your names)
381 papplmaker.PLS -r 'edit' -m 'My_$ANALYSIS'
383 =head1 DESCRIPTION
385 The module C<Bio::Tools::Run::Analysis> provides access to the local and remote
386 analysis tools in a unified way (defined in C<Bio::AnalysisI>). The
387 module uses general approach allowing to set arbitrary input data and
388 to retrieve results by naming them. However, sometimes is more
389 convenient to use a specific module, representing one analysis tool,
390 that already knows about available input and result names.
392 The generator C<papplmaker.PLS> creates such dedicated modules.
394 C<papplmaker.PLS> uses the same access method as the general module -
395 which means that depending on the parameter C<access> it can use SOAP,
396 CORBA or any other (supported) protocol, or it can access local
397 analysis (available on the same machine where C<papplmaker.PLS> is
398 invoked).
400 C<papplmaker.PLS> does its job either for one named analysis (specified
401 by the C<-n> option, or it uses C<Bio::Tools::Run::AnalysisFactory> module in
402 order to find what analyses are available, and can limit their number
403 by matching against a regular expression given by the C<-r> option.
405 The generated module or modules are named by default similarly to the
406 names of the corresponding analyses, but this can be changed by the
407 C<-m> option which is actually a template where the following strings
408 are recognised and replaced:
410 =over 4
412 =item $ANALYSIS or ${ANALYSIS}
414 Will be replaced by the name of the analysis.
416 =item $CATEGORY or ${CATEGORY}
418 Will be replaced by the name of the category where the analysis belongs to.
420 =item $SERVICE or ${SERVICE}
422 Will be replaced by the entire name of the service (which is usually a
423 concatenation of a category and a analysis name, and it is used also
424 as a default module name, btw).
426 =back
428 What is a difference between the C<service> and C<analysis>, and what
429 does C<category> mean? Sometimes these terms may be confusing because
430 they may mean slightly different things depending on the access method
431 used to communicate with them. Generally, an C<analysis> is a program
432 (an application, a tool) running somewhere, but sometimes on a local
433 machine. An example of an analysis is C<seqret> (from the EMBOSS
434 package). The analyses can be grouped into categories by their
435 functions or by type of data they deal with (but sometimes there are
436 no categories at all). Each analyses can be accessed using a higher
437 level of abstraction, a C<service>. A service is usually a
438 protocol-dependent wrapper, such as a Web Service, or a CORBA
439 service. For example there is a C<edit::seqret> service which
440 represents analysis C<seqret> in the category C<edit>.
442 =head1 FEEDBACK
444 =head2 Mailing Lists
446 User feedback is an integral part of the evolution of this and other
447 Bioperl modules. Send your comments and suggestions preferably to
448 the Bioperl mailing list. Your participation is much appreciated.
450 bioperl-l@bioperl.org - General discussion
451 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
453 =head2 Reporting Bugs
455 Report bugs to the Bioperl bug tracking system to help us keep track
456 of the bugs and their resolution. Bug reports can be submitted via the
457 web:
459 http://redmine.open-bio.org/projects/bioperl/
461 =head1 AUTHOR
463 Martin Senger (senger@ebi.ac.uk)
465 =head1 COPYRIGHT
467 Copyright (c) 2003, Martin Senger and EMBL-EBI.
468 All Rights Reserved.
470 This script is free software; you can redistribute it and/or modify
471 it under the same terms as Perl itself.
473 =head1 DISCLAIMER
475 This software is provided "as is" without warranty of any kind.
477 =head1 BUGS AND LIMITATIONS
479 None known at the time of writing this.
481 =cut