sync with trunk to r15684
[bioperl-live.git] / Bio / Root / Utilities.pm
blob3cdaca6d67d49d6ecc40f0ed2545adf3546cc2e8
1 package Bio::Root::Utilities;
2 use strict;
4 # $Id$
6 =head1 NAME
8 Bio::Root::Utilities - General-purpose utility module
10 =head1 SYNOPSIS
12 =head2 Object Creation
14 # Using the supplied singleton object:
15 use Bio::Root::Utilities qw(:obj);
16 $Util->some_method();
18 # Create an object manually:
19 use Bio::Root::Utilities;
20 my $util = Bio::Root::Utilities->new();
21 $util->some_method();
23 $date_stamp = $Util->date_format('yyy-mm-dd');
25 $clean = $Util->untaint($dirty);
27 $compressed = $Util->compress('/home/me/myfile.txt')
29 my ($mean, $stdev) = $Util->mean_stdev( @data );
31 $Util->authority("me@example.com");
32 $Util->mail_authority("Something you should know about...");
34 ...and a host of other methods. See below.
36 =head1 DESCRIPTION
38 Provides general-purpose utilities of potential interest to any Perl script.
40 The C<:obj> tag is a convenience that imports a $Util symbol into your
41 namespace representing a Bio::Root::Utilities object. This saves you
42 from creating your own Bio::Root::Utilities object via
43 C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
44 C<Bio::Root::Utilities>, though feel free to do these things if desired.
45 Since there should normally not be a need for a script to have more
46 than one Bio::Root::Utilities object, this module thus comes with it's
47 own singleton.
49 =head1 INSTALLATION
51 This module is included with the central Bioperl distribution:
53 http://www.bioperl.org/wiki/Getting_BioPerl
54 ftp://bio.perl.org/pub/DIST
56 Follow the installation instructions included in the README file.
58 =head1 DEPENDENCIES
60 Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
61 and L<Bio::Root::Exception>.
63 Relies on external executables for file compression/uncompression
64 and sending mail. No paths to these are hard coded but are located
65 as needed.
67 =head1 SEE ALSO
69 http://bioperl.org - Bioperl Project Homepage
71 =head1 FEEDBACK
73 =head2 Mailing Lists
75 User feedback is an integral part of the evolution of this and other Bioperl modules.
76 Send your comments and suggestions preferably to one of the Bioperl mailing lists.
77 Your participation is much appreciated.
79 bioperl-l@bioperl.org - General discussion
80 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
82 =head2 Support
84 Please direct usage questions or support issues to the mailing list:
86 L<bioperl-l@bioperl.org>
88 rather than to the module maintainer directly. Many experienced and
89 reponsive experts will be able look at the problem and quickly
90 address it. Please include a thorough description of the problem
91 with code and data examples if at all possible.
93 =head2 Reporting Bugs
95 Report bugs to the Bioperl bug tracking system to help us keep track
96 the bugs and their resolution. Bug reports can be submitted via the
97 web:
99 http://bugzilla.open-bio.org/
101 =head1 AUTHOR
103 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
105 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
108 =head1 ACKNOWLEDGEMENTS
110 This module was originally developed under the auspices of the
111 Saccharomyces Genome Database: http://genome-www.stanford.edu/Saccharomyces
113 =head1 COPYRIGHT
115 Copyright (c) 1996-2007 Steve Chervitz. All Rights Reserved.
116 This module is free software; you can redistribute it and/or
117 modify it under the same terms as Perl itself.
119 =cut
121 =head1 APPENDIX
123 Methods beginning with a leading underscore are considered private
124 and are intended for internal use by this module. They are
125 B<not> considered part of the public interface and are described here
126 for documentation purposes only.
128 =cut
130 # Let the code begin...
132 use Bio::Root::IO;
133 use Bio::Root::Exception;
135 use vars qw(@EXPORT_OK %EXPORT_TAGS);
136 use base qw(Bio::Root::Root Exporter);
137 @EXPORT_OK = qw($Util);
138 %EXPORT_TAGS = ( obj => [qw($Util)],
139 std => [qw($Util)],);
141 use vars qw($ID $Util $GNU_PATH $TIMEOUT_SECS
142 @COMPRESSION_UTILS @UNCOMPRESSION_UTILS
143 $DEFAULT_NEWLINE $NEWLINE $AUTHORITY
144 @MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
147 $ID = 'Bio::Root::Utilities';
148 # Number of seconds to wait before timing out when reading input (taste_file())
149 $TIMEOUT_SECS = 30;
150 $NEWLINE = $ENV{'NEWLINE'} || undef;
151 $BASE_YEAR = 1900; # perl's localtime() assumes this for it's year data.
152 # TODO: update this every hundred years. Y2K-sensitive code.
153 $DEFAULT_CENTURY = $BASE_YEAR + 100;
154 @MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
155 @DAYS = qw(Sun Mon Tue Wed Thu Fri Sat);
156 # Sets the preference for compression utilities to be used by compress().
157 # The first executable in this list to be found in the current PATH will be used,
158 # unless overridden in the call to that function. See docs for details.
159 @COMPRESSION_UTILS = qw(gzip bzip2 zip compress);
160 @UNCOMPRESSION_UTILS = qw(gunzip bunzip2 unzip uncompress);
162 # Default person to receive feedback from users and possibly automatic error messages.
163 $AUTHORITY = '';
165 # Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
166 # will automatically locate the compression utility in the current PATH.
167 # Retaining $GNU_PATH for backward compatibility.
169 # $GNU_PATH points to the directory containing the gzip and gunzip
170 # executables. It may be required for executing gzip/gunzip
171 # in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
172 # Customize $GNU_PATH for your site if the compress() or
173 # uncompress() functions are generating exceptions.
174 $GNU_PATH = '';
175 #$GNU_PATH = '/tools/gnu/bin/';
177 $DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
179 ## Static UTIL object.
180 $Util = Bio::Root::Root->new();
183 =head2 date_format
185 Title : date_format
186 Usage : $Util->date_format( [FMT], [DATE])
187 Purpose : -- Get a string containing the formated date or time
188 : taken when this routine is invoked.
189 : -- Provides a way to avoid using `date`.
190 : -- Provides an interface to localtime().
191 : -- Interconverts some date formats.
193 : (For additional functionality, use Date::Manip or
194 : Date::DateCalc available from CPAN).
195 Example : $Util->date_format();
196 : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
197 Returns : String (unless 'list' is provided as argument, see below)
199 : 'yyyy-mm-dd' = 1996-05-03 # default format.
200 : 'yyyy-dd-mm' = 1996-03-05
201 : 'yyyy-mmm-dd' = 1996-May-03
202 : 'd-m-y' = 3-May-1996
203 : 'd m y' = 3 May 1996
204 : 'dmy' = 3may96
205 : 'mdy' = May 3, 1996
206 : 'ymd' = 96may3
207 : 'md' = may3
208 : 'year' = 1996
209 : 'hms' = 23:01:59 # when not converting a format, 'hms' can be
210 : # tacked on to any of the above options
211 : # to add the time stamp: eg 'dmyhms'
212 : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
213 : 'list' = the contents of localtime(time) in an array.
214 Argument : (all are optional)
215 : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
216 : mdy | ymd | md | d-m-y | hms | hm
217 : ('hms' may be appended to any of these to
218 : add a time stamp)
220 : DATE = String containing date to be converted.
221 : Acceptable input formats:
222 : 12/1/97 (for 1 December 1997)
223 : 1997-12-01
224 : 1997-Dec-01
225 Throws :
226 Comments : If you don't care about formatting or using backticks, you can
227 : always use: $date = `date`;
229 : For more features, use Date::Manip.pm, (which I should
230 : probably switch to...)
232 See Also : L<file_date()|file_date>, L<month2num()|month2num>
234 =cut
236 #---------------'
237 sub date_format {
238 #---------------
239 my $self = shift;
240 my $option = shift;
241 my $date = shift; # optional date to be converted.
243 my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
245 $option ||= 'yyyy-mm-dd';
247 my ($month_txt, $day_txt, $month_num, $fullYear);
248 my ($converting, @date);
250 # Load a supplied date for conversion:
251 if(defined($date) && ($date =~ /[\D-]+/)) {
252 $converting = 1;
253 if( $date =~ m{/}) {
254 ($mon,$mday,$year) = split(m{/}, $date);
255 } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
256 ($year,$mon,$mday) = ($1, $2, $3);
257 } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
258 ($year,$mon,$mday) = ($1, $2, $3);
259 $mon = $self->month2num($2);
260 } else {
261 print STDERR "\n*** Unsupported input date format: $date\n";
263 if(length($year) == 4) {
264 $fullYear = $year;
265 $year = substr $year, 2;
266 } else {
267 # Heuristics to guess what century was intended when a 2-digit year is given
268 # If number is over 50, assume it's for prev century; under 50 = default century.
269 # TODO: keep an eye on this Y2K-sensitive code
270 if ($year > 50) {
271 $fullYear = $DEFAULT_CENTURY + $year - 100;
272 } else {
273 $fullYear = $DEFAULT_CENTURY + $year;
276 $mon -= 1;
277 } else {
278 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
279 localtime(($date ? $date : time()));
280 return @date if $option =~ /list/i;
281 $fullYear = $BASE_YEAR+$year;
283 $month_txt = $MONTHS[$mon];
284 $day_txt = $DAYS[$wday] if defined $wday;
285 $month_num = $mon+1;
287 # print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
289 if( $option =~ /yyyy-mm-dd/i ) {
290 $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
291 } elsif( $option =~ /yyyy-dd-mm/i ) {
292 $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
293 } elsif( $option =~ /yyyy-mmm-dd/i ) {
294 $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
295 } elsif( $option =~ /full|unix/i ) {
296 $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
297 } elsif( $option =~ /mdy/i ) {
298 $date = "$month_txt $mday, $fullYear";
299 } elsif( $option =~ /ymd/i ) {
300 $date = $year."\l$month_txt$mday";
301 } elsif( $option =~ /dmy/i ) {
302 $date = $mday."\l$month_txt$year";
303 } elsif( $option =~ /md/i ) {
304 $date = "\l$month_txt$mday";
305 } elsif( $option =~ /d-m-y/i ) {
306 $date = "$mday-$month_txt-$fullYear";
307 } elsif( $option =~ /d m y/i ) {
308 $date = "$mday $month_txt $fullYear";
309 } elsif( $option =~ /year/i ) {
310 $date = $fullYear;
311 } elsif( $option =~ /dmy/i ) {
312 $date = $mday.'-'.$month_txt.'-'.$fullYear;
313 } elsif($option and $option !~ /hms/i) {
314 print STDERR "\n*** Unrecognized date format request: $option\n";
317 if( $option =~ /hms/i and not $converting) {
318 $date .= " $hour:$min:$sec" if $date;
319 $date ||= "$hour:$min:$sec";
322 return $date || join(" ", @date);
326 =head2 month2num
328 Title : month2num
329 Purpose : Converts a string containing a name of a month to integer
330 : representing the number of the month in the year.
331 Example : $Util->month2num("march"); # returns 3
332 Argument : The string argument must contain at least the first
333 : three characters of the month's name. Case insensitive.
334 Throws : Exception if the conversion fails.
336 =cut
338 #--------------'
339 sub month2num {
340 #--------------
342 my ($self, $str) = @_;
344 # Get string in proper format for conversion.
345 $str = substr($str, 0, 3);
346 for(0..$#MONTHS) {
347 return $_+1 if $str =~ /$MONTHS[$_]/i;
349 $self->throw("Invalid month name: $str");
352 =head2 num2month
354 Title : num2month
355 Purpose : Does the opposite of month2num.
356 : Converts a number into a string containing a name of a month.
357 Example : $Util->num2month(3); # returns 'Mar'
358 Throws : Exception if supplied number is out of range.
360 =cut
362 #-------------
363 sub num2month {
364 #-------------
365 my ($self, $num) = @_;
367 $self->throw("Month out of range: $num") if $num < 1 or $num > 12;
368 return $MONTHS[$num-1];
371 =head2 compress
373 Title : compress
374 Usage : $Util->compress(full-path-filename);
375 : $Util->compress(<named parameters>);
376 Purpose : Compress a file.
377 Example : $Util->compress("/usr/people/me/data.txt");
378 : $Util->compress(-file=>"/usr/people/me/data.txt",
379 : -tmp=>1,
380 : -outfile=>"/usr/people/share/data.txt.gz",
381 : -exe=>"/usr/local/bin/fancyzip");
382 Returns : String containing full, absolute path to compressed file
383 Argument : Named parameters (case-insensitive):
384 : -FILE => String (name of file to be compressed, full path).
385 : If the supplied filename ends with '.gz' or '.Z',
386 : that extension will be removed before attempting to compress.
387 : Optional:
388 : -TMP => boolean. If true, (or if user is not the owner of the file)
389 : the file is compressed to a temp file. If false, file may be
390 : clobbered with the compressed version (if using a utility like
391 : gzip, which is the default)
392 : -OUTFILE => String (name of the output compressed file, full path).
393 : -EXE => Name of executable for compression utility to use.
394 : Will supercede those in @COMPRESSION_UTILS defined by
395 : this module. If the absolute path to the executable is not provided,
396 : it will be searched in the PATH env variable.
397 Throws : Exception if file cannot be compressed.
398 : If user is not owner of the file, generates a warning and compresses to
399 : a tmp file. To avoid this warning, use the -o file test operator
400 : and call this function with -TMP=>1.
401 Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
402 : defined by this module, in the order defined. The first utility that is
403 : found to be executable will be used. Any utility defined in optional -EXE param
404 : will be tested for executability first.
405 : To minimize security risks, the -EXE parameter value is untained using
406 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
408 See Also : L<uncompress()|uncompress>
410 =cut
412 #------------'
413 sub compress {
414 #------------
415 my ($self, @args) = @_;
416 # This method formerly didn't use named params and expected fileName, tmp
417 # in that order. This should be backward compatibile.
418 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
419 my ($file, $get, $fmt);
421 # in case the supplied name already has a compressed extension
422 if($fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName =~ s/$1$//; };
423 $self->debug("compressing file $fileName");
425 my @util_to_use = @COMPRESSION_UTILS;
427 if (defined $exe){
428 $exe = $self->untaint($exe, 1);
429 unshift @util_to_use, $exe;
432 my @checked = @util_to_use;
433 $exe ||= '';
434 while (not -x $exe and scalar(@util_to_use)) {
435 $exe = $self->find_exe(shift @util_to_use);
438 unless (-x $exe) {
439 $self->throw("Can't find compression utility. Looked for @checked");
442 my ($compressed, @cmd, $handle);
444 if(defined($outfile) or $tmp or not -o $fileName) {
445 if (defined $outfile) {
446 $compressed = $outfile;
447 } else {
448 # obtain a temporary file name (not using the handle)
449 # and insert some special text to flag it as a bioperl-based temp file
450 my $io = Bio::Root::IO->new();
451 ($handle, $compressed) = $io->tempfile();
452 $compressed .= '.tmp.bioperl.gz';
455 if ($exe =~ /gzip|bzip2|compress/) {
456 @cmd = ("$exe -f < \"$fileName\" > \"$compressed\"");
457 } elsif ($exe eq 'zip') {
458 @cmd = ("$exe -r \"$fileName.zip\" \"$fileName\"");
460 not $tmp and
461 $self->warn("Not owner of file $fileName. Compressing to temp file $compressed.");
462 $tmp = 1;
463 } else {
464 # Need to compute the compressed name based on exe since we're returning it.
465 $compressed = $fileName;
466 if ($exe =~ /gzip/) {
467 $compressed .= '.gz';
468 } elsif ($exe =~ /bzip2/) {
469 $compressed .= '.bz2';
470 } elsif ($exe =~ /zip/) {
471 $compressed .= '.zip';
472 } elsif ($exe =~ /compress/) {
473 $compressed .= '.Z';
475 if ($exe =~ /gzip|bzip2|compress/) {
476 @cmd = ($exe, '-f', $fileName);
477 } elsif ($exe eq 'zip') {
478 @cmd = ($exe, '-r', "$compressed", $fileName);
482 if(system(@cmd) != 0) {
483 $self->throw( -class => 'Bio::Root::SystemException',
484 -text => "Failed to compress file $fileName using $exe: $!");
487 return $compressed;
490 =head2 uncompress
492 Title : uncompress
493 Usage : $Util->uncompress(full-path-filename);
494 : $Util->uncompress(<named parameters>);
495 Purpose : Uncompress a file.
496 Example : $Util->uncompress("/usr/people/me/data.txt");
497 : $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
498 : -tmp=>1,
499 : -outfile=>"/usr/people/share/data.txt",
500 : -exe=>"/usr/local/bin/fancyzip");
501 Returns : String containing full, absolute path to uncompressed file
502 Argument : Named parameters (case-insensitive):
503 : -FILE => String (name of file to be uncompressed, full path).
504 : If the supplied filename ends with '.gz' or '.Z',
505 : that extension will be removed before attempting to uncompress.
506 : Optional:
507 : -TMP => boolean. If true, (or if user is not the owner of the file)
508 : the file is uncompressed to a temp file. If false, file may be
509 : clobbered with the uncompressed version (if using a utility like
510 : gzip, which is the default)
511 : -OUTFILE => String (name of the output uncompressed file, full path).
512 : -EXE => Name of executable for uncompression utility to use.
513 : Will supercede those in @UNCOMPRESSION_UTILS defined by
514 : this module. If the absolute path to the executable is not provided,
515 : it will be searched in the PATH env variable.
516 Throws : Exception if file cannot be uncompressed.
517 : If user is not owner of the file, generates a warning and uncompresses to
518 : a tmp file. To avoid this warning, use the -o file test operator
519 : and call this function with -TMP=>1.
520 Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
521 : defined by this module, in the order defined. The first utility that is
522 : found to be executable will be used. Any utility defined in optional -EXE param
523 : will be tested for executability first.
524 : To minimize security risks, the -EXE parameter value is untained using
525 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
527 See Also : L<compress()|compress>
529 =cut
531 #------------'
532 sub uncompress {
533 #------------
534 my ($self, @args) = @_;
535 # This method formerly didn't use named params and expected fileName, tmp
536 # in that order. This should be backward compatibile.
537 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
538 my ($file, $get, $fmt);
540 # in case the supplied name lacks a compressed extension
541 if(not $fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName .= $1; };
542 $self->debug("uncompressing file $fileName");
544 my @util_to_use = @UNCOMPRESSION_UTILS;
546 if (defined $exe){
547 $exe = $self->untaint($exe, 1);
548 unshift @util_to_use, $exe;
551 $exe ||= '';
552 while (not -x $exe and scalar(@util_to_use)) {
553 $exe = $self->find_exe(shift @util_to_use);
556 unless (-x $exe) {
557 $self->throw("Can't find compression utility. Looked for @util_to_use");
560 my ($uncompressed, @cmd, $handle);
562 $uncompressed = $fileName;
563 $uncompressed =~ s/\.\w+$//;
565 if(defined($outfile) or $tmp or not -o $fileName) {
566 if (defined $outfile) {
567 $uncompressed = $outfile;
568 } else {
569 # obtain a temporary file name (not using the handle)
570 my $io = Bio::Root::IO->new();
571 ($handle, $uncompressed) = $io->tempfile();
572 # insert some special text to flag it as a bioperl-based temp file
573 $uncompressed .= '.tmp.bioperl';
576 if ($exe =~ /gunzip|bunzip2|uncompress/) {
577 @cmd = ("$exe -f < \"$fileName\" > \"$uncompressed\"");
578 } elsif ($exe eq 'unzip') {
579 @cmd = ("$exe -p \"$fileName\" > \"$uncompressed\"");
581 not $tmp and
582 $self->warn("Not owner of file $fileName. Uncompressing to temp file $uncompressed.");
583 $tmp = 1;
584 } else {
585 if ($exe =~ /gunzip|bunzip2|uncompress/) {
586 @cmd = ($exe, '-f', $fileName);
587 } elsif ($exe eq 'zip') {
588 @cmd = ($exe, $fileName);
592 if(system(@cmd) != 0) {
593 $self->throw( -class => 'Bio::Root::SystemException',
594 -text => "Failed to uncompress file $fileName using $exe: $!");
597 return $uncompressed;
601 =head2 file_date
603 Title : file_date
604 Usage : $Util->file_date( filename [,date_format])
605 Purpose : Obtains the date of a given file.
606 : Provides flexible formatting via date_format().
607 Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
608 Argument : filename = string, full path name for file
609 : date_format = string, desired format for date (see date_format()).
610 : Default = yyyy-mm-dd
611 Thows : Exception if no file is provided or does not exist.
612 Comments : Uses the mtime field as obtained by stat().
614 =cut
616 #--------------
617 sub file_date {
618 #--------------
619 my ($self, $file, $fmt) = @_;
621 $self->throw("No such file: $file") if not $file or not -e $file;
623 $fmt ||= 'yyyy-mm-dd';
625 my @file_data = stat($file);
626 return $self->date_format($fmt, $file_data[9]); # mtime field
630 =head2 untaint
632 Title : untaint
633 Purpose : To remove nasty shell characters from untrusted data
634 : and allow a script to run with the -T switch.
635 : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
636 : Accept only the first block of contiguous characters:
637 : Default allowed chars = "-\w.', ()"
638 : If $relax is true = "-\w.', ()\/=%:^<>*"
639 Usage : $Util->untaint($value, $relax)
640 Returns : String containing the untained data.
641 Argument: $value = string
642 : $relax = boolean
643 Comments:
644 This general untaint() function may not be appropriate for every situation.
645 To allow only a more restricted subset of special characters
646 (for example, untainting a regular expression), then using a custom
647 untainting mechanism would permit more control.
649 Note that special trusted vars (like $0) require untainting.
651 =cut
653 #------------`
654 sub untaint {
655 #------------
656 my($self,$value,$relax) = @_;
657 $relax ||= 0;
658 my $untainted;
660 $self->debug("\nUNTAINT: $value\n");
662 unless (defined $value and $value ne '') {
663 return $value;
666 if( $relax ) {
667 $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
668 $untainted = $1
669 # } elsif( $relax == 2 ) { # Could have several degrees of relax.
670 # $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
671 # $untainted = $1
672 } else {
673 $value =~ /([-\w.\', ()]+)/;
674 $untainted = $1
677 $self->debug("UNTAINTED: $untainted\n");
679 $untainted;
683 =head2 mean_stdev
685 Title : mean_stdev
686 Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
687 Purpose : Calculates the mean and standard deviation given a list of numbers.
688 Returns : 2-element list (mean, stdev)
689 Argument : list of numbers (ints or floats)
690 Thows : n/a
692 =cut
694 #---------------
695 sub mean_stdev {
696 #---------------
697 my ($self, @data) = @_;
698 return (undef,undef) if not @data; # case of empty @data list
699 my $mean = 0;
700 my $N = 0;
701 foreach (@data) { $mean += $_; $N++ }
702 $mean /= $N;
703 my $sum_diff_sqd = 0;
704 foreach (@data) { $sum_diff_sqd += ($mean - $_) * ($mean - $_); }
705 # if only one element in @data list, unbiased stdev is undefined
706 my $stdev = $N <= 1 ? undef : sqrt( $sum_diff_sqd / ($N-1) );
707 return ($mean, $stdev);
711 =head2 count_files
713 Title : count_files
714 Purpose : Counts the number of files/directories within a given directory.
715 : Also reports the number of text and binary files in the dir
716 : as well as names of these files and directories.
717 Usage : count_files(\%data)
718 : $data{-DIR} is the directory to be analyzed. Default is ./
719 : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
720 Argument : Hash reference (empty)
721 Returns : n/a;
722 : Modifies the hash ref passed in as the sole argument.
723 : $$href{-TOTAL} scalar
724 : $$href{-NUM_TEXT_FILES} scalar
725 : $$href{-NUM_BINARY_FILES} scalar
726 : $$href{-NUM_DIRS} scalar
727 : $$href{-T_FILE_NAMES} array ref
728 : $$href{-B_FILE_NAMES} array ref
729 : $$href{-DIRNAMES} array ref
731 =cut
733 #----------------
734 sub count_files {
735 #----------------
736 my $self = shift;
737 my $href = shift; # Reference to an empty hash.
738 my( $name, @fileLine);
739 my $dir = $$href{-DIR} || './'; # THIS IS UNIX SPECIFIC? FIXME/TODO
740 my $print = $$href{-PRINT} || 0;
742 ### Make sure $dir ends with /
743 $dir !~ m{/$} and do{ $dir .= '/'; $$href{-DIR} = $dir; };
745 open ( my $PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
747 ### Initialize the hash data.
748 $$href{-TOTAL} = 0;
749 $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = $$href{-NUM_DIRS} = 0;
750 $$href{-T_FILE_NAMES} = [];
751 $$href{-B_FILE_NAMES} = [];
752 $$href{-DIR_NAMES} = [];
753 while( <$PIPE> ) {
754 chomp();
755 $$href{-TOTAL}++;
756 if( -T $dir.$_ ) {
757 $$href{-NUM_TEXT_FILES}++; push @{$$href{-T_FILE_NAMES}}, $_; }
758 if( -B $dir.$_ and not -d $dir.$_) {
759 $$href{-NUM_BINARY_FILES}++; push @{$$href{-B_FILE_NAMES}}, $_; }
760 if( -d $dir.$_ ) {
761 $$href{-NUM_DIRS}++; push @{$$href{-DIR_NAMES}}, $_; }
763 close $PIPE;
765 if( $print) {
766 printf( "\n%4d %s\n", $$href{-TOTAL}, "total files+dirs in $dir");
767 printf( "%4d %s\n", $$href{-NUM_TEXT_FILES}, "text files");
768 printf( "%4d %s\n", $$href{-NUM_BINARY_FILES}, "binary files");
769 printf( "%4d %s\n", $$href{-NUM_DIRS}, "directories");
774 #=head2 file_info
776 # Title : file_info
777 # Purpose : Obtains a variety of date for a given file.
778 # : Provides an interface to Perl's stat().
779 # Status : Under development. Not ready. Don't use!
781 #=cut
783 #--------------
784 sub file_info {
785 #--------------
786 my ($self, %param) = @_;
787 my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
788 $get ||= 'all';
789 $fmt ||= 'yyyy-mm-dd';
791 my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
792 $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
794 if($get =~ /date/i) {
795 ## I can get the elapsed time since the file was modified but
796 ## it's not so straightforward to get the date in a nice format...
797 ## Think about using a standard CPAN module for this, like
798 ## Date::Manip or Date::DateCalc.
800 my $date = $mtime;
801 my $elsec = time - $mtime;
802 printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN>;
803 my $days = sprintf "%.0f", $elsec/(3600*24);
804 } elsif($get eq 'all') {
805 return stat $file;
810 #------------
811 sub delete {
812 #------------
813 my $self = shift;
814 my $fileName = shift;
815 if(not -e $fileName) {
816 $self->throw("Can't delete file $fileName: Does not exist.");
817 } elsif(not -o $fileName) {
818 $self->throw("Can't delete file $fileName: Not owner.");
820 my $ulval = unlink($fileName) > 0 or
821 $self->throw("Failed to delete file $fileName: $!");
825 =head2 create_filehandle
827 Usage : $object->create_filehandle(<named parameters>);
828 Purpose : Create a FileHandle object from a file or STDIN.
829 : Mainly used as a helper method by read() and get_newline().
830 Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
831 Argument : Named parameters (case-insensitive):
832 : (all optional)
833 : -CLIENT => object reference for the object submitting
834 : the request. Default = $Util.
835 : -FILE => string (full path to file) or a reference
836 : to a FileHandle object or typeglob. This is an
837 : optional parameter (if not defined, STDIN is used).
838 Returns : Reference to a FileHandle object.
839 Throws : Exception if cannot open a supplied file or if supplied with a
840 : reference that is not a FileHandle ref.
841 Comments : If given a FileHandle reference, this method simply returns it.
842 : This method assumes the user wants to read ascii data. So, if
843 : the file is binary, it will be treated as a compressed (gzipped)
844 : file and access it using gzip -ce. The problem here is that not
845 : all binary files are necessarily compressed. Therefore,
846 : this method should probably have a -mode parameter to
847 : specify ascii or binary.
849 See Also : L<get_newline()|get_newline>
851 =cut
853 #---------------------
854 sub create_filehandle {
855 #---------------------
856 my($self, @param) = @_;
857 my($client, $file, $handle) =
858 $self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
860 if(not ref $client) { $client = $self; }
861 $file ||= $handle;
862 if( $client->can('file')) {
863 $file = $client->file($file);
866 my $FH;
867 my ($handle_ref);
869 if($handle_ref = ref($file)) {
870 if($handle_ref eq 'FileHandle') {
871 $FH = $file;
872 $client->{'_input_type'} = "FileHandle";
873 } elsif($handle_ref eq 'GLOB') {
874 $FH = $file;
875 $client->{'_input_type'} = "Glob";
876 } else {
877 $self->throw(-class=>'Bio::Root::IOException',
878 -text =>"Can't read from $file: Not a FileHandle or GLOB ref.");
880 $self->verbose > 0 and printf STDERR "$ID: reading data from FileHandle\n";
882 } elsif($file) {
883 $client->{'_input_type'} = "FileHandle for $file";
885 # Use gzip -cd to access compressed data.
886 if( -B $file ) {
887 $client->{'_input_type'} .= " (compressed)";
888 my $gzip = $self->find_exe('gzip');
889 $file = "$gzip -cd $file |"
892 require FileHandle;
893 $FH = new FileHandle;
894 open ($FH, $file) || $self->throw(-class=>'Bio::Root::FileOpenException',
895 -text =>"Can't access data file: $file: $!");
896 $self->verbose > 0 and printf STDERR "$ID: reading data from file $file\n";
898 } else {
899 # Read from STDIN.
900 $FH = \*STDIN;
901 $self->verbose > 0 and printf STDERR "$ID: reading data from STDIN\n";
902 $client->{'_input_type'} = "STDIN";
905 return $FH;
908 =head2 get_newline
910 Usage : $object->get_newline(<named parameters>);
911 Purpose : Determine the character(s) used for newlines in a given file or
912 : input stream. Delegates to Bio::Root::Utilities::get_newline()
913 Example : $data = $object->get_newline(-CLIENT => $anObj,
914 : -FILE =>'usr/people/me/data.txt')
915 Argument : Same arguemnts as for create_filehandle().
916 Returns : Reference to a FileHandle object.
917 Throws : Propogates any exceptions thrown by Bio::Root::Utilities::get_newline().
919 See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
921 =cut
923 #-----------------
924 sub get_newline {
925 #-----------------
926 my($self, @param) = @_;
928 return $NEWLINE if defined $NEWLINE;
930 my($client ) =
931 $self->_rearrange([qw( CLIENT )], @param);
933 my $FH = $self->create_filehandle(@param);
935 if(not ref $client) { $client = $self; }
937 if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
938 # Can't taste from STDIN since we can't seek 0 on it.
939 # Are other non special Glob refs seek-able?
940 # Attempt to guess newline based on platform.
941 # Not robust since we could be reading Unix files on a Mac, e.g.
942 if(defined $ENV{'MACPERL'}) {
943 $NEWLINE = "\015"; # \r
944 } else {
945 $NEWLINE = "\012"; # \n
947 } else {
948 $NEWLINE = $self->taste_file($FH);
951 close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
952 $client->{'_input_type'} eq 'FileHandle' ||
953 $client->{'_input_type'} eq 'Glob' );
955 delete $client->{'_input_type'};
957 return $NEWLINE || $DEFAULT_NEWLINE;
961 =head2 taste_file
963 Usage : $object->taste_file( <FileHandle> );
964 : Mainly a utility method for get_newline().
965 Purpose : Sample a filehandle to determine the character(s) used for a newline.
966 Example : $char = $Util->taste_file($FH)
967 Argument : Reference to a FileHandle object.
968 Returns : String containing an octal represenation of the newline character string.
969 : Unix = "\012" ("\n")
970 : Win32 = "\012\015" ("\r\n")
971 : Mac = "\015" ("\r")
972 Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
973 : Exception if argument is not FileHandle object reference.
974 : Warning if cannot determine neewline char(s).
975 Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
977 See Also : L<get_newline()|get_newline>
979 =cut
981 #---------------
982 sub taste_file {
983 #---------------
984 my ($self, $FH) = @_;
985 my $BUFSIZ = 256; # Number of bytes read from the file handle.
986 my ($buffer, $octal, $str, $irs, $i);
988 ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
990 $buffer = '';
992 # this is a quick hack to check for availability of alarm(); just copied
993 # from Bio/Root/IOManager.pm HL 02/19/01
994 my $alarm_available = 1;
995 eval {
996 alarm(0);
998 if($@) {
999 # alarm() not available (ActiveState perl for win32 doesn't have it.
1000 # See jitterbug PR#98)
1001 $alarm_available = 0;
1003 $SIG{ALRM} = sub { die "Timed out!"; };
1004 my $result;
1005 eval {
1006 $alarm_available && alarm( $TIMEOUT_SECS );
1007 $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
1008 $alarm_available && alarm(0);
1010 if($@ =~ /Timed out!/) {
1011 $self->throw("Timed out while waiting for input.",
1012 "Timeout period = $TIMEOUT_SECS seconds.\nFor longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm.");
1014 } elsif(not $result) {
1015 my $err = $@;
1016 $self->throw("read taste failed to read from FileHandle.", $err);
1018 } elsif($@ =~ /\S/) {
1019 my $err = $@;
1020 $self->throw("Unexpected error during read: $err");
1023 seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
1025 my @chars = split(//, $buffer);
1026 my $flavor;
1028 for ($i = 0; $i <$BUFSIZ; $i++) {
1029 if (($chars[$i] eq "\012")) {
1030 unless ($chars[$i-1] eq "\015") {
1031 $flavor='Unix';
1032 $octal = "\012";
1033 $str = '\n';
1034 $irs = "^J";
1035 last;
1037 } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
1038 $flavor='DOS';
1039 $octal = "\015\012";
1040 $str = '\r\n';
1041 $irs = "^M^J";
1042 last;
1043 } elsif (($chars[$i] eq "\015")) {
1044 $flavor='Mac';
1045 $octal = "\015";
1046 $str = '\r';
1047 $irs = "^M";
1048 last;
1051 if (not $octal) {
1052 $self->warn("Could not determine newline char. Using '\012'");
1053 $octal = "\012";
1054 } else {
1055 # print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
1057 return($octal);
1060 =head2 file_flavor
1062 Usage : $object->file_flavor( <filename> );
1063 Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
1064 Example : print "$file has flavor: ", $Util->file_flavor($file);
1065 Argument : filename = string, full path name for file
1066 Returns : String describing flavor of file and handy info about line endings.
1067 : One of these is returned:
1068 : unix (\n or 012 or ^J)
1069 : dos (\r\n or 015,012 or ^M^J)
1070 : mac (\r or 015 or ^M)
1071 : unknown
1072 Throws : Exception if argument is not a file
1073 : Propogates any exceptions thrown by Bio::Root::Utilities::get_newline().
1075 See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
1077 =cut
1079 #---------------
1080 sub file_flavor {
1081 #---------------
1082 my ($self, $file) = @_;
1083 my %flavors=("\012" =>'unix (\n or 012 or ^J)',
1084 "\015\012" =>'dos (\r\n or 015,012 or ^M^J)',
1085 "\015" =>'mac (\r or 015 or ^M)'
1088 -f $file or $self->throw("Can't determine flavor: arg '$file' is either non existant or is not a file.\n");
1089 my $octal = $self->get_newline($file);
1090 my $flavor = $flavors{$octal} || "unknown";
1091 return $flavor;
1094 ######################################
1095 ##### Mail Functions ########
1096 ######################################
1098 =head2 mail_authority
1100 Title : mail_authority
1101 Usage : $Util->mail_authority( $message )
1102 Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
1104 See Also : L<send_mail()|send_mail>
1106 =cut
1108 sub mail_authority {
1110 my( $self, $message ) = @_;
1111 my $script = $self->untaint($0,1);
1113 my $email = $self->{'_auth_email'} || $AUTHORITY;
1114 if (defined $email) {
1115 $self->send_mail( -TO=>$AUTHORITY, -SUBJ=>$script, -MSG=>$message);
1116 } else {
1117 $self->throw("Can't email authority. No email defined.");
1121 =head2 authority
1123 Title : authority
1124 Usage : $Util->authority('admin@example.com');
1125 Purpose : Set/get the email address that should be notified by mail_authority()
1127 See Also : L<mail_authority()|mail_authority>
1129 =cut
1131 sub authority {
1133 my( $self, $email ) = @_;
1134 $self->{'_auth_email'} = $email if defined $email;
1135 return $self->{'_auth_email'};
1139 =head2 send_mail
1141 Title : send_mail
1142 Usage : $Util->send_mail( named_parameters )
1143 Purpose : Provides an interface to mail or sendmail, if available
1144 Returns : n/a
1145 Argument : Named parameters: (case-insensitive)
1146 : -TO => e-mail address to send to
1147 : -SUBJ => subject for message (optional)
1148 : -MSG => message to be sent (optional)
1149 : -CC => cc: e-mail address (optional)
1150 Thows : Exception if TO: address appears bad or is missing.
1151 : Exception if mail cannot be sent.
1152 Comments : Based on TomC's tip at:
1153 : http://www.perl.com/CPAN/doc/FMTEYEWTK/safe_shellings
1155 : Using default 'From:' information.
1156 : sendmail options used:
1157 : -t: ignore the address given on the command line and
1158 : get To:address from the e-mail header.
1159 : -oi: prevents send_mail from ending the message if it
1160 : finds a period at the start of a line.
1162 See Also : L<mail_authority()|mail_authority>
1164 =cut
1167 #-------------'
1168 sub send_mail {
1169 #-------------
1170 my( $self, @param) = @_;
1171 my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
1173 $self->throw("Invalid or missing e-mail address: $recipient")
1174 if not $recipient =~ /\S+\@\S+/;
1176 $subj ||= 'empty subject'; $message ||= '';
1178 # Best to use mail rather than sendmail. Permissions on sendmail in
1179 # linux distros have been significantly locked down in recent years,
1180 # due to the perception that it is insecure.
1181 my ($exe, $ccinfo);
1182 if ($exe = $self->find_exe('mail')) {
1183 if (defined $cc) {
1184 $ccinfo = "-c $cc";
1186 $self->debug("send_mail: $exe -s '$subj' $ccinfo $recipient\n");
1187 open (MAIL, "| $exe -s '$subj' $ccinfo $recipient") ||
1188 $self->throw("Can't send email: mail cannot fork: $!");
1189 print MAIL <<QQ_EOFM_QQ;
1190 $message
1191 QQ_EOFM_QQ
1192 $? and $self->warn("mail didn't exit nicely: $?");
1193 close(MAIL);
1194 } elsif ($exe = $self->find_exe('sendmail')) {
1195 open (SENDMAIL, "| $exe -oi -t") ||
1196 $self->throw("Can't send email: sendmail cannot fork: $!");
1197 print SENDMAIL <<QQ_EOFSM_QQ;
1198 To: $recipient
1199 Subject: $subj
1200 Cc: $cc
1202 $message
1204 QQ_EOFSM_QQ
1205 $? and $self->warn("sendmail didn't exit nicely: $?");
1207 close(SENDMAIL);
1208 } else {
1209 $self->throw("Can't find executable for mail or sendmail.");
1214 =head2 find_exe
1216 Title : find_exe
1217 Usage : $Util->find_exe(name);
1218 Purpose : Locate an executable (for use in a system() call, e.g.))
1219 Example : $Util->find_exe("gzip");
1220 Returns : String containing executable that passes the -x test.
1221 Returns undef if an executable of the supplied name cannot be found.
1222 Argument : Name of executable to be found.
1223 : Can be a full path. If supplied name is not executable, an executable
1224 : of that name will be searched in all directories in the currently
1225 : defined PATH environment variable.
1226 Throws : No exceptions, but issues a warning if multiple paths are found
1227 : for a given name. The first one is used.
1228 Comments : TODO: Confirm functionality on all bioperl-supported platforms.
1229 May get tripped up by variation in path separator character used
1230 for splitting ENV{PATH}.
1231 See Also :
1233 =cut
1235 sub find_exe {
1236 my ($self, $name) = @_;
1237 my @bindirs = split (':', $ENV{'PATH'});
1238 my $exe = $name;
1239 unless (-x $exe) {
1240 undef $exe;
1241 my @exes;
1242 foreach my $d (@bindirs) {
1243 push(@exes, "$d/$name") if -x "$d/$name";
1245 if (scalar @exes) {
1246 $exe = $exes[0];
1247 if (defined $exes[1]) {
1248 $self->warn("find_exe: Multiple paths to '$name' found. Using $exe.");
1252 return $exe;
1256 ######################################
1257 ### Interactive Functions #####
1258 ######################################
1261 =head2 yes_reply
1263 Title : yes_reply()
1264 Usage : $Util->yes_reply( [query_string]);
1265 Purpose : To test an STDIN input value for affirmation.
1266 Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
1267 : $Util->yes_reply('Continue') || die;
1268 Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
1269 Argument: query_string = string to be used to prompt user (optional)
1270 : If not provided, 'Yes or no' will be used.
1271 : Question mark is automatically appended.
1273 =cut
1275 #-------------
1276 sub yes_reply {
1277 #-------------
1278 my $self = shift;
1279 my $query = shift;
1280 my $reply;
1281 $query ||= 'Yes or no';
1282 print "\n$query? (y/n) [n] ";
1283 chomp( $reply = <STDIN> );
1284 $reply =~ /^y/i;
1289 =head2 request_data
1291 Title : request_data()
1292 Usage : $Util->request_data( [value_name]);
1293 Purpose : To request data from a user to be entered via keyboard (STDIN).
1294 Example : $name = $Util->request_data('Name');
1295 : # User will see: % Enter Name:
1296 Returns : String, (data entered from keyboard, sans terminal newline.)
1297 Argument: value_name = string to be used to prompt user.
1298 : If not provided, 'data' will be used, (not very helpful).
1299 : Question mark is automatically appended.
1301 =cut
1303 #----------------
1304 sub request_data {
1305 #----------------
1306 my $self = shift;
1307 my $data = shift || 'data';
1308 print "Enter $data: ";
1309 # Remove the terminal newline char.
1310 chomp($data = <STDIN>);
1311 $data;
1314 sub quit_reply {
1315 # Not much used since you can use request_data()
1316 # and test for an empty string.
1317 my $self = shift;
1318 my $reply;
1319 chop( $reply = <STDIN> );
1320 $reply =~ /^q.*/i;
1324 =head2 verify_version
1326 Purpose : Checks the version of Perl used to invoke the script.
1327 : Aborts program if version is less than the given argument.
1328 Usage : verify_version('5.000')
1330 =cut
1332 #------------------
1333 sub verify_version {
1334 #------------------
1335 my $self = shift;
1336 my $reqVersion = shift;
1338 $] < $reqVersion and do {
1339 printf STDERR ( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
1340 printf STDERR ( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );
1341 exit(1);
1349 __END__