1 package Bio
::Root
::Utilities
;
8 Bio::Root::Utilities - General-purpose utility module
12 =head2 Object Creation
14 # Using the supplied singleton object:
15 use Bio::Root::Utilities qw(:obj);
18 # Create an object manually:
19 use Bio::Root::Utilities;
20 my $util = Bio::Root::Utilities->new();
23 $date_stamp = $Util->date_format('yyy-mm-dd');
25 $clean = $Util->untaint($dirty);
27 $compressed = $Util->compress('/home/me/myfile.txt')
29 my ($mean, $stdev) = $Util->mean_stdev( @data );
31 $Util->authority("me@example.com");
32 $Util->mail_authority("Something you should know about...");
34 ...and a host of other methods. See below.
38 Provides general-purpose utilities of potential interest to any Perl script.
40 The C<:obj> tag is a convenience that imports a $Util symbol into your
41 namespace representing a Bio::Root::Utilities object. This saves you
42 from creating your own Bio::Root::Utilities object via
43 C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
44 C<Bio::Root::Utilities>, though feel free to do these things if desired.
45 Since there should normally not be a need for a script to have more
46 than one Bio::Root::Utilities object, this module thus comes with it's
51 This module is included with the central Bioperl distribution:
53 http://www.bioperl.org/wiki/Getting_BioPerl
54 ftp://bio.perl.org/pub/DIST
56 Follow the installation instructions included in the README file.
60 Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
61 and L<Bio::Root::Exception>.
63 Relies on external executables for file compression/uncompression
64 and sending mail. No paths to these are hard coded but are located
69 http://bioperl.org - Bioperl Project Homepage
75 User feedback is an integral part of the evolution of this and other Bioperl modules.
76 Send your comments and suggestions preferably to one of the Bioperl mailing lists.
77 Your participation is much appreciated.
79 bioperl-l@bioperl.org - General discussion
80 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
84 Please direct usage questions or support issues to the mailing list:
86 L<bioperl-l@bioperl.org>
88 rather than to the module maintainer directly. Many experienced and
89 reponsive experts will be able look at the problem and quickly
90 address it. Please include a thorough description of the problem
91 with code and data examples if at all possible.
95 Report bugs to the Bioperl bug tracking system to help us keep track
96 the bugs and their resolution. Bug reports can be submitted via the
99 http://bugzilla.open-bio.org/
103 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
105 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
108 =head1 ACKNOWLEDGEMENTS
110 This module was originally developed under the auspices of the
111 Saccharomyces Genome Database: http://genome-www.stanford.edu/Saccharomyces
115 Copyright (c) 1996-2007 Steve Chervitz. All Rights Reserved.
116 This module is free software; you can redistribute it and/or
117 modify it under the same terms as Perl itself.
123 Methods beginning with a leading underscore are considered private
124 and are intended for internal use by this module. They are
125 B<not> considered part of the public interface and are described here
126 for documentation purposes only.
130 # Let the code begin...
133 use Bio
::Root
::Exception
;
135 use vars
qw(@EXPORT_OK %EXPORT_TAGS);
136 use base qw(Bio::Root::Root Exporter);
137 @EXPORT_OK = qw($Util);
138 %EXPORT_TAGS = ( obj => [qw($Util)],
139 std => [qw($Util)],);
141 use vars qw($ID $Util $GNU_PATH $TIMEOUT_SECS
142 @COMPRESSION_UTILS @UNCOMPRESSION_UTILS
143 $DEFAULT_NEWLINE $NEWLINE $AUTHORITY
144 @MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
147 $ID = 'Bio::Root::Utilities';
148 # Number of seconds to wait before timing out when reading input (taste_file())
150 $NEWLINE = $ENV{'NEWLINE'} || undef;
151 $BASE_YEAR = 1900; # perl's localtime() assumes this for it's year data.
152 # TODO: update this every hundred years. Y2K-sensitive code.
153 $DEFAULT_CENTURY = $BASE_YEAR + 100;
154 @MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
155 @DAYS = qw(Sun Mon Tue Wed Thu Fri Sat);
156 # Sets the preference for compression utilities to be used by compress().
157 # The first executable in this list to be found in the current PATH will be used,
158 # unless overridden in the call to that function. See docs for details.
159 @COMPRESSION_UTILS = qw(gzip bzip2 zip compress);
160 @UNCOMPRESSION_UTILS = qw(gunzip bunzip2 unzip uncompress);
162 # Default person to receive feedback from users and possibly automatic error messages.
165 # Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
166 # will automatically locate the compression utility in the current PATH.
167 # Retaining $GNU_PATH for backward compatibility.
169 # $GNU_PATH points to the directory containing the gzip and gunzip
170 # executables. It may be required for executing gzip/gunzip
171 # in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
172 # Customize $GNU_PATH for your site if the compress() or
173 # uncompress() functions are generating exceptions.
175 #$GNU_PATH = '/tools/gnu/bin/';
177 $DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
179 ## Static UTIL object.
180 $Util = Bio
::Root
::Root
->new();
186 Usage : $Util->date_format( [FMT], [DATE])
187 Purpose : -- Get a string containing the formated date or time
188 : taken when this routine is invoked.
189 : -- Provides a way to avoid using `date`.
190 : -- Provides an interface to localtime().
191 : -- Interconverts some date formats.
193 : (For additional functionality, use Date::Manip or
194 : Date::DateCalc available from CPAN).
195 Example : $Util->date_format();
196 : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
197 Returns : String (unless 'list' is provided as argument, see below)
199 : 'yyyy-mm-dd' = 1996-05-03 # default format.
200 : 'yyyy-dd-mm' = 1996-03-05
201 : 'yyyy-mmm-dd' = 1996-May-03
202 : 'd-m-y' = 3-May-1996
203 : 'd m y' = 3 May 1996
205 : 'mdy' = May 3, 1996
209 : 'hms' = 23:01:59 # when not converting a format, 'hms' can be
210 : # tacked on to any of the above options
211 : # to add the time stamp: eg 'dmyhms'
212 : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
213 : 'list' = the contents of localtime(time) in an array.
214 Argument : (all are optional)
215 : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
216 : mdy | ymd | md | d-m-y | hms | hm
217 : ('hms' may be appended to any of these to
220 : DATE = String containing date to be converted.
221 : Acceptable input formats:
222 : 12/1/97 (for 1 December 1997)
226 Comments : If you don't care about formatting or using backticks, you can
227 : always use: $date = `date`;
229 : For more features, use Date::Manip.pm, (which I should
230 : probably switch to...)
232 See Also : L<file_date()|file_date>, L<month2num()|month2num>
241 my $date = shift; # optional date to be converted.
243 my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
245 $option ||= 'yyyy-mm-dd';
247 my ($month_txt, $day_txt, $month_num, $fullYear);
248 my ($converting, @date);
250 # Load a supplied date for conversion:
251 if(defined($date) && ($date =~ /[\D-]+/)) {
254 ($mon,$mday,$year) = split(m{/}, $date);
255 } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
256 ($year,$mon,$mday) = ($1, $2, $3);
257 } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
258 ($year,$mon,$mday) = ($1, $2, $3);
259 $mon = $self->month2num($2);
261 print STDERR
"\n*** Unsupported input date format: $date\n";
263 if(length($year) == 4) {
265 $year = substr $year, 2;
267 # Heuristics to guess what century was intended when a 2-digit year is given
268 # If number is over 50, assume it's for prev century; under 50 = default century.
269 # TODO: keep an eye on this Y2K-sensitive code
271 $fullYear = $DEFAULT_CENTURY + $year - 100;
273 $fullYear = $DEFAULT_CENTURY + $year;
278 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
279 localtime(($date ?
$date : time()));
280 return @date if $option =~ /list/i;
281 $fullYear = $BASE_YEAR+$year;
283 $month_txt = $MONTHS[$mon];
284 $day_txt = $DAYS[$wday] if defined $wday;
287 # print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
289 if( $option =~ /yyyy-mm-dd/i ) {
290 $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
291 } elsif( $option =~ /yyyy-dd-mm/i ) {
292 $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
293 } elsif( $option =~ /yyyy-mmm-dd/i ) {
294 $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
295 } elsif( $option =~ /full|unix/i ) {
296 $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
297 } elsif( $option =~ /mdy/i ) {
298 $date = "$month_txt $mday, $fullYear";
299 } elsif( $option =~ /ymd/i ) {
300 $date = $year."\l$month_txt$mday";
301 } elsif( $option =~ /dmy/i ) {
302 $date = $mday."\l$month_txt$year";
303 } elsif( $option =~ /md/i ) {
304 $date = "\l$month_txt$mday";
305 } elsif( $option =~ /d-m-y/i ) {
306 $date = "$mday-$month_txt-$fullYear";
307 } elsif( $option =~ /d m y/i ) {
308 $date = "$mday $month_txt $fullYear";
309 } elsif( $option =~ /year/i ) {
311 } elsif( $option =~ /dmy/i ) {
312 $date = $mday.'-'.$month_txt.'-'.$fullYear;
313 } elsif($option and $option !~ /hms/i) {
314 print STDERR
"\n*** Unrecognized date format request: $option\n";
317 if( $option =~ /hms/i and not $converting) {
318 $date .= " $hour:$min:$sec" if $date;
319 $date ||= "$hour:$min:$sec";
322 return $date || join(" ", @date);
329 Purpose : Converts a string containing a name of a month to integer
330 : representing the number of the month in the year.
331 Example : $Util->month2num("march"); # returns 3
332 Argument : The string argument must contain at least the first
333 : three characters of the month's name. Case insensitive.
334 Throws : Exception if the conversion fails.
342 my ($self, $str) = @_;
344 # Get string in proper format for conversion.
345 $str = substr($str, 0, 3);
347 return $_+1 if $str =~ /$MONTHS[$_]/i;
349 $self->throw("Invalid month name: $str");
355 Purpose : Does the opposite of month2num.
356 : Converts a number into a string containing a name of a month.
357 Example : $Util->num2month(3); # returns 'Mar'
358 Throws : Exception if supplied number is out of range.
365 my ($self, $num) = @_;
367 $self->throw("Month out of range: $num") if $num < 1 or $num > 12;
368 return $MONTHS[$num-1];
374 Usage : $Util->compress(full-path-filename);
375 : $Util->compress(<named parameters>);
376 Purpose : Compress a file.
377 Example : $Util->compress("/usr/people/me/data.txt");
378 : $Util->compress(-file=>"/usr/people/me/data.txt",
380 : -outfile=>"/usr/people/share/data.txt.gz",
381 : -exe=>"/usr/local/bin/fancyzip");
382 Returns : String containing full, absolute path to compressed file
383 Argument : Named parameters (case-insensitive):
384 : -FILE => String (name of file to be compressed, full path).
385 : If the supplied filename ends with '.gz' or '.Z',
386 : that extension will be removed before attempting to compress.
388 : -TMP => boolean. If true, (or if user is not the owner of the file)
389 : the file is compressed to a temp file. If false, file may be
390 : clobbered with the compressed version (if using a utility like
391 : gzip, which is the default)
392 : -OUTFILE => String (name of the output compressed file, full path).
393 : -EXE => Name of executable for compression utility to use.
394 : Will supercede those in @COMPRESSION_UTILS defined by
395 : this module. If the absolute path to the executable is not provided,
396 : it will be searched in the PATH env variable.
397 Throws : Exception if file cannot be compressed.
398 : If user is not owner of the file, generates a warning and compresses to
399 : a tmp file. To avoid this warning, use the -o file test operator
400 : and call this function with -TMP=>1.
401 Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
402 : defined by this module, in the order defined. The first utility that is
403 : found to be executable will be used. Any utility defined in optional -EXE param
404 : will be tested for executability first.
405 : To minimize security risks, the -EXE parameter value is untained using
406 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
408 See Also : L<uncompress()|uncompress>
415 my ($self, @args) = @_;
416 # This method formerly didn't use named params and expected fileName, tmp
417 # in that order. This should be backward compatibile.
418 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
419 my ($file, $get, $fmt);
421 # in case the supplied name already has a compressed extension
422 if($fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName =~ s/$1$//; };
423 $self->debug("compressing file $fileName");
425 my @util_to_use = @COMPRESSION_UTILS;
428 $exe = $self->untaint($exe, 1);
429 unshift @util_to_use, $exe;
432 my @checked = @util_to_use;
434 while (not -x
$exe and scalar(@util_to_use)) {
435 $exe = $self->find_exe(shift @util_to_use);
439 $self->throw("Can't find compression utility. Looked for @checked");
442 my ($compressed, @cmd, $handle);
444 if(defined($outfile) or $tmp or not -o
$fileName) {
445 if (defined $outfile) {
446 $compressed = $outfile;
448 # obtain a temporary file name (not using the handle)
449 # and insert some special text to flag it as a bioperl-based temp file
450 my $io = Bio
::Root
::IO
->new();
451 ($handle, $compressed) = $io->tempfile();
452 $compressed .= '.tmp.bioperl.gz';
455 if ($exe =~ /gzip|bzip2|compress/) {
456 @cmd = ("$exe -f < \"$fileName\" > \"$compressed\"");
457 } elsif ($exe eq 'zip') {
458 @cmd = ("$exe -r \"$fileName.zip\" \"$fileName\"");
461 $self->warn("Not owner of file $fileName. Compressing to temp file $compressed.");
464 # Need to compute the compressed name based on exe since we're returning it.
465 $compressed = $fileName;
466 if ($exe =~ /gzip/) {
467 $compressed .= '.gz';
468 } elsif ($exe =~ /bzip2/) {
469 $compressed .= '.bz2';
470 } elsif ($exe =~ /zip/) {
471 $compressed .= '.zip';
472 } elsif ($exe =~ /compress/) {
475 if ($exe =~ /gzip|bzip2|compress/) {
476 @cmd = ($exe, '-f', $fileName);
477 } elsif ($exe eq 'zip') {
478 @cmd = ($exe, '-r', "$compressed", $fileName);
482 if(system(@cmd) != 0) {
483 $self->throw( -class => 'Bio::Root::SystemException',
484 -text
=> "Failed to compress file $fileName using $exe: $!");
493 Usage : $Util->uncompress(full-path-filename);
494 : $Util->uncompress(<named parameters>);
495 Purpose : Uncompress a file.
496 Example : $Util->uncompress("/usr/people/me/data.txt");
497 : $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
499 : -outfile=>"/usr/people/share/data.txt",
500 : -exe=>"/usr/local/bin/fancyzip");
501 Returns : String containing full, absolute path to uncompressed file
502 Argument : Named parameters (case-insensitive):
503 : -FILE => String (name of file to be uncompressed, full path).
504 : If the supplied filename ends with '.gz' or '.Z',
505 : that extension will be removed before attempting to uncompress.
507 : -TMP => boolean. If true, (or if user is not the owner of the file)
508 : the file is uncompressed to a temp file. If false, file may be
509 : clobbered with the uncompressed version (if using a utility like
510 : gzip, which is the default)
511 : -OUTFILE => String (name of the output uncompressed file, full path).
512 : -EXE => Name of executable for uncompression utility to use.
513 : Will supercede those in @UNCOMPRESSION_UTILS defined by
514 : this module. If the absolute path to the executable is not provided,
515 : it will be searched in the PATH env variable.
516 Throws : Exception if file cannot be uncompressed.
517 : If user is not owner of the file, generates a warning and uncompresses to
518 : a tmp file. To avoid this warning, use the -o file test operator
519 : and call this function with -TMP=>1.
520 Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
521 : defined by this module, in the order defined. The first utility that is
522 : found to be executable will be used. Any utility defined in optional -EXE param
523 : will be tested for executability first.
524 : To minimize security risks, the -EXE parameter value is untained using
525 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
527 See Also : L<compress()|compress>
534 my ($self, @args) = @_;
535 # This method formerly didn't use named params and expected fileName, tmp
536 # in that order. This should be backward compatibile.
537 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
538 my ($file, $get, $fmt);
540 # in case the supplied name lacks a compressed extension
541 if(not $fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName .= $1; };
542 $self->debug("uncompressing file $fileName");
544 my @util_to_use = @UNCOMPRESSION_UTILS;
547 $exe = $self->untaint($exe, 1);
548 unshift @util_to_use, $exe;
552 while (not -x
$exe and scalar(@util_to_use)) {
553 $exe = $self->find_exe(shift @util_to_use);
557 $self->throw("Can't find compression utility. Looked for @util_to_use");
560 my ($uncompressed, @cmd, $handle);
562 $uncompressed = $fileName;
563 $uncompressed =~ s/\.\w+$//;
565 if(defined($outfile) or $tmp or not -o
$fileName) {
566 if (defined $outfile) {
567 $uncompressed = $outfile;
569 # obtain a temporary file name (not using the handle)
570 my $io = Bio
::Root
::IO
->new();
571 ($handle, $uncompressed) = $io->tempfile();
572 # insert some special text to flag it as a bioperl-based temp file
573 $uncompressed .= '.tmp.bioperl';
576 if ($exe =~ /gunzip|bunzip2|uncompress/) {
577 @cmd = ("$exe -f < \"$fileName\" > \"$uncompressed\"");
578 } elsif ($exe eq 'unzip') {
579 @cmd = ("$exe -p \"$fileName\" > \"$uncompressed\"");
582 $self->warn("Not owner of file $fileName. Uncompressing to temp file $uncompressed.");
585 if ($exe =~ /gunzip|bunzip2|uncompress/) {
586 @cmd = ($exe, '-f', $fileName);
587 } elsif ($exe eq 'zip') {
588 @cmd = ($exe, $fileName);
592 if(system(@cmd) != 0) {
593 $self->throw( -class => 'Bio::Root::SystemException',
594 -text
=> "Failed to uncompress file $fileName using $exe: $!");
597 return $uncompressed;
604 Usage : $Util->file_date( filename [,date_format])
605 Purpose : Obtains the date of a given file.
606 : Provides flexible formatting via date_format().
607 Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
608 Argument : filename = string, full path name for file
609 : date_format = string, desired format for date (see date_format()).
610 : Default = yyyy-mm-dd
611 Thows : Exception if no file is provided or does not exist.
612 Comments : Uses the mtime field as obtained by stat().
619 my ($self, $file, $fmt) = @_;
621 $self->throw("No such file: $file") if not $file or not -e
$file;
623 $fmt ||= 'yyyy-mm-dd';
625 my @file_data = stat($file);
626 return $self->date_format($fmt, $file_data[9]); # mtime field
633 Purpose : To remove nasty shell characters from untrusted data
634 : and allow a script to run with the -T switch.
635 : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
636 : Accept only the first block of contiguous characters:
637 : Default allowed chars = "-\w.', ()"
638 : If $relax is true = "-\w.', ()\/=%:^<>*"
639 Usage : $Util->untaint($value, $relax)
640 Returns : String containing the untained data.
641 Argument: $value = string
644 This general untaint() function may not be appropriate for every situation.
645 To allow only a more restricted subset of special characters
646 (for example, untainting a regular expression), then using a custom
647 untainting mechanism would permit more control.
649 Note that special trusted vars (like $0) require untainting.
656 my($self,$value,$relax) = @_;
660 $self->debug("\nUNTAINT: $value\n");
662 unless (defined $value and $value ne '') {
667 $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
669 # } elsif( $relax == 2 ) { # Could have several degrees of relax.
670 # $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
673 $value =~ /([-\w.\', ()]+)/;
677 $self->debug("UNTAINTED: $untainted\n");
686 Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
687 Purpose : Calculates the mean and standard deviation given a list of numbers.
688 Returns : 2-element list (mean, stdev)
689 Argument : list of numbers (ints or floats)
697 my ($self, @data) = @_;
698 return (undef,undef) if not @data; # case of empty @data list
701 foreach (@data) { $mean += $_; $N++ }
703 my $sum_diff_sqd = 0;
704 foreach (@data) { $sum_diff_sqd += ($mean - $_) * ($mean - $_); }
705 # if only one element in @data list, unbiased stdev is undefined
706 my $stdev = $N <= 1 ?
undef : sqrt( $sum_diff_sqd / ($N-1) );
707 return ($mean, $stdev);
714 Purpose : Counts the number of files/directories within a given directory.
715 : Also reports the number of text and binary files in the dir
716 : as well as names of these files and directories.
717 Usage : count_files(\%data)
718 : $data{-DIR} is the directory to be analyzed. Default is ./
719 : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
720 Argument : Hash reference (empty)
722 : Modifies the hash ref passed in as the sole argument.
723 : $$href{-TOTAL} scalar
724 : $$href{-NUM_TEXT_FILES} scalar
725 : $$href{-NUM_BINARY_FILES} scalar
726 : $$href{-NUM_DIRS} scalar
727 : $$href{-T_FILE_NAMES} array ref
728 : $$href{-B_FILE_NAMES} array ref
729 : $$href{-DIRNAMES} array ref
737 my $href = shift; # Reference to an empty hash.
738 my( $name, @fileLine);
739 my $dir = $$href{-DIR
} || './'; # THIS IS UNIX SPECIFIC? FIXME/TODO
740 my $print = $$href{-PRINT
} || 0;
742 ### Make sure $dir ends with /
743 $dir !~ m{/$} and do{ $dir .= '/'; $$href{-DIR
} = $dir; };
745 open ( my $PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
747 ### Initialize the hash data.
749 $$href{-NUM_TEXT_FILES
} = $$href{-NUM_BINARY_FILES
} = $$href{-NUM_DIRS
} = 0;
750 $$href{-T_FILE_NAMES
} = [];
751 $$href{-B_FILE_NAMES
} = [];
752 $$href{-DIR_NAMES
} = [];
757 $$href{-NUM_TEXT_FILES
}++; push @
{$$href{-T_FILE_NAMES
}}, $_; }
758 if( -B
$dir.$_ and not -d
$dir.$_) {
759 $$href{-NUM_BINARY_FILES
}++; push @
{$$href{-B_FILE_NAMES
}}, $_; }
761 $$href{-NUM_DIRS
}++; push @
{$$href{-DIR_NAMES
}}, $_; }
766 printf( "\n%4d %s\n", $$href{-TOTAL
}, "total files+dirs in $dir");
767 printf( "%4d %s\n", $$href{-NUM_TEXT_FILES
}, "text files");
768 printf( "%4d %s\n", $$href{-NUM_BINARY_FILES
}, "binary files");
769 printf( "%4d %s\n", $$href{-NUM_DIRS
}, "directories");
777 # Purpose : Obtains a variety of date for a given file.
778 # : Provides an interface to Perl's stat().
779 # Status : Under development. Not ready. Don't use!
786 my ($self, %param) = @_;
787 my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
789 $fmt ||= 'yyyy-mm-dd';
791 my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
792 $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
794 if($get =~ /date/i) {
795 ## I can get the elapsed time since the file was modified but
796 ## it's not so straightforward to get the date in a nice format...
797 ## Think about using a standard CPAN module for this, like
798 ## Date::Manip or Date::DateCalc.
801 my $elsec = time - $mtime;
802 printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN
>;
803 my $days = sprintf "%.0f", $elsec/(3600*24);
804 } elsif($get eq 'all') {
814 my $fileName = shift;
815 if(not -e
$fileName) {
816 $self->throw("Can't delete file $fileName: Does not exist.");
817 } elsif(not -o
$fileName) {
818 $self->throw("Can't delete file $fileName: Not owner.");
820 my $ulval = unlink($fileName) > 0 or
821 $self->throw("Failed to delete file $fileName: $!");
825 =head2 create_filehandle
827 Usage : $object->create_filehandle(<named parameters>);
828 Purpose : Create a FileHandle object from a file or STDIN.
829 : Mainly used as a helper method by read() and get_newline().
830 Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
831 Argument : Named parameters (case-insensitive):
833 : -CLIENT => object reference for the object submitting
834 : the request. Default = $Util.
835 : -FILE => string (full path to file) or a reference
836 : to a FileHandle object or typeglob. This is an
837 : optional parameter (if not defined, STDIN is used).
838 Returns : Reference to a FileHandle object.
839 Throws : Exception if cannot open a supplied file or if supplied with a
840 : reference that is not a FileHandle ref.
841 Comments : If given a FileHandle reference, this method simply returns it.
842 : This method assumes the user wants to read ascii data. So, if
843 : the file is binary, it will be treated as a compressed (gzipped)
844 : file and access it using gzip -ce. The problem here is that not
845 : all binary files are necessarily compressed. Therefore,
846 : this method should probably have a -mode parameter to
847 : specify ascii or binary.
849 See Also : L<get_newline()|get_newline>
853 #---------------------
854 sub create_filehandle
{
855 #---------------------
856 my($self, @param) = @_;
857 my($client, $file, $handle) =
858 $self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
860 if(not ref $client) { $client = $self; }
862 if( $client->can('file')) {
863 $file = $client->file($file);
869 if($handle_ref = ref($file)) {
870 if($handle_ref eq 'FileHandle') {
872 $client->{'_input_type'} = "FileHandle";
873 } elsif($handle_ref eq 'GLOB') {
875 $client->{'_input_type'} = "Glob";
877 $self->throw(-class=>'Bio::Root::IOException',
878 -text
=>"Can't read from $file: Not a FileHandle or GLOB ref.");
880 $self->verbose > 0 and printf STDERR
"$ID: reading data from FileHandle\n";
883 $client->{'_input_type'} = "FileHandle for $file";
885 # Use gzip -cd to access compressed data.
887 $client->{'_input_type'} .= " (compressed)";
888 my $gzip = $self->find_exe('gzip');
889 $file = "$gzip -cd $file |"
893 $FH = new FileHandle
;
894 open ($FH, $file) || $self->throw(-class=>'Bio::Root::FileOpenException',
895 -text
=>"Can't access data file: $file: $!");
896 $self->verbose > 0 and printf STDERR
"$ID: reading data from file $file\n";
901 $self->verbose > 0 and printf STDERR
"$ID: reading data from STDIN\n";
902 $client->{'_input_type'} = "STDIN";
910 Usage : $object->get_newline(<named parameters>);
911 Purpose : Determine the character(s) used for newlines in a given file or
912 : input stream. Delegates to Bio::Root::Utilities::get_newline()
913 Example : $data = $object->get_newline(-CLIENT => $anObj,
914 : -FILE =>'usr/people/me/data.txt')
915 Argument : Same arguemnts as for create_filehandle().
916 Returns : Reference to a FileHandle object.
917 Throws : Propogates any exceptions thrown by Bio::Root::Utilities::get_newline().
919 See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
926 my($self, @param) = @_;
928 return $NEWLINE if defined $NEWLINE;
931 $self->_rearrange([qw( CLIENT )], @param);
933 my $FH = $self->create_filehandle(@param);
935 if(not ref $client) { $client = $self; }
937 if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
938 # Can't taste from STDIN since we can't seek 0 on it.
939 # Are other non special Glob refs seek-able?
940 # Attempt to guess newline based on platform.
941 # Not robust since we could be reading Unix files on a Mac, e.g.
942 if(defined $ENV{'MACPERL'}) {
943 $NEWLINE = "\015"; # \r
945 $NEWLINE = "\012"; # \n
948 $NEWLINE = $self->taste_file($FH);
951 close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
952 $client->{'_input_type'} eq 'FileHandle' ||
953 $client->{'_input_type'} eq 'Glob' );
955 delete $client->{'_input_type'};
957 return $NEWLINE || $DEFAULT_NEWLINE;
963 Usage : $object->taste_file( <FileHandle> );
964 : Mainly a utility method for get_newline().
965 Purpose : Sample a filehandle to determine the character(s) used for a newline.
966 Example : $char = $Util->taste_file($FH)
967 Argument : Reference to a FileHandle object.
968 Returns : String containing an octal represenation of the newline character string.
969 : Unix = "\012" ("\n")
970 : Win32 = "\012\015" ("\r\n")
971 : Mac = "\015" ("\r")
972 Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
973 : Exception if argument is not FileHandle object reference.
974 : Warning if cannot determine neewline char(s).
975 Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
977 See Also : L<get_newline()|get_newline>
984 my ($self, $FH) = @_;
985 my $BUFSIZ = 256; # Number of bytes read from the file handle.
986 my ($buffer, $octal, $str, $irs, $i);
988 ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
992 # this is a quick hack to check for availability of alarm(); just copied
993 # from Bio/Root/IOManager.pm HL 02/19/01
994 my $alarm_available = 1;
999 # alarm() not available (ActiveState perl for win32 doesn't have it.
1000 # See jitterbug PR#98)
1001 $alarm_available = 0;
1003 $SIG{ALRM
} = sub { die "Timed out!"; };
1006 $alarm_available && alarm( $TIMEOUT_SECS );
1007 $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
1008 $alarm_available && alarm(0);
1010 if($@
=~ /Timed out!/) {
1011 $self->throw("Timed out while waiting for input.",
1012 "Timeout period = $TIMEOUT_SECS seconds.\nFor longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm.");
1014 } elsif(not $result) {
1016 $self->throw("read taste failed to read from FileHandle.", $err);
1018 } elsif($@
=~ /\S/) {
1020 $self->throw("Unexpected error during read: $err");
1023 seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
1025 my @chars = split(//, $buffer);
1028 for ($i = 0; $i <$BUFSIZ; $i++) {
1029 if (($chars[$i] eq "\012")) {
1030 unless ($chars[$i-1] eq "\015") {
1037 } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
1039 $octal = "\015\012";
1043 } elsif (($chars[$i] eq "\015")) {
1052 $self->warn("Could not determine newline char. Using '\012'");
1055 # print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
1062 Usage : $object->file_flavor( <filename> );
1063 Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
1064 Example : print "$file has flavor: ", $Util->file_flavor($file);
1065 Argument : filename = string, full path name for file
1066 Returns : String describing flavor of file and handy info about line endings.
1067 : One of these is returned:
1068 : unix (\n or 012 or ^J)
1069 : dos (\r\n or 015,012 or ^M^J)
1070 : mac (\r or 015 or ^M)
1072 Throws : Exception if argument is not a file
1073 : Propogates any exceptions thrown by Bio::Root::Utilities::get_newline().
1075 See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
1082 my ($self, $file) = @_;
1083 my %flavors=("\012" =>'unix (\n or 012 or ^J)',
1084 "\015\012" =>'dos (\r\n or 015,012 or ^M^J)',
1085 "\015" =>'mac (\r or 015 or ^M)'
1088 -f
$file or $self->throw("Can't determine flavor: arg '$file' is either non existant or is not a file.\n");
1089 my $octal = $self->get_newline($file);
1090 my $flavor = $flavors{$octal} || "unknown";
1094 ######################################
1095 ##### Mail Functions ########
1096 ######################################
1098 =head2 mail_authority
1100 Title : mail_authority
1101 Usage : $Util->mail_authority( $message )
1102 Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
1104 See Also : L<send_mail()|send_mail>
1108 sub mail_authority
{
1110 my( $self, $message ) = @_;
1111 my $script = $self->untaint($0,1);
1113 my $email = $self->{'_auth_email'} || $AUTHORITY;
1114 if (defined $email) {
1115 $self->send_mail( -TO
=>$AUTHORITY, -SUBJ
=>$script, -MSG
=>$message);
1117 $self->throw("Can't email authority. No email defined.");
1124 Usage : $Util->authority('admin@example.com');
1125 Purpose : Set/get the email address that should be notified by mail_authority()
1127 See Also : L<mail_authority()|mail_authority>
1133 my( $self, $email ) = @_;
1134 $self->{'_auth_email'} = $email if defined $email;
1135 return $self->{'_auth_email'};
1142 Usage : $Util->send_mail( named_parameters )
1143 Purpose : Provides an interface to mail or sendmail, if available
1145 Argument : Named parameters: (case-insensitive)
1146 : -TO => e-mail address to send to
1147 : -SUBJ => subject for message (optional)
1148 : -MSG => message to be sent (optional)
1149 : -CC => cc: e-mail address (optional)
1150 Thows : Exception if TO: address appears bad or is missing.
1151 : Exception if mail cannot be sent.
1152 Comments : Based on TomC's tip at:
1153 : http://www.perl.com/CPAN/doc/FMTEYEWTK/safe_shellings
1155 : Using default 'From:' information.
1156 : sendmail options used:
1157 : -t: ignore the address given on the command line and
1158 : get To:address from the e-mail header.
1159 : -oi: prevents send_mail from ending the message if it
1160 : finds a period at the start of a line.
1162 See Also : L<mail_authority()|mail_authority>
1170 my( $self, @param) = @_;
1171 my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
1173 $self->throw("Invalid or missing e-mail address: $recipient")
1174 if not $recipient =~ /\S+\@\S+/;
1176 $subj ||= 'empty subject'; $message ||= '';
1178 # Best to use mail rather than sendmail. Permissions on sendmail in
1179 # linux distros have been significantly locked down in recent years,
1180 # due to the perception that it is insecure.
1182 if ($exe = $self->find_exe('mail')) {
1186 $self->debug("send_mail: $exe -s '$subj' $ccinfo $recipient\n");
1187 open (MAIL
, "| $exe -s '$subj' $ccinfo $recipient") ||
1188 $self->throw("Can't send email: mail cannot fork: $!");
1189 print MAIL
<<QQ_EOFM_QQ;
1192 $? and $self->warn("mail didn't exit nicely: $?");
1194 } elsif ($exe = $self->find_exe('sendmail')) {
1195 open (SENDMAIL, "| $exe -oi -t") ||
1196 $self->throw("Can't send email: sendmail cannot fork: $!");
1197 print SENDMAIL <<QQ_EOFSM_QQ
;
1205 $?
and $self->warn("sendmail didn't exit nicely: $?");
1209 $self->throw("Can't find executable for mail or sendmail.");
1217 Usage : $Util->find_exe(name);
1218 Purpose : Locate an executable (for use in a system() call, e.g.))
1219 Example : $Util->find_exe("gzip");
1220 Returns : String containing executable that passes the -x test.
1221 Returns undef if an executable of the supplied name cannot be found.
1222 Argument : Name of executable to be found.
1223 : Can be a full path. If supplied name is not executable, an executable
1224 : of that name will be searched in all directories in the currently
1225 : defined PATH environment variable.
1226 Throws : No exceptions, but issues a warning if multiple paths are found
1227 : for a given name. The first one is used.
1228 Comments : TODO: Confirm functionality on all bioperl-supported platforms.
1229 May get tripped up by variation in path separator character used
1230 for splitting ENV{PATH}.
1236 my ($self, $name) = @_;
1237 my @bindirs = split (':', $ENV{'PATH'});
1242 foreach my $d (@bindirs) {
1243 push(@exes, "$d/$name") if -x
"$d/$name";
1247 if (defined $exes[1]) {
1248 $self->warn("find_exe: Multiple paths to '$name' found. Using $exe.");
1256 ######################################
1257 ### Interactive Functions #####
1258 ######################################
1264 Usage : $Util->yes_reply( [query_string]);
1265 Purpose : To test an STDIN input value for affirmation.
1266 Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
1267 : $Util->yes_reply('Continue') || die;
1268 Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
1269 Argument: query_string = string to be used to prompt user (optional)
1270 : If not provided, 'Yes or no' will be used.
1271 : Question mark is automatically appended.
1281 $query ||= 'Yes or no';
1282 print "\n$query? (y/n) [n] ";
1283 chomp( $reply = <STDIN
> );
1291 Title : request_data()
1292 Usage : $Util->request_data( [value_name]);
1293 Purpose : To request data from a user to be entered via keyboard (STDIN).
1294 Example : $name = $Util->request_data('Name');
1295 : # User will see: % Enter Name:
1296 Returns : String, (data entered from keyboard, sans terminal newline.)
1297 Argument: value_name = string to be used to prompt user.
1298 : If not provided, 'data' will be used, (not very helpful).
1299 : Question mark is automatically appended.
1307 my $data = shift || 'data';
1308 print "Enter $data: ";
1309 # Remove the terminal newline char.
1310 chomp($data = <STDIN
>);
1315 # Not much used since you can use request_data()
1316 # and test for an empty string.
1319 chop( $reply = <STDIN
> );
1324 =head2 verify_version
1326 Purpose : Checks the version of Perl used to invoke the script.
1327 : Aborts program if version is less than the given argument.
1328 Usage : verify_version('5.000')
1333 sub verify_version
{
1336 my $reqVersion = shift;
1338 $] < $reqVersion and do {
1339 printf STDERR
( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
1340 printf STDERR
( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );