From 8a9ce9862f3821647cbf2868ea42e6b644deff6c Mon Sep 17 00:00:00 2001 From: Pavan Kumar Sunkara Date: Thu, 27 Jan 2011 13:23:29 +0100 Subject: [PATCH] gitweb: Create Gitweb::Util module Create a Gitweb::Util module, which is meant to contain internal utilities used by gitweb. Currently it includes all the quoting/unquoting and escaping subroutines that are used by the gitweb. Update gitweb/Makefile to install Gitweb::Util module alongside gitweb Signed-off-by: Pavan Kumar Sunkara Signed-off-by: Jakub Narebski --- gitweb/Makefile | 3 + gitweb/gitweb.perl | 140 +----------------------------------- gitweb/lib/Gitweb/Util.pm | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 137 deletions(-) create mode 100755 gitweb/lib/Gitweb/Util.pm diff --git a/gitweb/Makefile b/gitweb/Makefile index abe9db8ac5..9a4053b4f3 100644 --- a/gitweb/Makefile +++ b/gitweb/Makefile @@ -114,6 +114,9 @@ endif GITWEB_FILES += static/git-logo.png static/git-favicon.png +# Modules: Gitweb::* +GITWEB_MODULES += Gitweb/Util.pm + GITWEB_REPLACE = \ -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \ -e 's|++GIT_BINDIR++|$(bindir)|g' \ diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index ea8ab56619..e8d858990e 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -23,11 +23,13 @@ use lib "++GITWEBLIBDIR++"; use CGI qw(:standard :escapeHTML -nosticky); use CGI::Util qw(unescape); use CGI::Carp qw(fatalsToBrowser set_message); -use Encode; use Fcntl ':mode'; use File::Find qw(); use File::Basename qw(basename); use Time::HiRes qw(gettimeofday tv_interval); + +use Gitweb::Util; + binmode STDOUT, ':utf8'; our $t0 = [ gettimeofday() ]; @@ -1382,128 +1384,6 @@ sub validate_refname { return $input; } -# decode sequences of octets in utf8 into Perl's internal form, -# which is utf-8 with utf8 flag set if needed. 
gitweb writes out -# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning -sub to_utf8 { - my $str = shift; - return undef unless defined $str; - if (utf8::valid($str)) { - utf8::decode($str); - return $str; - } else { - return decode($fallback_encoding, $str, Encode::FB_DEFAULT); - } -} - -# quote unsafe chars, but keep the slash, even when it's not -# correct, but quoted slashes look too horrible in bookmarks -sub esc_param { - my $str = shift; - return undef unless defined $str; - $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg; - $str =~ s/ /\+/g; - return $str; -} - -# the quoting rules for path_info fragment are slightly different -sub esc_path_info { - my $str = shift; - return undef unless defined $str; - - # path_info doesn't treat '+' as space (specially), but '?' must be escaped - $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg; - - return $str; -} - -# quote unsafe chars in whole URL, so some characters cannot be quoted -sub esc_url { - my $str = shift; - return undef unless defined $str; - $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg; - $str =~ s/ /\+/g; - return $str; -} - -# quote unsafe characters in HTML attributes -sub esc_attr { - - # for XHTML conformance escaping '"' to '"' is not enough - return esc_html(@_); -} - -# replace invalid utf8 character with SUBSTITUTION sequence -sub esc_html { - my $str = shift; - my %opts = @_; - - return undef unless defined $str; - - $str = to_utf8($str); - $str = $cgi->escapeHTML($str); - if ($opts{'-nbsp'}) { - $str =~ s/ / /g; - } - $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? 
quot_cec($1) : $1)|eg; - return $str; -} - -# quote control characters and escape filename to HTML -sub esc_path { - my $str = shift; - my %opts = @_; - - return undef unless defined $str; - - $str = to_utf8($str); - $str = $cgi->escapeHTML($str); - if ($opts{'-nbsp'}) { - $str =~ s/ / /g; - } - $str =~ s|([[:cntrl:]])|quot_cec($1)|eg; - return $str; -} - -# Make control characters "printable", using character escape codes (CEC) -sub quot_cec { - my $cntrl = shift; - my %opts = @_; - my %es = ( # character escape codes, aka escape sequences - "\t" => '\t', # tab (HT) - "\n" => '\n', # line feed (LF) - "\r" => '\r', # carrige return (CR) - "\f" => '\f', # form feed (FF) - "\b" => '\b', # backspace (BS) - "\a" => '\a', # alarm (bell) (BEL) - "\e" => '\e', # escape (ESC) - "\013" => '\v', # vertical tab (VT) - "\000" => '\0', # nul character (NUL) - ); - my $chr = ( (exists $es{$cntrl}) - ? $es{$cntrl} - : sprintf('\%2x', ord($cntrl)) ); - if ($opts{-nohtml}) { - return $chr; - } else { - return "$chr"; - } -} - -# Alternatively use unicode control pictures codepoints, -# Unicode "printable representation" (PR) -sub quot_upr { - my $cntrl = shift; - my %opts = @_; - - my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl)); - if ($opts{-nohtml}) { - return $chr; - } else { - return "$chr"; - } -} - # git may return quoted and escaped filenames sub unquote { my $str = shift; @@ -1540,20 +1420,6 @@ sub unquote { return $str; } -# escape tabs (convert tabs to spaces) -sub untabify { - my $line = shift; - - while ((my $pos = index($line, "\t")) != -1) { - if (my $count = (8 - ($pos % 8))) { - my $spaces = ' ' x $count; - $line =~ s/\t/$spaces/; - } - } - - return $line; -} - sub project_in_list { my $project = shift; my @list = git_get_projects_list(); diff --git a/gitweb/lib/Gitweb/Util.pm b/gitweb/lib/Gitweb/Util.pm new file mode 100755 index 0000000000..a213d3f23b --- /dev/null +++ b/gitweb/lib/Gitweb/Util.pm @@ -0,0 +1,177 @@ +# Gitweb::Util -- Internal utilities used by 
gitweb (git web interface)
+#
+# This module is licensed under the GPLv2
+
+package Gitweb::Util;
+
+use strict;
+use warnings;
+use Exporter qw(import);
+
+our @EXPORT = qw(to_utf8
+                 esc_param esc_path_info esc_url
+                 esc_html esc_path esc_attr
+                 untabify
+                 $fallback_encoding);
+our @EXPORT_OK = qw(quot_cec quot_upr);
+
+use Encode;
+use CGI;
+
+# ......................................................................
+# Perl encoding (utf-8)
+
+# Encoding that to_utf8() assumes for octets which are not well-formed
+# utf-8; it is exported by default, so users of this module can change it
+our $fallback_encoding = 'latin1';
+
+# decode sequences of octets in utf8 into Perl's internal form,
+# which is utf-8 with utf8 flag set if needed.  gitweb writes out
+# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning of gitweb.perl
+#
+# Returns undef for undefined input.  A string that already is in Perl's
+# internal form (utf8 flag set) is returned unchanged; octets that are
+# not well-formed utf-8 are decoded using $fallback_encoding instead.
+sub to_utf8 {
+	my $str = shift;
+	return undef unless defined $str;
+
+	# utf8::valid() only checks internal consistency of the scalar and is
+	# true for every byte string, so it can't be used to select the
+	# fallback; utf8::decode() converts in place and returns false when
+	# the octets are not well-formed utf-8 (leaving $str untouched)
+	if (utf8::is_utf8($str) || utf8::decode($str)) {
+		return $str;
+	} else {
+		return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
+	}
+}
+
+# ......................................................................
+# CGI encoding
+
+# quote unsafe chars, but keep the slash, even when it's not
+# correct, but quoted slashes look too horrible in bookmarks
+#
+# Runs of characters outside of [A-Za-z0-9\-_.~()/:@ ] are passed through
+# CGI::escape(), then each space is turned into '+'.
+# Returns undef for undefined input.
+sub esc_param {
+	my $str = shift;
+	return undef unless defined $str;
+
+	$str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
+	$str =~ s/ /\+/g;
+
+	return $str;
+}
+
+# the quoting rules for path_info fragment are slightly different:
+# ';', '&', '=' and '+' are kept as they are, and space is not
+# converted to '+'.  Returns undef for undefined input.
+sub esc_path_info {
+	my $str = shift;
+	return undef unless defined $str;
+
+	# path_info doesn't treat '+' as space (specially), but '?' must be escaped
+	$str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg;
+
+	return $str;
+}
+
+# quote unsafe chars in whole URL, so some characters cannot be quoted:
+# the separators ';', '/', '?', ':', '@', '&' and '=' are left alone,
+# and each space is turned into '+'.  Returns undef for undefined input.
+sub esc_url {
+	my $str = shift;
+	return undef unless defined $str;
+
+	$str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg;
+	$str =~ s/ /\+/g;
+
+	return $str;
+}
+
+# ......................................................................
+# (X)HTML escaping
+
+# replace invalid utf8 character with SUBSTITUTION sequence
+#
+# Escapes $str for output as (X)HTML text: the string is decoded with
+# to_utf8(), HTML metacharacters are escaped with CGI::escapeHTML(), and
+# every control character except tab is made visible with quot_cec().
+# With "-nbsp => 1" each space is replaced by a non-breaking space.
+# Returns undef for undefined input.
+sub esc_html {
+	my $str = shift;
+	my %opts = @_;
+
+	return undef unless defined $str;
+
+	$str = to_utf8($str);
+	$str = CGI::escapeHTML($str);
+	if ($opts{'-nbsp'}) {
+		# done after escapeHTML(), which would escape the '&' of the entity
+		$str =~ s/ /&nbsp;/g;
+	}
+	$str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
+	return $str;
+}
+
+# quote unsafe characters in HTML attributes
+sub esc_attr {
+
+	# for XHTML conformance escaping '"' to '&quot;' is not enough
+	return esc_html(@_);
+}
+
+# quote control characters and escape filename to HTML
+#
+# Same as esc_html(), except that tab is made visible with quot_cec()
+# too.  Returns undef for undefined input.
+sub esc_path {
+	my $str = shift;
+	my %opts = @_;
+
+	return undef unless defined $str;
+
+	$str = to_utf8($str);
+	$str = CGI::escapeHTML($str);
+	if ($opts{'-nbsp'}) {
+		# done after escapeHTML(), which would escape the '&' of the entity
+		$str =~ s/ /&nbsp;/g;
+	}
+	$str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
+	return $str;
+}
+
+# ......................................................................
+# Other
+
+# escape tabs (convert tabs to spaces)
+#
+# Each tab is expanded to the next multiple of 8 columns, counting from
+# the beginning of $line; returns the expanded copy.
+sub untabify {
+	my $line = shift;
+
+	while ((my $pos = index($line, "\t")) != -1) {
+		# a tab always takes between 1 and 8 columns
+		substr($line, $pos, 1, ' ' x (8 - ($pos % 8)));
+	}
+
+	return $line;
+}
+
+# ----------------------------------------------------------------------
+# Showing "unprintable" characters (utility functions)
+
+# Make control characters "printable", using character escape codes (CEC)
+#
+# $cntrl is a single character; it is shown as its C-like escape sequence
+# if it has one, and as backslash followed by two hex digits otherwise.
+# The result is wrapped in <span class="cntrl">, unless "-nohtml => 1"
+# is passed, in which case the bare escape sequence is returned.
+sub quot_cec {
+	my $cntrl = shift;
+	my %opts = @_;
+	my %es = ( # character escape codes, aka escape sequences
+		"\t" => '\t',   # tab             (HT)
+		"\n" => '\n',   # line feed       (LF)
+		"\r" => '\r',   # carriage return (CR)
+		"\f" => '\f',   # form feed       (FF)
+		"\b" => '\b',   # backspace       (BS)
+		"\a" => '\a',   # alarm (bell)    (BEL)
+		"\e" => '\e',   # escape          (ESC)
+		"\013" => '\v', # vertical tab    (VT)
+		"\000" => '\0', # nul character   (NUL)
+	);
+	# zero-padded, so that e.g. 0x01 is shown as '\01' and not as '\ 1'
+	my $chr = ( (exists $es{$cntrl})
+		    ? $es{$cntrl}
+		    : sprintf('\%02x', ord($cntrl)) );
+	if ($opts{-nohtml}) {
+		return $chr;
+	} else {
+		return "<span class=\"cntrl\">$chr</span>";
+	}
+}
+
+# Alternatively use unicode control pictures codepoints,
+# Unicode "printable representation" (PR)
+#
+# $cntrl is a single character; the result is a numeric character
+# reference, wrapped in <span class="cntrl"> unless "-nohtml => 1"
+# is passed.
+sub quot_upr {
+	my $cntrl = shift;
+	my %opts = @_;
+
+	# control pictures for U+0000..U+001F are U+2400..U+241F, but the
+	# picture for DEL (U+007F) is U+2421 and not U+2400 + 0x7F
+	my $code = ord($cntrl);
+	my $chr = sprintf('&#%04d;', ($code == 0x7f ? 0x2421 : 0x2400 + $code));
+	if ($opts{-nohtml}) {
+		return $chr;
+	} else {
+		return "<span class=\"cntrl\">$chr</span>";
+	}
+}
+
+1;
-- 
2.11.4.GIT