1 # Gitweb::Util -- Internal utilities used by gitweb (git web interface)
3 # This module is licensed under the GPLv2
9 use Exporter
qw(import);
11 our @EXPORT = qw(to_utf8
12 esc_param esc_path_info esc_url
13 esc_html esc_path esc_attr
16 our @EXPORT_OK = qw(quot_cec quot_upr);
21 # ......................................................................
22 # Perl encoding (utf-8)
24 # decode sequences of octets in utf8 into Perl's internal form,
25 # which is utf-8 with utf8 flag set if needed. gitweb writes out
26 # in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning of gitweb.perl
27 our $fallback_encoding = 'latin1';
30 return undef unless defined $str;
31 if (utf8
::valid
($str)) {
35 return decode
($fallback_encoding, $str, Encode
::FB_DEFAULT
);
39 # ......................................................................
42 # quote unsafe chars, but keep the slash unquoted: this is not strictly
43 # correct, but quoted slashes look too horrible in bookmarks
46 return undef unless defined $str;
48 $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI
::escape
($1)/eg
;
54 # the quoting rules for path_info fragment are slightly different
57 return undef unless defined $str;
59 # path_info doesn't treat '+' as space (specially), but '?' must be escaped
60 $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI
::escape
($1)/eg
;
65 # quote unsafe chars in whole URL, so some characters cannot be quoted
68 return undef unless defined $str;
70 $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI
::escape
($1)/eg
;
76 # ......................................................................
79 # replace invalid utf8 character with SUBSTITUTION sequence
84 return undef unless defined $str;
87 $str = CGI
::escapeHTML
($str);
91 $str =~ s
|([[:cntrl
:]])|(($1 ne "\t") ? quot_cec
($1) : $1)|eg
;
95 # quote unsafe characters in HTML attributes
98 # for XHTML conformance escaping '"' to '&quot;' is not enough
102 # quote control characters and escape filename to HTML
107 return undef unless defined $str;
109 $str = to_utf8
($str);
110 $str = CGI
::escapeHTML
($str);
111 if ($opts{'-nbsp'}) {
112 $str =~ s/ / /g;
114 $str =~ s
|([[:cntrl
:]])|quot_cec
($1)|eg
;
118 # ......................................................................
121 # escape tabs (convert tabs to spaces)
125 while ((my $pos = index($line, "\t")) != -1) {
126 if (my $count = (8 - ($pos % 8))) {
127 my $spaces = ' ' x
$count;
128 $line =~ s/\t/$spaces/;
135 # ----------------------------------------------------------------------
136 # Showing "unprintable" characters (utility functions)
138 # Make control characters "printable", using character escape codes (CEC)
142 my %es = ( # character escape codes, aka escape sequences
143 "\t" => '\t', # tab (HT)
144 "\n" => '\n', # line feed (LF)
145 "\r" => '\r', # carriage return (CR)
146 "\f" => '\f', # form feed (FF)
147 "\b" => '\b', # backspace (BS)
148 "\a" => '\a', # alarm (bell) (BEL)
149 "\e" => '\e', # escape (ESC)
150 "\013" => '\v', # vertical tab (VT)
151 "\000" => '\0', # nul character (NUL)
153 my $chr = ( (exists $es{$cntrl})
155 : sprintf('\%2x', ord($cntrl)) );
156 if ($opts{-nohtml
}) {
159 return "<span class=\"cntrl\">$chr</span>";
163 # Alternatively use unicode control pictures codepoints,
164 # Unicode "printable representation" (PR)
169 my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl));
170 if ($opts{-nohtml
}) {
173 return "<span class=\"cntrl\">$chr</span>";