gitweb: Create Gitweb::Util module
[git/jnareb-git/bp-gitweb.git] / gitweb / lib / Gitweb / Util.pm
bloba213d3f23b6200eaefe792d67485af5c01056726
1 # Gitweb::Util -- Internal utilities used by gitweb (git web interface)
3 # This module is licensed under the GPLv2
5 package Gitweb::Util;
7 use strict;
8 use warnings;
9 use Exporter qw(import);
11 our @EXPORT = qw(to_utf8
12 esc_param esc_path_info esc_url
13 esc_html esc_path esc_attr
14 untabify
15 $fallback_encoding);
16 our @EXPORT_OK = qw(quot_cec quot_upr);
18 use Encode;
19 use CGI;
21 # ......................................................................
22 # Perl encoding (utf-8)
24 # decode sequences of octets in utf8 into Perl's internal form,
25 # which is utf-8 with utf8 flag set if needed. gitweb writes out
26 # in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning of gitweb.perl
27 our $fallback_encoding = 'latin1';
28 sub to_utf8 {
29 my $str = shift;
30 return undef unless defined $str;
31 if (utf8::valid($str)) {
32 utf8::decode($str);
33 return $str;
34 } else {
35 return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
39 # ......................................................................
40 # CGI encoding
42 # quote unsafe chars, but keep the slash, even when it's not
43 # correct, but quoted slashes look too horrible in bookmarks
44 sub esc_param {
45 my $str = shift;
46 return undef unless defined $str;
48 $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
49 $str =~ s/ /\+/g;
51 return $str;
54 # the quoting rules for path_info fragment are slightly different
55 sub esc_path_info {
56 my $str = shift;
57 return undef unless defined $str;
59 # path_info doesn't treat '+' as space (specially), but '?' must be escaped
60 $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg;
62 return $str;
65 # quote unsafe chars in whole URL, so some characters cannot be quoted
66 sub esc_url {
67 my $str = shift;
68 return undef unless defined $str;
70 $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg;
71 $str =~ s/ /\+/g;
73 return $str;
76 # ......................................................................
77 # (X)HTML escaping
79 # replace invalid utf8 character with SUBSTITUTION sequence
80 sub esc_html {
81 my $str = shift;
82 my %opts = @_;
84 return undef unless defined $str;
86 $str = to_utf8($str);
87 $str = CGI::escapeHTML($str);
88 if ($opts{'-nbsp'}) {
89 $str =~ s/ / /g;
91 $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
92 return $str;
95 # quote unsafe characters in HTML attributes
96 sub esc_attr {
98 # for XHTML conformance escaping '"' to '"' is not enough
99 return esc_html(@_);
102 # quote control characters and escape filename to HTML
103 sub esc_path {
104 my $str = shift;
105 my %opts = @_;
107 return undef unless defined $str;
109 $str = to_utf8($str);
110 $str = CGI::escapeHTML($str);
111 if ($opts{'-nbsp'}) {
112 $str =~ s/ / /g;
114 $str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
115 return $str;
118 # ......................................................................
119 # Other
121 # escape tabs (convert tabs to spaces)
122 sub untabify {
123 my $line = shift;
125 while ((my $pos = index($line, "\t")) != -1) {
126 if (my $count = (8 - ($pos % 8))) {
127 my $spaces = ' ' x $count;
128 $line =~ s/\t/$spaces/;
132 return $line;
135 # ----------------------------------------------------------------------
136 # Showing "unprintable" characters (utility functions)
138 # Make control characters "printable", using character escape codes (CEC)
139 sub quot_cec {
140 my $cntrl = shift;
141 my %opts = @_;
142 my %es = ( # character escape codes, aka escape sequences
143 "\t" => '\t', # tab (HT)
144 "\n" => '\n', # line feed (LF)
145 "\r" => '\r', # carrige return (CR)
146 "\f" => '\f', # form feed (FF)
147 "\b" => '\b', # backspace (BS)
148 "\a" => '\a', # alarm (bell) (BEL)
149 "\e" => '\e', # escape (ESC)
150 "\013" => '\v', # vertical tab (VT)
151 "\000" => '\0', # nul character (NUL)
153 my $chr = ( (exists $es{$cntrl})
154 ? $es{$cntrl}
155 : sprintf('\%2x', ord($cntrl)) );
156 if ($opts{-nohtml}) {
157 return $chr;
158 } else {
159 return "<span class=\"cntrl\">$chr</span>";
163 # Alternatively use unicode control pictures codepoints,
164 # Unicode "printable representation" (PR)
165 sub quot_upr {
166 my $cntrl = shift;
167 my %opts = @_;
169 my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl));
170 if ($opts{-nohtml}) {
171 return $chr;
172 } else {
173 return "<span class=\"cntrl\">$chr</span>";