Util.pm: improve multiple email address checking
[girocco/readme.git] / Girocco / Util.pm
bloba0c9799320dae12106108f2f22790a994b15a691
1 package Girocco::Util;
3 use 5.008;
4 use strict;
5 use warnings;
7 use Girocco::Config;
8 use Time::Local;
9 use Encode;
11 BEGIN {
12 use base qw(Exporter);
13 our @EXPORT = qw(get_git scrypt jailed_file sendmail_pipe mailer_pipe
14 lock_file unlock_file valid_tag rand_adjust
15 filedb_atomic_append filedb_atomic_edit filedb_grep
16 filedb_atomic_grep valid_email valid_email_multi
17 valid_repo_url valid_web_url url_base url_path url_server
18 projects_html_list parse_rfc2822_date parse_any_date
19 extract_url_hostname is_dns_hostname is_our_hostname
20 get_cmd online_cpus sys_pagesize sys_memsize
21 calc_windowmemory to_utf8 capture_command human_size
22 calc_bigfilethreshold has_reserved_suffix
23 noFatalsToBrowser calc_redeltathreshold
24 clean_email_multi);
# Single-byte fallback decoder used by to_utf8 when its input is not valid
# UTF-8: Windows-1252 if Encode provides it, otherwise ISO-8859-1.
my $encoder;
BEGIN {
	$encoder = Encode::find_encoding('Windows-1252');
	$encoder = Encode::find_encoding('ISO-8859-1') unless $encoder;
	$encoder or die "failed to load ISO-8859-1 encoder\n";
}

# Return the first argument as a character string.
# Input that is already flagged as characters, or that decodes cleanly as
# UTF-8, is used as-is; anything else is decoded with the fallback encoder.
# If the optional second argument is true the answer is re-encoded to UTF-8
# bytes before being returned.  Returns undef for an undefined input.
sub to_utf8($;$) {
	my ($str, $encode) = @_;
	defined($str) or return undef;
	# utf8::decode converts $str in place when it succeeds
	my $is_text = Encode::is_utf8($str) || utf8::decode($str);
	my $ans = $is_text ? $str : $encoder->decode($str, Encode::FB_DEFAULT);
	utf8::encode($ans) if $encode;
	return $ans;
}
47 BEGIN {require "Girocco/extra/capture_command.pl"}
# Run a command (the argument list is handed to capture_command) and return
# everything it wrote to stdout.
# Anything the command writes to stderr is discarded.
# Returns undef if the command could not be run (see $!) or if it finished
# with a non-zero status.
sub get_cmd {
	my ($status, $result) = capture_command(1, undef, @_);
	return undef unless defined($status) && $status == 0;
	return $result;
}
# Convenience wrapper around get_cmd: the configured git binary
# ($Girocco::Config::git_bin) is supplied as the command and the arguments
# given here become its arguments.
sub get_git {
	my @gitargs = @_;
	return get_cmd($Girocco::Config::git_bin, @gitargs);
}
# Hash a password with crypt() using a freshly generated random two-character
# salt drawn from the 64-character set [./0-9A-Za-z].
# An undefined password is hashed as the empty string.  Only undef is
# replaced: a password that is false but defined (such as "0") is hashed
# as given rather than being silently turned into the empty password.
sub scrypt {
	my ($pwd) = @_;
	defined($pwd) or $pwd = '';
	my @saltchars = ('.', '/', 0..9, 'A'..'Z', 'a'..'z');
	my $salt = join('', @saltchars[rand 64, rand 64]);
	return crypt($pwd, $salt);
}
# Map a path onto the configured chroot: one leading '/' is removed from the
# argument and the result is appended to "$Girocco::Config::chroot/".
sub jailed_file {
	my ($filename) = @_;
	(my $relative = $filename) =~ s,^/,,;
	return $Girocco::Config::chroot . '/' . $relative;
}
# Take the lock for $path by exclusively creating "$path.lock" and return a
# write handle on the lock file.
# If the first attempt fails the open is retried; while the failure is
# EEXIST the code sleeps one second between attempts and gives up (dies)
# after 16 such waits.  Any other open error dies immediately.
sub lock_file {
	my ($path) = @_;

	use Errno qw(EEXIST);
	use Fcntl qw(O_WRONLY O_CREAT O_EXCL);
	use IO::Handle;

	my $lockpath = $path . '.lock';
	my $flags = O_WRONLY|O_CREAT|O_EXCL;
	my $handle = IO::Handle->new;

	if (!sysopen($handle, $lockpath, $flags)) {
		my $waits = 0;
		until (sysopen($handle, $lockpath, $flags)) {
			($! == EEXIST) or die "$lockpath open failed: $!";
			($waits++ < 16) or die "$lockpath open failed: cannot open lockfile";
			sleep(1);
		}
	}
	# XXX: filedb-specific
	chmod 0664, $lockpath or die "$lockpath g+w failed: $!";

	$handle;
}
# True when the argument is defined and is exactly the jailed /etc/passwd
# path (as computed by jailed_file).
sub _is_passwd_file {
	my ($path) = @_;
	return defined($path) && $path eq jailed_file('/etc/passwd');
}
# Run "$Girocco::Config::basedir/bin/update-pwd-db" on $path, passing
# $updatearg as an extra argument when it is true.
# Dies if the command does not exit successfully.
sub _run_update_pwd_db {
	my ($path, $updatearg) = @_;
	my $tool = $Girocco::Config::basedir.'/bin/update-pwd-db';
	my @cmd = ($tool, "$path", ($updatearg ? ($updatearg) : ()));
	system(@cmd) == 0 or die "update-pwd-db failed: $?";
}
# Release the lock taken by lock_file on $path.
# With a true $noreplace the lock file is simply removed.  Otherwise the lock
# file is renamed over $path (committing its contents); before that rename,
# update-pwd-db is run on the lock file when $Girocco::Config::update_pwd_db
# is set and $path is the jailed passwd file.  Dies on failure.
sub unlock_file {
	my ($path, $noreplace, $updatearg) = @_;
	my $lockpath = "$path.lock";

	if ($noreplace) {
		unlink $lockpath or die "$path unlock failed: $!";
	} else {
		if ($Girocco::Config::update_pwd_db && _is_passwd_file($path)) {
			_run_update_pwd_db($lockpath, $updatearg);
		}
		rename $lockpath, $path or die "$path unlock failed: $!";
	}
}
# Append a record to a colon-separated file db under the file lock.
# Every existing line is copied to the lock file while tracking the third
# field; the new id is one more than the largest third field seen, with a
# minimum of 65536.  Each "\i" in $line is replaced by that id, the line is
# appended, and the lock file then replaces the original.
# Returns the id that was used.  Dies on any I/O failure.
sub filedb_atomic_append {
	my ($file, $line, $updatearg) = @_;
	my $nextid = 65536;

	open(my $src, '<', $file) or die "$file open for reading failed: $!";
	my $dst = lock_file($file);

	while (<$src>) {
		my @fields = split(/:/);
		my $aid = $fields[2];
		$nextid = $aid + 1 if $aid >= $nextid;

		print {$dst} $_ or die "$file(l) write failed: $!";
	}

	$line =~ s/\\i/$nextid/g;
	print {$dst} "$line\n" or die "$file(l) write failed: $!";

	close($dst) or die "$file(l) close failed: $!";
	close($src);

	unlock_file($file, 0, $updatearg);

	return $nextid;
}
# Rewrite a file db under the file lock.
# $fn is called once per line (the line is passed as its argument) and
# whatever it returns is written to the lock file in place of that line; the
# lock file then replaces the original.  Dies on any I/O failure.
sub filedb_atomic_edit {
	my ($file, $fn, $updatearg) = @_;

	open(my $src, '<', $file) or die "$file open for reading failed: $!";
	my $dst = lock_file($file);

	while (<$src>) {
		print {$dst} $fn->($_) or die "$file(l) write failed: $!";
	}

	close($dst) or die "$file(l) close failed: $!";
	close($src);

	unlock_file($file, 0, $updatearg);
}
# Scan a file db while holding the file lock.
# $fn is called once per line (the line is passed as its argument); every
# true value it returns is collected.  The file itself is left untouched:
# the lock file is discarded afterwards.  Returns the collected values.
sub filedb_atomic_grep {
	my ($file, $fn) = @_;
	my @results;

	open(my $src, '<', $file) or die "$file open for reading failed: $!";
	my $dst = lock_file($file);

	while (<$src>) {
		my $result = $fn->($_);
		next unless $result;
		push(@results, $result);
	}

	close($dst) or die "$file(l) close failed: $!";
	close($src);

	unlock_file($file, 1);
	return @results;
}
# Scan a file db without taking any lock.
# $fn is called once per line (the line is passed as its argument); every
# true value it returns is collected.  Returns the collected values.
sub filedb_grep {
	my ($file, $fn) = @_;
	my @results;

	open(my $src, '<', $file) or die "$file open for reading failed: $!";

	while (<$src>) {
		my $result = $fn->($_);
		next unless $result;
		push(@results, $result);
	}

	close($src);

	return @results;
}
# Return true if the argument looks like a single plain email address:
# one or more of [a-zA-Z0-9+._-], an '@', then one or more of [a-zA-Z0-9.-].
# An undefined argument is treated as the empty string (and so is invalid).
# The pattern is anchored with \A and \z so that nothing, not even a single
# trailing newline, may precede or follow the address.
sub valid_email {
	my $email = shift;
	defined($email) or $email = '';
	return $email =~ /\A[a-zA-Z0-9+._-]+\@[a-zA-Z0-9.-]+\z/;
}
# Normalize a comma-separated list of email addresses.
# Leading/trailing whitespace and whitespace around commas is dropped, empty
# entries are skipped and case-insensitive duplicates are removed (the first
# spelling wins).  Returns the survivors joined with "," and no spaces.
# An undefined argument is treated as the empty string.
sub clean_email_multi {
	my $input = shift;
	$input = '' unless defined($input);
	for ($input) {
		s/^\s+//;
		s/\s+$//;
	}
	my %seen;
	my @unique;
	foreach my $addr (split(/\s*,\s*/, $input)) {
		next if $addr eq "";
		next if $seen{lc($addr)}++;
		push(@unique, $addr);
	}
	return join(",", @unique);
}
# Validate a comma-separated list of email addresses.
# The list is first normalized with clean_email_multi (so extra whitespace at
# either end and around commas is ignored); returns 0 as soon as one address
# fails valid_email and 1 otherwise.
sub valid_email_multi {
	my @addrs = split(/,/, clean_email_multi(shift));
	foreach my $addr (@addrs) {
		valid_email($addr) or return 0;
	}
	return 1;
}
# Return true if the argument is an acceptable http:// or https:// URL:
# a host[:port] part made of [a-zA-Z0-9.:-], an optional path/query made of
# [_%a-zA-Z0-9./~:?&=;-] and an optional #fragment made of [a-zA-Z0-9._-].
# An undefined argument is treated as the empty string (and so is invalid).
# The pattern is anchored with \A and \z so a URL followed by a newline is
# rejected instead of being accepted by a trailing '$'.
sub valid_web_url {
	my $url = shift;
	defined($url) or $url = '';
	return $url =~
		/\Ahttps?:\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/~:?&=;-]*)?(#[a-zA-Z0-9._-]+)?\z/;
}
# Return true if the argument is an acceptable repository (mirror) URL.
# http://, https:// and git:// are always allowed; svn://, svn+http(s)://,
# darcs://, bzr:// and hg+http(s):// are allowed only when the matching
# $Girocco::Config::mirror_* setting is true.
# Currently neither username nor password is allowed in the URL and IPv6
# literal addresses are not accepted either.
# All patterns are anchored with \A and \z so a URL followed by a newline is
# rejected instead of being accepted by a trailing '$'.
sub valid_repo_url {
	my $url = shift || '';
	my $host = qr/[a-zA-Z0-9.:-]+/;			# host and optional :port
	my $path = qr/\/[_\%a-zA-Z0-9.\/~-]*/;		# '/' plus path characters
	$Girocco::Config::mirror_svn &&
		$url =~ /\Asvn(\+https?)?:\/\/$host($path)?\z/
		and return 1;
	$Girocco::Config::mirror_darcs &&
		$url =~ /\Adarcs:\/\/$host($path)?\z/
		and return 1;
	$Girocco::Config::mirror_bzr &&
		$url =~ /\Abzr:\/\/$host($path)?\z/
		and return 1;
	$Girocco::Config::mirror_hg &&
		$url =~ /\Ahg\+https?:\/\/$host($path)?\z/
		and return 1;
	return $url =~ /\A(https?|git):\/\/$host($path)?\z/;
}
# Pull the host name out of a URL.
# A leading "bzr://" is peeled off first; what remains is either an "lp:"
# reference (answered with 'launchpad.net') or is treated like any other URL.
# The scheme, everything from the first '/', a trailing ":port" and any
# "userinfo@" prefix are removed, in that order.
# Returns undef if the argument does not look like "scheme://something" or
# if nothing true is left over.
sub extract_url_hostname {
	my $url = shift || '';
	if ($url =~ s{^bzr://}{}) {
		return 'launchpad.net' if $url =~ /^lp:/;
	}
	$url =~ m{^[A-Za-z0-9+.-]+://[^/]} or return undef;
	for ($url) {
		s{^[A-Za-z0-9+.-]+://}{};	# scheme
		s{^([^/]+).*$}{$1};		# keep only the authority part
		s{:[0-9]*$}{};			# port
		s{^[^\@]*[\@]}{};		# userinfo
	}
	return $url ? $url : undef;
}
# Return 1 if the argument is an acceptable DNS host name, 0 otherwise.
# See these RFCs:
#   RFC 1034 section 3.5
#   RFC 1123 section 2.1
#   RFC 1738 section 3.1
#   RFC 2606 sections 2 & 3
#   RFC 3986 section 3.2.2
# Rejected: empty/undefined names, anything containing whitespace, names
# longer than 255 characters (after one trailing '.' is dropped), dotted-quad
# IPv4 addresses, names with fewer than $Girocco::Config::min_dns_labels
# labels, labels that are empty, longer than 63 characters or not of the
# letter/digit/hyphen form, and (when two or more labels are present) the
# reserved RFC 2606 names.
sub is_dns_hostname {
	my $host = shift;
	$host = '' unless defined($host);
	return 0 if $host eq '' || $host =~ /\s/;
	# first remove a trailing '.'
	$host =~ s/\.$//;
	return 0 if length($host) > 255;
	my $octet = '(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])';
	return 0 if $host =~ /^(?:$octet\.){3}$octet$/;
	my @labels = split(/[.]/, $host, -1);
	return 0 unless @labels && @labels >= $Girocco::Config::min_dns_labels;
	# now check each label
	foreach my $label (@labels) {
		my $len = length($label);
		return 0 if $len < 1 || $len > 63;
		return 0 if $label !~ /^[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?$/;
	}
	# disallow RFC 2606 names provided at least two labels are present
	if (@labels >= 2) {
		my ($sld, $tld) = map({lc($_)} @labels[-2, -1]);
		my %reserved_tld = map({($_ => 1)} qw(test example invalid localhost));
		my %example_tld = map({($_ => 1)} qw(com net org));
		return 0 if $reserved_tld{$tld};
		return 0 if $sld eq 'example' && $example_tld{$tld};
	}
	return 1;
}
# Return 1 if the argument (compared case-insensitively and ignoring one
# trailing '.') is the host name of any of the configured site URLs,
# otherwise 0.  Host names are obtained with extract_url_hostname; unset
# URLs and URLs without an extractable host are ignored.
sub is_our_hostname {
	my $test = shift || '';
	$test =~ s/\.$//;
	my @urls = (
		$Girocco::Config::gitweburl,
		$Girocco::Config::gitwebfiles,
		$Girocco::Config::webadmurl,
		$Girocco::Config::bundlesurl,
		$Girocco::Config::htmlurl,
		$Girocco::Config::httppullurl,
		$Girocco::Config::httpbundleurl,
		$Girocco::Config::httpspushurl,
		$Girocco::Config::gitpullurl,
		$Girocco::Config::pushurl
	);
	my %names;
	foreach my $url (grep({$_} @urls)) {
		my $host = extract_url_hostname($url);
		next unless defined($host);
		$host =~ s/\.$//;
		$names{lc($host)} = 1;
	}
	return $names{lc($test)} ? 1 : 0;
}
# Lookup tables used by valid_tag.  Keys are lowercase tag names.
my (%_oktags, %_badtags);
BEGIN {
	# entries MUST be all lowercase to be effective
	%_oktags = (
		# These are always okay (a "whitelist") even if they would otherwise
		# not be allowed
		".net"=>1, "2d"=>1, "3d"=>1, "6502"=>1, "68000"=>1, "68008"=>1,
		"68010"=>1, "68020"=>1, "68030"=>1, "68040"=>1, "68060"=>1,
		"8086"=>1, "80286"=>1, "80386"=>1, "80486"=>1, "80586"=>1,
		c=>1, cc=>1, make=>1, www=>1, x=>1
	);
	# site-configured additions are lowercased on the way in
	$_oktags{lc($_)} = 1 foreach @Girocco::Config::allowed_tags;
	# entries MUST be all lowercase to be effective
	%_badtags = (
		# These are "nonsense" or pointless tags
		about=>1, after=>1, all=>1, also=>1, an=>1, and=>1, another=>1, any=>1,
		are=>1, as=>1, at=>1, be=>1, because=>1, been=>1, before=>1, being=>1,
		between=>1, both=>1, but=>1, by=>1, came=>1, can=>1, come=>1, could=>1,
		did=>1, do=>1, each=>1, for=>1, from=>1, get=>1, got=>1, had=>1, has=>1,
		have=>1, he=>1, her=>1, here=>1, him=>1, himself=>1, his=>1, how=>1,
		if=>1, in=>1, into=>1, is=>1, it=>1, like=>1, make=>1, many=>1, me=>1,
		might=>1, more=>1, most=>1, much=>1, must=>1, my=>1, never=>1, now=>1,
		of=>1, oh=>1, on=>1, only=>1, or=>1, other=>1, our=>1, out=>1, over=>1,
		said=>1, same=>1, see=>1, should=>1, since=>1, some=>1, still=>1,
		such=>1, take=>1, than=>1, that=>1, the=>1, their=>1, them=>1, then=>1,
		there=>1, these=>1, they=>1, this=>1, those=>1, through=>1, to=>1,
		too=>1, under=>1, up=>1, very=>1, was=>1, way=>1, we=>1, well=>1,
		were=>1, what=>1, where=>1, which=>1, while=>1, who=>1, with=>1,
		would=>1, yea=>1, yeah=>1, you=>1, your=>1, yup=>1
	);
	# These are "offensive" tags with at least one letter escaped to
	# avoid having this file trigger various safe-scan robots
	$_badtags{"a\x73\x73"} = 1;
	$_badtags{"a\x73\x73hole"} = 1;
	$_badtags{"b\x30\x30b"} = 1;
	$_badtags{"b\x30\x30bs"} = 1;
	$_badtags{"b\x6f\x6fb"} = 1;
	$_badtags{"b\x6f\x6fbs"} = 1;
	$_badtags{"b\x75tt"} = 1;
	$_badtags{"b\x75ttd\x69\x63k"} = 1;
	$_badtags{"c\x6f\x63k"} = 1;
	$_badtags{"c\x75\x6e\x74"} = 1;
	$_badtags{"d\x69\x63k"} = 1;
	$_badtags{"d\x69\x63kb\x75tt"} = 1;
	$_badtags{"f\x75\x63k"} = 1;
	$_badtags{"in\x63\x65st"} = 1;
	$_badtags{"ph\x75\x63k"} = 1;
	$_badtags{"p\x6f\x72n"} = 1;
	$_badtags{"p\x6f\x72no"} = 1;
	$_badtags{"p\x6f\x72nographic"} = 1;
	$_badtags{"p\x72\x30n"} = 1;
	$_badtags{"p\x72\x6fn"} = 1;
	$_badtags{"r\x61\x70e"} = 1;
	$_badtags{"s\x65\x78"} = 1;
	# site-configured additions are lowercased on the way in
	$_badtags{lc($_)} = 1 foreach @Girocco::Config::blocked_tags;
}

# A valid tag must only have [a-zA-Z0-9:.+#_-] characters, must start with a
# letter, must not be a noise word, must be more than one character long,
# must not be a repeated letter and must be no more than 32 characters long.
# However, anything in %_oktags is explicitly allowed even if it otherwise
# would violate the rules.
# The character checks are anchored with \A and \z so that a tag followed by
# a newline is rejected.  Returns 1 for a valid tag and 0 otherwise.
sub valid_tag {
	my $tag = $_[0] || '';
	return 1 if $_oktags{lc($tag)};
	return 0 unless $tag =~ /\A[a-zA-Z][a-zA-Z0-9:.+#_-]+\z/;
	return 0 if $_badtags{lc($tag)};
	return 0 if $tag =~ /\A(.)\1+\z/;
	return length($tag) <= 32 ? 1 : 0;
}
# If the passed in argument looks like a URL, return only the stuff up through
# the host:port part otherwise return the entire argument.
# The optional userinfo part may not contain '/', '?' or '#', so an '@' that
# appears later in the path, query or fragment is never mistaken for the
# userinfo separator.
sub url_base {
	my $url = shift || '';
	# See RFC 3986
	$url = $1.$2.$3.$4 if $url =~ m{^( [A-Za-z][A-Za-z0-9+.-]*: )	# scheme
				( // )				# // separator
				((?:[^\@/?\#]+\@)?)		# optional userinfo
				( [^/?\#]+ )			# host and port
				(?:[/?\#].*)?$			# path and optional query string and/or anchor
				}x;
	return $url;
}
# If the passed in argument looks like a URL, return only the stuff following
# the host:port part otherwise return the entire argument.
# If the optional second argument is true, the returned value will have '/'
# appended if it does not already end in '/'.
# The optional userinfo part may not contain '/', '?' or '#', so an '@' that
# appears later in the path, query or fragment is never mistaken for the
# userinfo separator.
sub url_path {
	my $url = shift || '';
	my $add_slash = shift || 0;
	# See RFC 3986
	$url = $1 if $url =~ m{^(?: [A-Za-z][A-Za-z0-9+.-]*: )	# scheme
			(?: // )				# // separator
			(?: [^\@/?\#]+\@ )?			# optional userinfo
			(?: [^/?\#]+ )				# host and port
			((?:[/?\#].*)?)$			# path and optional query string and/or anchor
			}x;
	$url .= '/' if $add_slash && $url !~ m|/$|;
	return $url;
}
# If both SERVER_NAME and SERVER_PORT are set pass the argument through url_path
# and then prefix it with the appropriate scheme (https when HTTPS is "on"),
# host and port and return it.  The port is omitted when it is the default
# for the scheme.  If something that doesn't look like it could be the start
# of a URL path comes back from url_path, SERVER_PORT is not a plausible port
# number or SERVER_NAME is a link-local IPv6 address then just return the
# argument unchanged.
sub url_server {
	my $url = shift || '';
	my $path = url_path($url);
	return $url unless $path eq '' || $path =~ m|^[/?#]|;
	my ($name, $portstr) = @ENV{'SERVER_NAME', 'SERVER_PORT'};
	return $url unless $name && $portstr &&
		$portstr =~ /^[1-9][0-9]{0,4}$/;
	return $url if $name =~ /^[[]?fe80:/i;
	# Deal with Apache bug where IPv6 literal server names do not include
	# the required surrounding '[' and ']' characters
	my $server = ($name =~ /:/ && $name !~ /^[[]/) ? '[' . $name . ']' : $name;
	my $ishttps = $ENV{'HTTPS'} && $ENV{'HTTPS'} =~ /^on$/i;
	my $portnum = 0 + $portstr;
	my $defaultport = $ishttps ? 443 : 80;
	my $port = $portnum != $defaultport ? ':' . $portnum : '';
	return ($ishttps ? 'https' : 'http') . '://' . $server . $port . $path;
}
# Format a number rounded to the nearest tenth.  The ".d" part is left off
# when it would be ".0" unless the optional second argument is true.
sub _tenths {
	my ($v, $use0) = @_;
	my $scaled = int($v * 10 + 0.5);	# value in tenths, rounded
	my $whole = int($scaled / 10);
	my $tenth = $scaled % 10;
	return '' . $whole if !$tenth && !$use0;
	return '' . $whole . '.' . $tenth;
}
# Returns a human-readable size string (e.g. '1.5 MiB') for the value
# (in bytes) passed in.  Returns '0' for undefined or 0 or not all digits.
# Otherwise returns '1 KiB' for anything up to and including 1024, or else a
# number rounded to the nearest tenths of a KiB, MiB or GiB.
sub human_size {
	my $v = shift || 0;
	return "0" unless $v && $v =~ /^\d+$/;
	return "1 KiB" unless $v > 1024;
	foreach my $unit ('KiB', 'MiB') {
		$v /= 1024;
		return _tenths($v) . " $unit" if $v < 1024;
	}
	$v /= 1024;
	return _tenths($v) . " GiB";
}
# Escape the four characters & < > " as HTML entities and return the result.
sub _escapeHTML {
	my $str = shift;
	my %entity = (
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		'"' => '&quot;',
	);
	$str =~ s/([&<>"])/$entity{$1}/gs;
	return $str;
}
# Create a relative time string ("3 hours ago") from an age in seconds.
# The largest unit of which the age exceeds two is used (years, months,
# weeks, days, hours, mins, secs); ages from 0 through 2 seconds give
# "right now" and negative ages give "future time".
sub _rel_age {
	my $age = shift;

	return int($age/60/60/24/365) . " years ago"
		if $age > 60*60*24*365*2;
	return int($age/60/60/24/(365/12)) . " months ago"
		if $age > 60*60*24*(365/12)*2;
	return int($age/60/60/24/7) . " weeks ago"
		if $age > 60*60*24*7*2;
	return int($age/60/60/24) . " days ago"
		if $age > 60*60*24*2;
	return int($age/60/60) . " hours ago"
		if $age > 60*60*2;
	return int($age/60) . " mins ago"
		if $age > 60*2;
	return int($age) . " secs ago"
		if $age > 2;
	return $age >= 0 ? "right now" : "future time";
}
# Create a relative idle-time string from an idle time in seconds.
# This is the _rel_age string without its " ago" part, except that
# "right now" becomes "not at all".
sub _rel_idle {
	my $idle_str = _rel_age(shift);
	return "not at all" if $idle_str eq "right now";
	$idle_str =~ s/ ago//;
	return $idle_str;
}
# strftime for a time in an explicit time zone.
#   $fmt      -- POSIX strftime format; every "%z" is replaced by the zone
#                written as +hhmm / -hhmm
#   $secs     -- time in seconds since the epoch
#   $zonesecs -- zone offset from UTC in seconds
# The broken-down time is taken from gmtime($secs + $zonesecs).
sub _strftime {
	use POSIX qw(strftime);
	my ($fmt, $secs, $zonesecs) = @_;
	my ($S,$M,$H,$d,$m,$y) = gmtime($secs + $zonesecs);

	# build the +hhmm / -hhmm zone string from whole minutes
	my $zonemins = int($zonesecs / 60);
	my $sign = "+";
	if ($zonemins < 0) {
		$sign = "-";
		$zonemins = -$zonemins;
	}
	my $z = $sign . sprintf("%02d%02d", int($zonemins/60), $zonemins % 60);

	# hide %z from the system strftime, then substitute our own zone
	(my $sysfmt = $fmt) =~ s/%z/\$z/g;
	my $ans = strftime($sysfmt, $S, $M, $H, $d, $m, $y, -1, -1, -1);
	$ans =~ s/\$z/$z/g;
	return $ans;
}
# Take a list of project names and produce a nicely formatted table that
# includes owner links and descriptions.  If the list is empty returns ''.
# Names for which Girocco::Project::does_exist is false are skipped; if no
# rows remain '' is returned as well (unless emptyok is set).
# The first argument may be a hash ref that contains options.  The following
# options are available:
#   target  -- sets the target value of the owner link
#   emptyok -- if true returns an empty table rather than ''
#   sizecol -- if true include a human-readable size column
#   typecol -- if true include type column with hover info
#   changed -- if true include a changed and idle column
# Each project name cell is a complete <a ...>name</a> link (the closing
# </a> is always emitted).
sub projects_html_list {
	my $options = {};
	if (defined($_[0]) && ref($_[0]) eq 'HASH') {
		$options = shift;
	}
	return '' unless @_ || (defined($options->{emptyok}) && $options->{emptyok});
	require Girocco::Project;
	my $count = 0;
	my $target = '';
	$target = " target=\""._escapeHTML($options->{target})."\""
		if defined($options->{target});
	my $withsize = defined($options->{sizecol}) && $options->{sizecol};
	my $withtype = defined($options->{typecol}) && $options->{typecol};
	my $withchanged = defined($options->{changed}) && $options->{changed};
	# each substr(..., 0, -1) below drops the heredoc's final newline
	my $sizehead = '';
	$sizehead = substr(<<EOT, 0, -1) if $withsize;
<th class="sizecol"><span class="hover">Size<span><span class="head">Size</span
/>Fork size excludes objects borrowed from the parent.</span></span></th
>
EOT
	my $typehead = '';
	$typehead = '<th>Type</th>' if $withtype;
	my $chghead = '';
	$chghead = substr(<<EOT, 0, -1) if $withchanged;
<th><span class="hover">Changed<span><span class="head">Changed</span
/>The last time a ref change was received by this site.</span></span></th
><th><span class="hover">Idle<span><span class="head">Idle</span
/>The most recent committer time in <i>refs/heads</i>.</span></span></th
>
EOT
	my $html = <<EOT;
<table class='projectlist'><tr><th>Project</th>$sizehead$typehead$chghead<th class="desc">Description</th></tr>
EOT
	my $trclass = ' class="odd"';
	foreach my $name (sort({lc($a) cmp lc($b)} @_)) {
		next unless Girocco::Project::does_exist($name, 1);
		my $proj = Girocco::Project->load($name);
		my $projname = $proj->{name}.".git";
		my $projdesc = $proj->{desc}||'';
		utf8::decode($projdesc) if utf8::valid($projdesc);
		my $sizecol = '';
		if ($withsize) {
			my $psize = $proj->{reposizek};
			$psize = undef unless defined($psize) && $psize =~ /^\d+$/;
			$psize = 0 if !defined($psize) && $proj->is_empty;
			if (!defined($psize)) {
				$psize = 'unknown';
			} elsif (!$psize) {
				$psize = 'empty';
			} else {
				# reposizek is multiplied by 1024 to get the byte
				# count that human_size expects
				$psize = human_size($psize * 1024);
				$psize =~ s/ /\&#160;/g;
			}
			$sizecol = '<td class="sizecol">'.$psize.'</td>';
		}
		my $typecol = '';
		if ($withtype) {
			if ($proj->{mirror}) {
				$typecol = substr(<<EOT, 0, -1);
<td class="type"><span class="hover">mirror<span class="nowrap">@{[_escapeHTML($proj->{url})]}</span></span></td>
EOT
			} else {
				my $users = @{$proj->{users}};
				$users .= ' user';
				$users .= 's' unless @{$proj->{users}} == 1;
				my $userlist = join(', ', sort({lc($a) cmp lc($b)} @{$proj->{users}}));
				my $spncls = length($userlist) > 25 ? '' : ' class="nowrap"';
				$typecol = $userlist ? substr(<<EOT, 0, -1) : substr(<<EOT, 0, -1);
<td class="type"><span class="hover">$users<span$spncls>$userlist</span></span></td>
EOT
<td class="type">$users</td>
EOT
			}
		}
		my $changecol = '';
		if ($withchanged) {
			my $rel = '';
			my $changetime = $proj->{lastchange};
			if ($changetime) {
				$rel = "<span class=\"hover\">" .
					_rel_age(time - parse_rfc2822_date($changetime)) .
					"<span class=\"nowrap\">$changetime</span></span>";
			} else {
				$rel = "no commits";
			}
			$changecol = substr(<<EOT, 0, -1);
<td class="change">$rel</td>
EOT
			my $idletime = $proj->{lastactivity};
			my ($idlesecs, $tz);
			$idlesecs = parse_any_date($idletime, \$tz) if $idletime;
			if ($idlesecs) {
				my $idle2822 = _strftime("%a, %d %b %Y %T %z", $idlesecs, $tz);
				$rel = "<span class=\"hover\">" .
					_rel_idle(time - $idlesecs) .
					"<span class=\"nowrap\">$idle2822</span></span>";
			} else {
				$rel = "no commits";
			}
			$changecol .= substr(<<EOT, 0, -1);
<td class="idle">$rel</td>
EOT
		}
		$html .= <<EOT;
<tr$trclass><td><a href="@{[url_path($Girocco::Config::gitweburl)]}/$projname"$target
>@{[_escapeHTML($projname)]}</a></td>$sizecol$typecol$changecol<td>@{[_escapeHTML($projdesc)]}</td></tr>
EOT
		# alternate the row class between "odd" and nothing
		$trclass = $trclass ? '' : ' class="odd"';
		++$count;
	}
	$html .= <<EOT;
</table>
EOT
	return ($count || (defined($options->{emptyok}) && $options->{emptyok})) ? $html : '';
}
# Lowercase three-letter month abbreviation => zero-based month number.
my %_month_names;
BEGIN {
	%_month_names = (
		jan => 0, feb => 1, mar => 2, apr => 3, may => 4, jun => 5,
		jul => 6, aug => 7, sep => 8, oct => 9, nov => 10, dec => 11
	);
}

# Should be in "date '+%a, %d %b %Y %T %z'" format as saved to lastgc, lastrefresh and lastchange
# The leading "%a, " is optional, returns undef if unrecognized date.  This is also known as
# RFC 2822 date format and git's '%cD', '%aD' and --date=rfc2822 format.
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
# Returns the time in seconds since the epoch (UTC).  Dates that match the
# format but cannot exist (such as "31 Feb") also return undef.
sub parse_rfc2822_date {
	my $dstr = shift || '';
	my $tzoff = shift || '';
	$dstr = $1 if $dstr =~/^[^\s]+,\s*(.*)$/;
	return undef unless $dstr =~
		/^\s*(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{1,2}):(\d{2}):(\d{2})\s+([+-]\d{4})\s*$/;
	my ($d,$mon,$Y,$H,$M,$S,$z) = ($1,$2,$3,$4,$5,$6,$7);
	my $m = $_month_names{lc($mon)};
	return undef unless defined($m);
	# Hand timegm the full year whenever possible: a value in 0..99 (which
	# $Y-1900 produces for 1900-1999) is treated by Time::Local as a
	# two-digit year in a rolling century and can land 100 years off.
	my $year = $Y >= 1000 ? 0+$Y : $Y-1900;
	# timegm croaks on out-of-range fields; turn that into undef
	my $seconds = do {
		local $@;
		eval { timegm(0+$S, 0+$M, 0+$H, 0+$d, 0+$m, $year) };
	};
	return undef unless defined($seconds);
	my $offset = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
	$offset = -$offset if substr($z,0,1) eq '-';
	$$tzoff = $offset if ref($tzoff) eq 'SCALAR';
	return $seconds - $offset;
}
# Will parse any supported date format.  There are four formats
# currently supported:
# 1. RFC 2822 (uses parse_rfc2822_date)
# 2. RFC 3339 / ISO 8601 (T may be ' ' or '_', 'Z' is optional or may be 'UTC', ':' optional in TZ)
# 3. Same as #2 except no colons or hyphens allowed and hours MUST be 2 digits
# 4. unix seconds since epoch with optional +/- trailing TZ (may not have a ':')
# Returns the time in seconds since the epoch (UTC).
# Returns undef if unsupported date; a date in format #2 or #3 that cannot
# exist (such as February 31) also returns undef.
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
sub parse_any_date {
	my $dstr = shift || '';
	my $tzoff = shift || '';
	if ($dstr =~ /^\s*([-+]?\d+)(?:\s+([-+]\d{4}))?\s*$/) {
		# Unix timestamp
		my $ts = 0 + $1;
		my $off = 0;
		if ($2) {
			my $z = $2;
			$off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
			$off = -$off if substr($z,0,1) eq '-';
		}
		$$tzoff = $off if ref($tzoff) eq 'SCALAR';
		return $ts;
	}
	if ($dstr =~ /^\s*(\d{4})-(\d{2})-(\d{2})[Tt _](\d{1,2}):(\d{2}):(\d{2})(?:[ _]?([Zz]|[Uu][Tt][Cc]|(?:[-+]\d{1,2}:?\d{2})))?\s*$/ ||
	    $dstr =~ /^\s*(\d{4})(\d{2})(\d{2})[Tt _](\d{2})(\d{2})(\d{2})(?:[ _]?([Zz]|[Uu][Tt][Cc]|(?:[-+]\d{2}\d{2})))?\s*$/) {
		my ($Y,$m,$d,$H,$M,$S,$z) = ($1,$2,$3,$4,$5,$6,$7||'');
		# Hand timegm the full year whenever possible: a value in 0..99
		# (which $Y-1900 produces for 1900-1999) is treated by Time::Local
		# as a two-digit year in a rolling century.
		my $year = $Y >= 1000 ? 0+$Y : $Y-1900;
		# timegm croaks on out-of-range fields; turn that into undef
		my $seconds = do {
			local $@;
			eval { timegm(0+$S, 0+$M, 0+$H, 0+$d, $m-1, $year) };
		};
		return undef unless defined($seconds);
		defined($z) && $z ne '' or $z = 'Z';
		$z = uc($z);
		$z =~ s/://;
		# "+hmm" => "+0hmm" so the fixed-position substr calls work
		substr($z,1,0) = '0' if length($z) == 4;
		my $off = 0;
		if ($z ne 'Z' && $z ne 'UTC') {
			$off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
			$off = -$off if substr($z,0,1) eq '-';
		}
		$$tzoff = $off if ref($tzoff) eq 'SCALAR';
		return $seconds - $off;
	}
	return parse_rfc2822_date($dstr, $tzoff);
}
# Input is a number such as a minute interval
# Return value is the input plus a random whole number that is less than
# one quarter of the input (a false input is returned as-is)
# This can be used to randomize the update and gc operations a bit to avoid
# having them all end up all clustered together
sub rand_adjust {
	my $input = shift || 0;
	return $input + int(rand(0.25 * $input)) if $input;
	return $input;
}
# Open a pipe to a new sendmail process.  The '-i' option is always passed to
# the new process followed by any additional arguments passed in.  Note that
# the sendmail process is only expected to understand the '-i', '-t' and '-f'
# options.  Using any other options via this function is not guaranteed to work.
# A list of recipients may follow the options.  Combining a list of recipients
# with the '-t' option is not recommended.
# Returns the pipe handle, or undef when called without arguments or when the
# pipe cannot be opened.  Dies if $Girocco::Config::sendmail_bin is unset or
# not executable.
sub sendmail_pipe {
	return undef unless @_;
	my $sendmail = $Girocco::Config::sendmail_bin;
	$sendmail && -x $sendmail
		or die "\$Girocco::Config::sendmail_bin is unset or not executable!\n";
	open(my $pipe, '|-', $sendmail, '-i', @_) or return undef;
	return $pipe;
}
# Open a pipe that works similarly to a mailer such as /usr/bin/mail in that
# if the first argument is '-s', a subject line will be automatically added
# (using the second argument as the subject).  Any remaining arguments are
# expected to be recipient addresses that will be added to an explicit To:
# line as well as passed on to sendmail_pipe (preceded by '-f' and the
# configured sender when $Girocco::Config::sender is set).  In addition an
# "Auto-Submitted: auto-generated" header is always added as well as a suitable
# "From:" header, MIME headers for 8bit UTF-8 plain text and, unless
# suppressed by configuration, an "X-Girocco:" header.  The blank line that
# ends the header section has been written when the pipe is returned.
# Returns whatever sendmail_pipe returned.
sub mailer_pipe {
	my @args = @_;
	my $subject = undef;
	if (@args >= 2 && $args[0] eq '-s') {
		(undef, $subject) = splice(@args, 0, 2);
	}
	my $tolist = join(", ", @args);
	unshift(@args, '-f', $Girocco::Config::sender) if $Girocco::Config::sender;
	my $pipe = sendmail_pipe(@args);
	return $pipe unless $pipe;

	my @headers = (
		"From: \"$Girocco::Config::name\" " .
			"($Girocco::Config::title) " .
			"<$Girocco::Config::admin>\n",
		"To: $tolist\n",
		(defined($subject) ? ("Subject: $subject\n") : ()),
		"MIME-Version: 1.0\n",
		"Content-Type: text/plain; charset=utf-8\n",
		"Content-Transfer-Encoding: 8bit\n",
		($Girocco::Config::suppress_x_girocco ? () :
			("X-Girocco: $Girocco::Config::gitweburl\n")),
		"Auto-Submitted: auto-generated\n",
		"\n",
	);
	print {$pipe} $_ foreach @headers;
	return $pipe;
}
# Clean up a numeric command result: trailing CR/LF characters are removed
# and the value is returned as a number if it is all digits and at least 1.
# Returns undef for anything else (including an undefined argument).
sub _goodval {
	my $val = shift;
	defined($val) or return undef;
	$val =~ s/[\r\n]+$//s;
	$val =~ /^\d+$/ or return undef;
	$val += 0;
	return $val >= 1 ? $val : undef;
}
# Returns the number of "online" cpus or undef if undetermined
# Two getconf names are tried (the underscore-prefixed one first on linux,
# second elsewhere); on non-linux systems sysctl is consulted after that
# (hw.availcpu on darwin, then hw.ncpu).
sub online_cpus {
	my @confcpus = qw(NPROCESSORS_ONLN _NPROCESSORS_ONLN);
	@confcpus = reverse(@confcpus) if $^O eq "linux";
	foreach my $name (@confcpus) {
		my $cpus = _goodval(get_cmd('getconf', $name));
		return $cpus if $cpus;
	}
	if ($^O ne "linux") {
		my @sysctls = (($^O eq "darwin" ? ('hw.availcpu') : ()), 'hw.ncpu');
		foreach my $mib (@sysctls) {
			my $cpus = _goodval(get_cmd('sysctl', '-n', $mib));
			return $cpus if $cpus;
		}
	}
	return undef;
}
# Returns the system page size in bytes or undef if undetermined
# (a reported size that is not all digits or is below 256 counts as
# undetermined).
# This should never fail on a POSIX system
sub sys_pagesize {
	use POSIX ":unistd_h";
	my $pagesize = sysconf(_SC_PAGESIZE);
	return undef unless defined($pagesize) && $pagesize =~ /^\d+$/;
	$pagesize += 0;
	return $pagesize >= 256 ? $pagesize : undef;
}
# Returns the amount of available physical memory in bytes
# This may differ from the actual amount of physical memory installed
# Returns undef if this cannot be determined
# On linux the answer is the page size times getconf _PHYS_PAGES.  Elsewhere
# sysctl is consulted: hw.memsize (darwin only), hw.physmem64, then
# hw.physmem when its value fits in 31 bits, then the page size times
# hw.availpages; as a last resort a hw.physmem value that was too large
# yields 2147483647 + 1.
sub sys_memsize {
	my $pagesize = sys_pagesize();
	if ($^O eq "linux") {
		if ($pagesize) {
			my $pages = _goodval(get_cmd('getconf', '_PHYS_PAGES'));
			return $pagesize * $pages if $pages;
		}
		return undef;
	}
	my @sysctls = (($^O eq "darwin" ? ('hw.memsize') : ()), 'hw.physmem64');
	foreach my $mib (@sysctls) {
		my $memsize = _goodval(get_cmd('sysctl', '-n', $mib));
		return $memsize if $memsize;
	}
	my $memsize32 = _goodval(get_cmd('sysctl', '-n', 'hw.physmem'));
	return $memsize32 if $memsize32 && $memsize32 <= 2147483647;
	if ($pagesize) {
		my $pages = _goodval(get_cmd('sysctl', '-n', 'hw.availpages'));
		return $pagesize * $pages if $pages;
	}
	return 2147483647 + 1 if $memsize32;
	return undef;
}
# Convert a configured size such as "512", "64k", "256m" or "1g" (suffix in
# either case) to a number of bytes.  Returns undef if the argument is
# undefined or is not digits followed by at most one k/m/g suffix.
sub _get_max_conf_suffixed_size {
	my $conf = shift;
	return undef unless defined($conf) && $conf =~ /^(\d+)([kKmMgG]?)$/;
	my ($val, $suffix) = (0+$1, lc($2));
	my %multiplier = (
		''  => 1,
		'k' => 1024,
		'm' => 1024 * 1024,
		'g' => 1024 * 1024 * 1024,
	);
	return $val * $multiplier{$suffix};
}
# Write a byte count using the largest of the suffixes k, m, g that divides
# it evenly (1024 per step); a count that is not a multiple of 1024 is
# returned unchanged.  Counts that are still multiples of 1024 after the
# "m" step are always expressed with the "g" suffix.
sub _make_suffixed_size {
	my $size = shift;
	return $size if $size % 1024;
	foreach my $suffix ('k', 'm') {
		$size /= 1024;
		return "${size}${suffix}" if $size % 1024;
	}
	$size /= 1024;
	return "${size}g";
}
# Return the value to pass to --window-memory= for git repack
# If the system memory or number of CPUs cannot be determined, the starting
# value is 1 gigabyte.
# Otherwise it is one third the available memory divided by the number of
# CPUs but never more than 1 gigabyte.  In both cases the result is further
# limited to max_gc_window_memory_size when that is configured (non-zero).
sub calc_windowmemory {
	my $gib = 1024 * 1024 * 1024;
	my $cpus = online_cpus();
	my $memsize = sys_memsize();
	my $max = $gib;
	if ($cpus && $memsize) {
		my $share = int($memsize / 3 / $cpus);
		$max = $share >= $gib ? $gib : $share;
	}
	my $maxconf = _get_max_conf_suffixed_size($Girocco::Config::max_gc_window_memory_size);
	if (defined($maxconf) && $maxconf && $max > $maxconf) {
		$max = $maxconf;
	}
	return _make_suffixed_size($max);
}
# Return the value to set as core.bigFileThreshold for git repack
# If the system memory cannot be determined, the starting value is 256
# megabytes.
# Otherwise it is the available memory divided by 16 but never more than
# 512 megabytes.  In both cases the result is further limited to
# max_gc_big_file_threshold_size when that is configured (non-zero).
sub calc_bigfilethreshold {
	my $mib = 1024 * 1024;
	my $memsize = sys_memsize();
	my $max = 256 * $mib;
	if ($memsize) {
		my $share = int($memsize / 16);
		$max = $share >= 512 * $mib ? 512 * $mib : $share;
	}
	my $maxconf = _get_max_conf_suffixed_size($Girocco::Config::max_gc_big_file_threshold_size);
	if (defined($maxconf) && $maxconf && $max > $maxconf) {
		$max = $maxconf;
	}
	return _make_suffixed_size($max);
}
# Return the value to use when deciding whether or not to re-calculate object deltas
# If there are no more than this many objects then deltas will be recomputed in
# order to create more efficient pack files.  The new_delta_threshold value
# is constrained to be at least 1000 * cpu cores and no more than 100000.
# The default is sys_memsize rounded up to the nearest multiple of 256 MB and
# then 5000 per 256 MB or 50000 if we cannot determine memory size but never
# more than 100000 or less than 1000 * cpu cores.
# A configured new_delta_threshold below 1000 * cpu cores is ignored in favor
# of the default.  The number of cpu cores is taken as 1 when unknown.
sub calc_redeltathreshold {
	my $cpus = online_cpus() || 1;
	my $floor = 1000 * $cpus;
	my $ceiling = 100000;
	if (defined($Girocco::Config::new_delta_threshold) &&
	    $Girocco::Config::new_delta_threshold =~ /^(\d+)/) {
		# use only the leading digits that were matched
		my $ndt = 0 + $1;
		if ($ndt >= $floor) {
			return $ndt <= $ceiling ? $ndt : $ceiling;
		}
	}
	my $calcval = 50000;
	my $memsize = sys_memsize();
	if ($memsize) {
		my $quantum = 256 * 1024 * 1024;
		$calcval = 5000 * int(($memsize + ($quantum - 1)) / $quantum);
	}
	# the limits apply to the 50000 fallback as well as to the computed value
	$calcval = $floor if $calcval < $floor;
	$calcval = $ceiling if $calcval > $ceiling;
	return $calcval;
}
# Return 1 if the name ends in a reserved ".suffix", 0 otherwise.
# $1 => thing to test
# $2 => optional directory, if given and -e "$2/$1$3", then return false
# $3 => optional, defaults to ''
# The suffix is whatever follows the last '.'; it is looked up in lowercase
# in %Girocco::Config::reserved_suffixes.
sub has_reserved_suffix {
	no warnings; # avoid silly 'unsuccessful stat on filename with \n' warning
	my ($name, $dir, $ext) = @_;
	$ext = '' unless defined($ext);
	return 0 unless defined($name);
	my ($suffix) = $name =~ /\.([^.]+)$/
		or return 0;
	return 0 unless exists($Girocco::Config::reserved_suffixes{lc($suffix)});
	return 0 if defined($dir) && -e "$dir/$name$ext";
	return 1;
}
969 # mostly undoes effect of `use CGI::Carp qw(fatalsToBrowser);`
970 # mostly undoes effect of `use CGI::Carp qw(warningsToBrowser);`
971 sub noFatalsToBrowser {
972 delete $SIG{__DIE__};
973 delete $SIG{__WARN__};
974 undef *CORE::GLOBAL::die;
975 *CORE::GLOBAL::die = sub {
976 no warnings;
977 my $ec = $! || ($? >> 8) || 255;
978 my (undef, $fn, $li) = caller(0);
979 my $loc = " at " . $fn . " line " . $li . ".\n";
980 my $msg = "";
981 $msg = join("", @_) if @_;
982 $msg = "Died" if $msg eq "";
983 $msg .= $loc unless $msg =~ /\n$/;
984 die $msg if $^S;
985 printf STDERR "%s", $msg;
986 exit($ec);
988 undef *CORE::GLOBAL::warn;
989 *CORE::GLOBAL::warn = sub {
990 no warnings;
991 my (undef, $fn, $li) = caller(0);
992 my $loc = " at " . $fn . " line " . $li . ".\n";
993 my $msg = "";
994 $msg = join("", @_) if @_;
995 $msg = "Warning: something's wrong" if $msg eq "";
996 $msg .= $loc unless $msg =~ /\n$/;
997 printf STDERR "%s", $msg;