mirroring: add support for hg+https? mirror source URLs
[girocco.git] / Girocco / Util.pm
blob9c26a98b3ec25443cea002644f5b6fc04e47dfff
1 package Girocco::Util;
3 use strict;
4 use warnings;
6 use Girocco::Config;
7 use Time::Local;
9 BEGIN {
10 use base qw(Exporter);
11 our @EXPORT = qw(scrypt jailed_file sendmail_pipe mailer_pipe
12 lock_file unlock_file valid_tag rand_adjust
13 filedb_atomic_append filedb_atomic_edit filedb_grep
14 filedb_atomic_grep valid_email valid_email_multi
15 valid_repo_url valid_web_url url_base url_path url_server
16 projects_html_list parse_rfc2822_date parse_any_date);
# Hash a password with crypt() using a freshly generated random two-character
# salt drawn from the [./0-9A-Za-z] alphabet.
# An undefined password is hashed as the empty string.
# Returns the resulting crypt string (salt included).
sub scrypt {
	my ($pwd) = @_;
	# Only undef falls back to ''; a plain `||` would also turn the
	# legitimate password "0" into the empty string.
	$pwd = '' unless defined($pwd);
	my @saltchars = ('.', '/', 0..9, 'A'..'Z', 'a'..'z');
	my $salt = join('', @saltchars[rand(64), rand(64)]);
	return crypt($pwd, $salt);
}
# Map a path as seen inside the chroot jail to the corresponding path outside
# of it: one leading '/' is removed from the argument and the result is
# appended to $Girocco::Config::chroot with a '/' in between.
sub jailed_file {
	my ($filename) = @_;
	$filename =~ s{^/}{};
	return $Girocco::Config::chroot . '/' . $filename;
}
# Acquire the lock for $path by exclusively creating "$path.lock".
# While the lock file already exists the attempt is retried once per second;
# after 16 sleeps, or on any error other than EEXIST, the sub dies.
# On success the lock file is made group writable (0664) and the write-only
# handle open on it is returned.  The caller is expected to write the new file
# contents to that handle and then call unlock_file.
sub lock_file {
	my ($path) = @_;

	$path .= '.lock';

	use Errno qw(EEXIST);
	use Fcntl qw(O_WRONLY O_CREAT O_EXCL);
	use IO::Handle;
	my $handle = IO::Handle->new;

	unless (sysopen($handle, $path, O_WRONLY|O_CREAT|O_EXCL)) {
		my $cnt = 0;
		while (not sysopen($handle, $path, O_WRONLY|O_CREAT|O_EXCL)) {
			($! == EEXIST) or die "$path open failed: $!";
			($cnt++ < 16) or die "$path open failed: cannot open lockfile";
			sleep(1);
		}
	}
	# XXX: filedb-specific
	unless (chmod 0664, $path) {
		# We own the lock at this point; remove it again so that the
		# failure does not leave a stale lock behind that would block
		# every later caller.
		my $err = "$!";
		close $handle;
		unlink $path;
		die "$path g+w failed: $err";
	}

	$handle;
}
# True when the argument is defined and is exactly the path that
# jailed_file('/etc/passwd') yields.
sub _is_passwd_file {
	my ($candidate) = @_;
	return defined($candidate) && $candidate eq jailed_file('/etc/passwd');
}
# Run $Girocco::Config::basedir/bin/update-pwd-db on $path, passing $updatearg
# as an additional argument when it is true.  The command is run without a
# shell; dies if it does not exit successfully.
sub _run_update_pwd_db {
	my ($path, $updatearg) = @_;
	my $tool = $Girocco::Config::basedir.'/bin/update-pwd-db';
	my @args = ("$path");
	push(@args, $updatearg) if $updatearg;
	my $status = system($tool, @args);
	$status == 0 or die "update-pwd-db failed: $?";
}
# Release the lock taken by lock_file($path).
# When $noreplace is false the contents written to "$path.lock" become the new
# $path (via rename); if $path is the jailed passwd file and
# $Girocco::Config::update_pwd_db is set, update-pwd-db is run on the lock file
# first, with $updatearg passed along.
# When $noreplace is true the lock file is simply removed.
# Dies if the rename or unlink fails.
sub unlock_file {
	my ($path, $noreplace, $updatearg) = @_;
	my $lockpath = "$path.lock";

	if ($noreplace) {
		unlink $lockpath or die "$path unlock failed: $!";
	} else {
		if ($Girocco::Config::update_pwd_db && _is_passwd_file($path)) {
			_run_update_pwd_db($lockpath, $updatearg);
		}
		rename $lockpath, $path or die "$path unlock failed: $!";
	}
}
# Append $line to the colon-separated database $file while holding its lock.
# Every existing line is copied to the lock file; the third field of each line
# is treated as a numeric id and the new id becomes one more than the largest
# id seen, but never less than 65536.  Each literal "\i" in $line is replaced
# by the new id before the line is written.  $updatearg is handed to
# unlock_file.  Returns the new id; dies on any I/O failure.
sub filedb_atomic_append {
	my ($file, $line, $updatearg) = @_;
	my $id = 65536;

	open my $src, '<', $file or die "$file open for reading failed: $!";
	my $dst = lock_file($file);

	# A lexical loop variable keeps the caller's $_ intact.
	while (defined(my $entry = <$src>)) {
		my $aid = (split /:/, $entry)[2];
		# Lines with a missing or non-numeric id field are copied through
		# but do not take part in the id calculation (and no longer
		# trigger "uninitialized"/"isn't numeric" warnings).
		$id = $aid + 1 if defined($aid) && $aid =~ /^\d+$/ && $aid >= $id;

		print $dst $entry or die "$file(l) write failed: $!";
	}

	$line =~ s/\\i/$id/g;
	print $dst "$line\n" or die "$file(l) write failed: $!";

	close $dst or die "$file(l) close failed: $!";
	close $src;

	unlock_file($file, 0, $updatearg);

	$id;
}
# Rewrite $file line by line while holding its lock.  $fn is called with each
# input line and whatever it returns is written to the lock file in place of
# that line.  The lock file then replaces $file via unlock_file, which also
# receives $updatearg.  Dies on any I/O failure.
sub filedb_atomic_edit {
	my ($file, $fn, $updatearg) = @_;

	open(my $reader, '<', $file) or die "$file open for reading failed: $!";
	my $writer = lock_file($file);

	# $_ is kept as the loop variable on purpose: it is what gets passed
	# to the callback.
	while (defined($_ = readline($reader))) {
		print {$writer} $fn->($_) or die "$file(l) write failed: $!";
	}

	close($writer) or die "$file(l) close failed: $!";
	close($reader);

	unlock_file($file, 0, $updatearg);
}
# Scan $file while holding its lock.  $fn is called with each line and every
# true value it returns is collected.  The file itself is left untouched (the
# lock file is discarded afterwards).  Returns the list of collected values;
# dies on any I/O failure.
sub filedb_atomic_grep {
	my ($file, $fn) = @_;
	my @results;

	open(my $reader, '<', $file) or die "$file open for reading failed: $!";
	my $lock = lock_file($file);

	# $_ is kept as the loop variable on purpose: it is what gets passed
	# to the callback.
	while (defined($_ = readline($reader))) {
		my $hit = $fn->($_);
		next unless $hit;
		push(@results, $hit);
	}

	close($lock) or die "$file(l) close failed: $!";
	close($reader);

	unlock_file($file, 1);
	return @results;
}
# Scan $file without taking any lock.  $fn is called with each line and every
# true value it returns is collected.  Returns the list of collected values;
# dies if the file cannot be opened.
sub filedb_grep {
	my ($file, $fn) = @_;
	my @results;

	open(my $reader, '<', $file) or die "$file open for reading failed: $!";

	# $_ is kept as the loop variable on purpose: it is what gets passed
	# to the callback.
	while (defined($_ = readline($reader))) {
		my $hit = $fn->($_);
		next unless $hit;
		push(@results, $hit);
	}

	close($reader);

	return @results;
}
# Return true if the argument looks like a single plain e-mail address:
# one or more of [a-zA-Z0-9+._-], an '@', then one or more of [a-zA-Z0-9.-].
# Nothing else is allowed; in particular a trailing newline is rejected.
sub valid_email {
	my ($email) = @_;
	$email = '' unless defined($email);
	# \z rather than $: `$` would also match just before a final "\n" and
	# so let a line break slip through validation.
	return $email =~ /\A[a-zA-Z0-9+._-]+\@[a-zA-Z0-9.-]+\z/;
}
# Return true if the argument could be a list of e-mail addresses.
# More relaxed than valid_email, we just want to avoid too dangerous
# characters: only [a-zA-Z0-9+._, @-] are accepted and the string must not be
# empty.  A trailing newline is rejected.
sub valid_email_multi {
	my ($emails) = @_;
	$emails = '' unless defined($emails);
	# \z rather than $: `$` would also match just before a final "\n" and
	# so let a line break slip through validation.
	return $emails =~ /\A[a-zA-Z0-9+._, \@-]+\z/;
}
# Return true if the argument is an acceptable http:// or https:// URL:
# a host[:port] part made of [a-zA-Z0-9.:-], an optional path/query made of
# [_%a-zA-Z0-9./~:?&=;-] and an optional #fragment made of [a-zA-Z0-9._-].
# A trailing newline is rejected.
sub valid_web_url {
	my ($url) = @_;
	$url = '' unless defined($url);
	# \z rather than $: `$` would also match just before a final "\n" and
	# so let a line break slip through validation.
	return $url =~ m{\Ahttps?://[a-zA-Z0-9.:-]+(/[_\%a-zA-Z0-9./~:?&=;-]*)?(#[a-zA-Z0-9._-]+)?\z};
}
# Return true if the argument is an acceptable mirror source URL.
# http://, https:// and git:// URLs are always acceptable.  The schemes
# svn:// / svn+http(s)://, darcs://, bzr:// and hg+http(s):// are acceptable
# only when the corresponding $Girocco::Config::mirror_* setting is true.
# Currently neither username nor password is allowed in the URL and IPv6
# literal addresses are not accepted either.  A trailing newline is rejected.
sub valid_repo_url {
	my $url = shift || '';
	# host[:port] followed by an optional path; shared by every scheme
	my $hostpath = qr{[a-zA-Z0-9.:-]+(?:/[_\%a-zA-Z0-9./~-]*)?};
	# All patterns are anchored with \z rather than $: `$` would also match
	# just before a final "\n" and so let a line break slip through.
	$Girocco::Config::mirror_svn &&
		$url =~ m{\Asvn(?:\+https?)?://$hostpath\z}
		and return 1;
	$Girocco::Config::mirror_darcs &&
		$url =~ m{\Adarcs://$hostpath\z}
		and return 1;
	$Girocco::Config::mirror_bzr &&
		$url =~ m{\Abzr://$hostpath\z}
		and return 1;
	$Girocco::Config::mirror_hg &&
		$url =~ m{\Ahg\+https?://$hostpath\z}
		and return 1;
	return $url =~ m{\A(https?|git)://[a-zA-Z0-9.:-]+(/[_\%a-zA-Z0-9./~-]*)?\z};
}
# Noise words that are never acceptable as tags (looked up in lower case).
my %_badtags;
BEGIN {
	%_badtags = map { ($_ => 1) } qw(
		about after all also an and another any
		are as at be because been before being
		between both but by came can come could
		did do each for from get got had has
		have he her here him himself his how
		if in into is it like make many me
		might more most much must my never now
		of on only or other our out over
		said same see should since some still
		such take than that the their them then
		there these they this those through to
		too under up very was way we well
		were what where which while who with
		would you your
	);
}

# A valid tag must only have [a-zA-Z0-9:.+#_-] characters, must start with a
# letter, must not be a noise word and except for 'C' must be more than one
# character long and no more than 32 characters long.
# Returns 1 for a valid tag and 0 otherwise.
sub valid_tag {
	my $tag = $_[0] || '';
	return 1 if $tag eq 'C'; # Currently only allowed single letter tag
	# \z rather than $: `$` would also accept a tag followed by "\n".
	return 0 unless $tag =~ /\A[a-zA-Z][a-zA-Z0-9:.+#_-]+\z/;
	return 0 if $_badtags{lc($tag)};
	return length($tag) <= 32 ? 1 : 0;
}
# If the passed in argument looks like a URL, return only the stuff up through
# the host:port part (scheme, "//", optional userinfo, host and port) otherwise
# return the entire argument.  An undefined or false argument yields ''.
sub url_base {
	my $url = shift || '';
	# See RFC 3986
	# The userinfo part must not contain '/', '?' or '#'; otherwise an '@'
	# appearing later in the path or query would be mistaken for the end
	# of the userinfo and part of the path would be returned as the host.
	$url = $1.$2.$3.$4 if $url =~ m{^
		( [A-Za-z][A-Za-z0-9+.-]*: )	# scheme
		( // )				# // separator
		( (?: [^\@/?\#]+ \@ )? )	# optional userinfo
		( [^/?\#]+ )			# host and port
		(?: [/?\#] .* )? $		# path and optional query string and/or anchor
	}x;
	return $url;
}
# If the passed in argument looks like a URL, return only the stuff following
# the host:port part (path and optional query string and/or anchor) otherwise
# return the entire argument.  If the optional second argument is true an
# empty result is replaced with '/'.
sub url_path {
	my $url = shift || '';
	my $no_empty = shift || 0;
	# See RFC 3986
	# The userinfo part must not contain '/', '?' or '#'; otherwise an '@'
	# appearing later in the path or query would be mistaken for the end
	# of the userinfo and the start of the path would be lost.
	$url = $1 if $url =~ m{^
		(?: [A-Za-z][A-Za-z0-9+.-]*: )	# scheme
		(?: // )			# // separator
		(?: [^\@/?\#]+ \@ )?		# optional userinfo
		(?: [^/?\#]+ )			# host and port
		( (?: [/?\#] .* )? ) $		# path and optional query string and/or anchor
	}x;
	$url = '/' if $no_empty && $url eq '';
	return $url;
}
# Rebase a URL onto the server that is handling the current request.
# The argument is passed through url_path; if SERVER_NAME and a numeric
# SERVER_PORT are both set in the environment the result is prefixed with the
# scheme ("https" when HTTPS is "on", otherwise "http"), the server name and,
# unless it is the scheme's default (443 or 80), the port.
# The argument is returned unchanged when url_path's result is neither empty
# nor starts with '/', '?' or '#', when the environment settings are missing
# or when SERVER_NAME is a link-local (fe80:) IPv6 address.
sub url_server {
	my $url = shift || '';
	my $path = url_path($url);
	if ($path ne '' && $path !~ m|^[/?#]|) {
		return $url;
	}
	my ($name, $portstr) = @ENV{'SERVER_NAME', 'SERVER_PORT'};
	unless ($name && $portstr && $portstr =~ /^[1-9][0-9]{0,4}$/) {
		return $url;
	}
	if ($name =~ /^[[]?fe80:/i) {
		return $url;
	}
	# Deal with Apache bug where IPv6 literal server names do not include
	# the required surrounding '[' and ']' characters
	my $server = $name;
	if ($server =~ /:/ && $server !~ /^[[]/) {
		$server = '[' . $server . ']';
	}
	my $ishttps = $ENV{'HTTPS'} && $ENV{'HTTPS'} =~ /^on$/i;
	my $portnum = 0 + $portstr;
	my $defport = $ishttps ? 443 : 80;
	my $port = $portnum != $defport ? ':' . $portnum : '';
	my $scheme = $ishttps ? 'https' : 'http';
	return $scheme . '://' . $server . $port . $path;
}
# Return a copy of the argument with the characters & < > " replaced by the
# corresponding HTML entities.
sub _escapeHTML {
	my $str = shift;
	my %entity = (
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		'"' => '&quot;',
	);
	$str =~ s/([&<>"])/$entity{$1}/g;
	return $str;
}
# create relative time string from passed in age in seconds
# The largest unit of which the age exceeds two is used ("3 years ago",
# "5 weeks ago", "17 mins ago", ...) with the count truncated to an integer.
# Ages from 0 through 2 seconds give "right now", negative ages "future time".
sub _rel_age {
	my $age = shift;

	# [ lower bound (exclusive), successive divisors, unit name ]
	my @scales = (
		[60*60*24*365*2,      [60, 60, 24, 365],      'years'],
		[60*60*24*(365/12)*2, [60, 60, 24, (365/12)], 'months'],
		[60*60*24*7*2,        [60, 60, 24, 7],        'weeks'],
		[60*60*24*2,          [60, 60, 24],           'days'],
		[60*60*2,             [60, 60],               'hours'],
		[60*2,                [60],                   'mins'],
		[2,                   [],                     'secs'],
	);

	foreach my $scale (@scales) {
		my ($floor, $divisors, $unit) = @$scale;
		next unless $age > $floor;
		my $count = $age;
		foreach my $divisor (@$divisors) {
			$count = $count / $divisor;
		}
		return int($count) . " $unit ago";
	}

	return $age >= 0 ? "right now" : "future time";
}
# create relative time string from passed in idle in seconds
# Same as _rel_age but without the " ago" suffix and with "not at all" in
# place of "right now".
sub _rel_idle {
	my ($idle) = @_;
	(my $text = _rel_age($idle)) =~ s/ ago//;
	return $text eq "right now" ? "not at all" : $text;
}
# Format the time $secs (seconds since the epoch) as it reads in a zone that
# is $zonesecs seconds east of UTC, using the strftime format $fmt.
# Every %z in the format is replaced by that zone's offset in +HHMM / -HHMM
# form instead of whatever strftime itself would produce for it.
sub _strftime {
	use POSIX qw(strftime);
	my ($fmt, $secs, $zonesecs) = @_;
	my ($S,$M,$H,$d,$m,$y) = gmtime($secs + $zonesecs);

	# Build the numeric zone suffix from the offset in whole minutes
	my $zonemins = int($zonesecs / 60);
	my $sign = '+';
	if ($zonemins < 0) {
		$sign = '-';
		$zonemins = -$zonemins;
	}
	my $zone = $sign . sprintf("%02d%02d", int($zonemins/60), $zonemins % 60);

	# Hide %z from strftime behind a '$z' placeholder, then fill it in
	$fmt =~ s/%z/\$z/g;
	my $ans = strftime($fmt, $S, $M, $H, $d, $m, $y, -1, -1, -1);
	$ans =~ s/\$z/$zone/g;
	return $ans;
}
# Take a list of project names and produce a nicely formatted table that
# includes owner links and descriptions.  If the list is empty returns ''.
# The first argument may be a hash ref that contains options.  The following
# options are available:
#   target  -- sets the target value of the owner link
#   emptyok -- if true returns an empty table rather than ''
#   typecol -- if true include type column with hover info
#   changed -- if true include a changed and idle column
# Names for which Girocco::Project::does_exist is false are skipped; if no
# row at all was produced '' is returned unless emptyok is set.
sub projects_html_list {
	my $options = {};
	if (defined($_[0]) && ref($_[0]) eq 'HASH') {
		$options = shift;
	}
	return '' unless @_ || (defined($options->{emptyok}) && $options->{emptyok});
	require Girocco::Project;
	my $count = 0;
	my $target = '';
	$target = " target=\""._escapeHTML($options->{target})."\""
		if defined($options->{target});
	my $withtype = defined($options->{typecol}) && $options->{typecol};
	my $withchanged = defined($options->{changed}) && $options->{changed};
	my $typehead = '';
	$typehead = '<th>Type</th>' if $withtype;
	my $chghead = '';
	# substr(..., 0, -1) drops the final newline of a here-document
	$chghead = substr(<<EOT, 0, -1) if $withchanged;
<th><span class="hover">Changed<span><span class="head">Changed</span
/>The last time a ref change was received by this site.</span></span></th
><th><span class="hover">Idle<span><span class="head">Idle</span
/>The most recent committer time in <i>refs/heads</i>.</span></span></th
>
EOT
	my $html = <<EOT;
<table class='projectlist'><tr><th>Project</th>$typehead$chghead<th class="desc">Description</th></tr>
EOT
	my $trclass = ' class="odd"';
	foreach my $name (sort({lc($a) cmp lc($b)} @_)) {
		if (Girocco::Project::does_exist($name, 1)) {
			my $proj = Girocco::Project->load($name);
			my $projname = $proj->{name}.".git";
			my $projdesc = $proj->{desc}||'';
			utf8::decode($projdesc) if utf8::valid($projdesc);
			my $typecol = '';
			if ($withtype) {
				if ($proj->{mirror}) {
					$typecol = substr(<<EOT, 0, -1);
<td class="type"><span class="hover">mirror<span class="nowrap">@{[_escapeHTML($proj->{url})]}</span></span></td>
EOT
				} else {
					my $users = @{$proj->{users}};
					$users .= ' user';
					$users .= 's' unless @{$proj->{users}} == 1;
					my $userlist = join(', ', sort({lc($a) cmp lc($b)} @{$proj->{users}}));
					my $spncls = length($userlist) > 25 ? '' : ' class="nowrap"';
					$typecol = $userlist ? substr(<<EOT, 0, -1) : substr(<<EOT, 0, -1);
<td class="type"><span class="hover">$users<span$spncls>$userlist</span></span></td>
EOT
<td class="type">$users</td>
EOT
				}
			}
			my $changecol = '';
			if ($withchanged) {
				my $rel = '';
				my $changetime = $proj->{lastchange};
				# An unparsable date is treated like a missing one
				# rather than being fed to the subtraction as undef.
				my $changesecs;
				$changesecs = parse_rfc2822_date($changetime) if $changetime;
				if (defined($changesecs)) {
					$rel = "<span class=\"hover\">" .
						_rel_age(time - $changesecs) .
						"<span class=\"nowrap\">$changetime</span></span>";
				} else {
					$rel = "no commits";
				}
				$changecol = substr(<<EOT, 0, -1);
<td class="change">$rel</td>
EOT
				my $idletime = $proj->{lastactivity};
				my ($idlesecs, $tz);
				$idlesecs = parse_any_date($idletime, \$tz) if $idletime;
				if ($idlesecs) {
					my $idle2822 = _strftime("%a, %d %b %Y %T %z", $idlesecs, $tz);
					$rel = "<span class=\"hover\">" .
						_rel_idle(time - $idlesecs) .
						"<span class=\"nowrap\">$idle2822</span></span>";
				} else {
					$rel = "no commits";
				}
				$changecol .= substr(<<EOT, 0, -1);
<td class="idle">$rel</td>
EOT
			}
			# The project link is closed with </a> before the cell ends
			$html .= <<EOT;
<tr$trclass><td><a href="@{[url_path($Girocco::Config::gitweburl)]}/$projname"$target
>@{[_escapeHTML($projname)]}</a></td>$typecol$changecol<td>@{[_escapeHTML($projdesc)]}</td></tr>
EOT
			$trclass = $trclass ? '' : ' class="odd"';
			++$count;
		}
	}
	$html .= <<EOT;
</table>
EOT
	return ($count || (defined($options->{emptyok}) && $options->{emptyok})) ? $html : '';
}
# Lower case three letter month abbreviation => month number as used by timegm
my %_month_names;
BEGIN {
	%_month_names = (
		jan => 0, feb => 1, mar => 2, apr => 3, may => 4, jun => 5,
		jul => 6, aug => 7, sep => 8, oct => 9, nov => 10, dec => 11
	);
}

# Should be in "date '+%a, %d %b %Y %T %z'" format as saved to lastgc, lastrefresh and lastchange
# The leading "%a, " is optional, returns undef if unrecognized date.  This is also known as
# RFC 2822 date format and git's '%cD', '%aD' and --date=rfc2822 format.
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
# Returns the date as seconds since the epoch (UTC).  A date with an out of
# range field (such as day 32) or a year before 1000 is unrecognized too.
sub parse_rfc2822_date {
	my $dstr = shift || '';
	my $tzoff = shift || '';
	$dstr = $1 if $dstr =~/^[^\s]+,\s*(.*)$/;
	return undef unless $dstr =~
		/^\s*(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{1,2}):(\d{2}):(\d{2})\s+([+-]\d{4})\s*$/;
	my ($d,$mon,$Y,$H,$M,$S,$z) = ($1,$2,$3,$4,$5,$6,$7);
	my $m = $_month_names{lc($mon)};
	return undef unless defined($m);
	# timegm only treats years above 999 as actual years.  Passing
	# "$Y - 1900" would turn e.g. 1970 into the two digit year 70, which
	# timegm maps into a rolling century around the current year.
	return undef if $Y < 1000;
	# timegm croaks on out of range values; report those as unrecognized
	my $seconds = do {
		local $@;
		eval { timegm(0+$S, 0+$M, 0+$H, 0+$d, 0+$m, 0+$Y) };
	};
	return undef unless defined($seconds);
	my $offset = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
	$offset = -$offset if substr($z,0,1) eq '-';
	$$tzoff = $offset if ref($tzoff) eq 'SCALAR';
	return $seconds - $offset;
}
# Will parse any supported date format.  Actually there are three formats
# currently supported:
# 1. RFC 2822 (uses parse_rfc2822_date)
# 2. RFC 3339 / ISO 8601 (T may be ' ', TZ is optional and may be 'Z' or a
#    numeric offset with or without ':', with or without a preceding space;
#    a missing TZ means UTC)
# 3. unix seconds since epoch with optional +/- trailing TZ (may not have a ':')
# Returns the date as seconds since the epoch (UTC) or undef if unsupported
# date (which includes dates with out of range fields or a year before 1000).
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
sub parse_any_date {
	my $dstr = shift;
	# Only undef is replaced; the timestamp "0" is a perfectly good date
	$dstr = '' unless defined($dstr);
	my $tzoff = shift || '';
	if ($dstr =~ /^\s*([-+]?\d+)(?:\s+([-+]\d{4}))?\s*$/) {
		# Unix timestamp
		my ($ts, $z) = (0 + $1, $2);
		my $off = 0;
		if ($z) {
			$off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
			$off = -$off if substr($z,0,1) eq '-';
		}
		$$tzoff = $off if ref($tzoff) eq 'SCALAR';
		return $ts;
	}
	if ($dstr =~ /^\s*(\d{4})-(\d{2})-(\d{2})[Tt ](\d{2}):(\d{2}):(\d{2})(?:[ ]?([Zz]|(?:[-+]\d{2}:?\d{2})))?\s*$/) {
		my ($Y,$m,$d,$H,$M,$S,$z) = ($1,$2,$3,$4,$5,$6,$7||'');
		# timegm only treats years above 999 as actual years.  Passing
		# "$Y - 1900" would turn e.g. 1970 into the two digit year 70,
		# which timegm maps into a rolling century around the current year.
		return undef if $Y < 1000;
		# timegm croaks on out of range values; report those as unsupported
		my $seconds = do {
			local $@;
			eval { timegm(0+$S, 0+$M, 0+$H, 0+$d, $m-1, 0+$Y) };
		};
		return undef unless defined($seconds);
		$z =~ s/://;
		my $off = 0;
		# No TZ at all and 'Z' both mean an offset of zero
		if ($z ne '' && uc($z) ne 'Z') {
			$off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
			$off = -$off if substr($z,0,1) eq '-';
		}
		$$tzoff = $off if ref($tzoff) eq 'SCALAR';
		return $seconds - $off;
	}
	return parse_rfc2822_date($dstr, $tzoff);
}
# Input is a number such as a minute interval
# Return value is the input plus a random whole number that is at least 0 and
# less than 0.25*input; an undefined or zero input is returned as 0.
# This can be used to randomize the update and gc operations a bit to avoid
# having them all end up all clustered together
sub rand_adjust {
	my $input = shift || 0;
	$input or return $input;
	my $jitter = int(rand(0.25 * $input));
	return $input + $jitter;
}
# Open a pipe to a new sendmail process.  The '-i' option is always passed to
# the new process followed by any additional arguments passed in.  Note that
# the sendmail process is only expected to understand the '-i', '-t' and '-f'
# options.  Using any other options via this function is not guaranteed to work.
# A list of recipients may follow the options.  Combining a list of recipients
# with the '-t' option is not recommended.
# Returns the write handle of the pipe, or undef when called without arguments
# or when the pipe cannot be opened.  Dies if $Girocco::Config::sendmail_bin is
# unset or not executable.
sub sendmail_pipe {
	return undef unless @_;
	my $sendmail = $Girocco::Config::sendmail_bin;
	unless ($sendmail && -x $sendmail) {
		die "\$Girocco::Config::sendmail_bin is unset or not executable!\n";
	}
	open(my $pipe, '|-', $sendmail, '-i', @_) or return undef;
	return $pipe;
}
# Open a pipe that works similarly to a mailer such as /usr/bin/mail in that
# if the first argument is '-s', a subject line will be automatically added
# (using the second argument as the subject).  Any remaining arguments are
# expected to be recipient addresses that will be added to an explicit To:
# line as well as passed on to sendmail_pipe.  When $Girocco::Config::sender
# is set it is passed to sendmail_pipe via '-f'.  In addition an
# "Auto-Submitted: auto-generated" header is always added as well as a suitable
# "From:" header.  The headers are followed by the empty line that ends them,
# so the caller can write the message body right away.
# Returns whatever sendmail_pipe returned.
sub mailer_pipe {
	my @args = @_;
	my $subject = undef;
	if (@args >= 2 && $args[0] eq '-s') {
		(undef, $subject) = splice(@args, 0, 2);
	}
	my $tolist = join(", ", @args);
	unshift(@args, '-f', $Girocco::Config::sender) if $Girocco::Config::sender;
	my $pipe = sendmail_pipe(@args);
	return $pipe unless $pipe;

	print {$pipe} "From: \"$Girocco::Config::name\" ",
		"($Girocco::Config::title) ",
		"<$Girocco::Config::admin>\n";
	print {$pipe} "To: $tolist\n";
	if (defined($subject)) {
		print {$pipe} "Subject: $subject\n";
	}
	print {$pipe} "Auto-Submitted: auto-generated\n";
	print {$pipe} "\n";
	return $pipe;
}