From cdb317fcf976d653bd605a8a70d83d97b7a43cc5 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sat, 12 Nov 2016 19:01:11 -0800 Subject: [PATCH] tags: implement tag folding and refactor code The tag cloud is most useful if tags are counted in a case-insensitive fashion. Arrange to automatically lower-case tags by default. However, any explicitly whitelisted tag will use the case of its whitelist entry instead. For example, if the tag "Git" is explicitly whitelisted and tag folding (the new $Girocco::Config::foldtags option) is enabled (it is by default), then adding any of the tag names "git", "giT", "gIt", "gIT", "Git", "GiT", "GIt" or "GIT" will result in incrementing the same tag named "Git". If tag folding is enabled and "Git" is NOT explicitly whitelisted (the default), then any of those same eight tags would end up incrementing the tag named "git" instead. If tag folding is disabled, then adding those eight different tags will end up incrementing eight different tags (unless the file system is case-insensitive). As part of this change, the tag-incrementing logic has been moved from tagproj.cgi into a new Girocco::Project::add_ctag member function to match the existing delete_ctag member function. Now tagproj.cgi calls add_ctag to add tags. Signed-off-by: Kyle J. McKay --- Girocco/Config.pm | 13 +++++++++++-- Girocco/Project.pm | 31 ++++++++++++++++++++++++++++++- Girocco/Util.pm | 43 +++++++++++++++++++++++++------------------ cgi/tagproj.cgi | 22 +++++++--------------- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/Girocco/Config.pm b/Girocco/Config.pm index 032b792..e26b3f2 100644 --- a/Girocco/Config.pm +++ b/Girocco/Config.pm @@ -757,11 +757,20 @@ our @throttle_classes = (); # Any tag names listed here will be allowed even if they would otherwise not be. # Note that @allowed_tags takes precedence over @blocked_tags. 
-our @allowed_tags = (); +our @allowed_tags = (qw( )); # Any tag names listed here will be disallowed in addition to the standard # list of nonsense words etc. that are blocked as tags. -our @blocked_tags = (); +our @blocked_tags = (qw( )); + +# Case folding tags +# If this setting is true, then tags that differ only in case will always use +# the same-cased version. If this setting is enabled and the tag is present in +# @allowed_tags (or the embedded white list in Util.pm) then the case of the +# tag will match the white list entry otherwise it will be all lowercased. +# If this setting is disabled (false) tags are used with their case left as-is. +# RECOMMENDED VALUE: 1 (true) +our $foldtags = 1; # If there are no more than this many objects, then all deltas will be # recomputed when gc takes place. Note that this does not affect any diff --git a/Girocco/Project.pm b/Girocco/Project.pm index 75cb7d5..e86c570 100644 --- a/Girocco/Project.pm +++ b/Girocco/Project.pm @@ -1296,7 +1296,36 @@ sub delete_ctag { my ($ctag) = @_; # sanity check, disallow filenames starting with . .. or / - unlink($self->{path}.'/ctags/'.$ctag) if($ctag !~ m|^(\.\.?)/|); + unlink($self->{path}.'/ctags/'.$ctag) + unless !defined($ctag) || $ctag =~ m|^/| || $ctag =~ m{(?:^|/)(?:\.\.?)(?:/|$)}; +} + +# returns new tag count value on success (will always be >= 1) otherwise undef +sub add_ctag { + my $self = shift; + my $ctag = valid_tag(shift); + my $nochanged = shift; + + # sanity check, disallow filenames starting with . .. or / + return undef if !defined($ctag) || $ctag =~ m|^/| || $ctag =~ m{(?:^|/)(?:\.\.?)(?:/|$)}; + + my $val = 0; + my $ct; + if (open $ct, '<', $self->{path}."/ctags/$ctag") { + my $count = <$ct>; + close $ct; + defined $count or $count = ''; + chomp $count; + $val = $count =~ /^[1-9]\d*$/ ? 
$count : 1; + } + ++$val; + my $oldmask = umask(); + umask($oldmask & ~0060); + open $ct, '>', $self->{path}."/ctags/$ctag" and print $ct $val."\n" and close $ct; + $self->_set_changed unless $nochanged; + $self->_set_forkchange unless $nochanged; + umask($oldmask); + return $val; } sub get_ctag_names { diff --git a/Girocco/Util.pm b/Girocco/Util.pm index 62b8fa0..bc235ba 100644 --- a/Girocco/Util.pm +++ b/Girocco/Util.pm @@ -328,18 +328,15 @@ sub is_our_hostname { return $names{lc($test)} ? 1 : 0; } -my (%_oktags, %_badtags); +my (%_oktags, %_badtags, %_canontags, $_canontagscreated, @_whitetags); BEGIN { - # entries MUST be all lowercase to be effective - %_oktags = ( - # These are always okay (a "whitelist") even if they would otherwise - # not be allowed - ".net"=>1, "2d"=>1, "3d"=>1, "6502"=>1, "68000"=>1, "68008"=>1, - "68010"=>1, "68020"=>1, "68030"=>1, "68040"=>1, "68060"=>1, - "8086"=>1, "80286"=>1, "80386"=>1, "80486"=>1, "80586"=>1, - c=>1, cc=>1, make=>1, www=>1, x=>1 - ); - map({$_oktags{lc($_)}=1} @Girocco::Config::allowed_tags); + # These are always okay (a "whitelist") even if they would + # otherwise not be allowed + @_whitetags = (qw( + .net 2d 3d 6502 68000 68008 68010 68020 68030 68040 68060 + 8086 80286 80386 80486 80586 c cc make www x + )); + map({$_oktags{lc($_)}=1} @_whitetags, @Girocco::Config::allowed_tags); # entries MUST be all lowercase to be effective %_badtags = ( # These are "nonsense" or pointless tags @@ -389,14 +386,24 @@ BEGIN { # letter, must not be a noise word, must be more than one character long, # must not be a repeated letter and must be no more than 32 characters long. # However, anything in %_oktags is explicitly allowed even if it otherwise -# would violate the rules. +# would violate the rules (except that none of [,\s\\\/] are allowed in tags). +# Returns the canonical name for the tag if the tag is valid otherwise undef. 
sub valid_tag { - local $_ = $_[0] || ''; - return 1 if $_oktags{lc($_)}; - return 0 unless /^[a-zA-Z][a-zA-Z0-9:.+#_-]+$/; - return 0 if $_badtags{lc($_)}; - return 0 if /^(.)\1+$/; - return length($_) <= 32 ? 1 : 0; + local $_ = $_[0]; + return undef unless defined($_) && $_ ne "" && !/[,\s\/\\]/; + my $fold = $Girocco::Config::foldtags; + if ($fold && !$_canontagscreated) { + local $_; + %_canontags = (); + $_canontags{lc($_)} = $_ foreach sort({$b cmp $a} @_whitetags, @Girocco::Config::allowed_tags); + $_canontagscreated = 1; + } + return $_canontags{lc($_)} if $fold && exists($_canontags{lc($_)}); + return ($fold ? lc($_) : $_) if $_oktags{lc($_)}; + return undef unless /^[a-zA-Z][a-zA-Z0-9:.+#_-]+$/; + return undef if $_badtags{lc($_)}; + return undef if /^(.)\1+$/; + return length($_) <= 32 ? ($fold ? lc($_) : $_) : undef; } # If the passed in argument looks like a URL, return only the stuff up through diff --git a/cgi/tagproj.cgi b/cgi/tagproj.cgi index c90f2e2..b4023e5 100755 --- a/cgi/tagproj.cgi +++ b/cgi/tagproj.cgi @@ -40,24 +40,16 @@ if ($ctags =~ /[^ a-zA-Z0-9:.+#_-]/) { exit; } +my $oldmask = umask(); +umask($oldmask & ~0060); +my $changed; foreach my $ctag (split(/ /, $ctags)) { - # Locking is not important - valid_tag($ctag) or next; - my $val = 0; - my $ct; - if (open $ct, '<', $proj->{path}."/ctags/$ctag") { - my $count = <$ct>; - close $ct; - defined $count or $count = ''; - chomp $count; - $val = $count =~ /^[1-9]\d*$/ ? $count : 1; - } - my $oldmask = umask(); - umask($oldmask & ~0060); - open $ct, '>', $proj->{path}."/ctags/$ctag" and print $ct ($val+1)."\n" and close $ct; + $changed = 1 if $proj->add_ctag($ctag, 1); +} +if ($changed) { $proj->_set_changed; $proj->_set_forkchange; - umask($oldmask); } +umask($oldmask); print $cgi->header(-status=>303, -location=>"@{[url_path($Girocco::Config::gitweburl)]}/$pname.git"); -- 2.11.4.GIT