From ef5d305d1975c1808521f46d0016b4ed5e730330 Mon Sep 17 00:00:00 2001
From: "Kyle J. McKay"
Date: Fri, 11 Nov 2016 08:52:09 -0800
Subject: [PATCH] Util.pm: add pure perl config file reader

Add two new functions that can read a Git config-format file
directly without needing to spawn any subprocesses:

  * read_config_file: preserves ordering and duplicates
  * read_config_file_hash: convenience when duplicates aren't needed

Previously the only option was to spawn a subprocess using
`git config --list` possibly with the `--null` option and
parse the output.

While the speed difference might not be noticed when only reading
a single config file for a single project, the difference when
reading multiple projects' config files is enormous.

Multiple config files can easily be read in less than a tenth the
time needed when spawning subprocesses.  Typically only about 5%
of the time is needed compared to spawning a subprocess.

Signed-off-by: Kyle J. McKay
---
 Girocco/Util.pm | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 164 insertions(+), 1 deletion(-)

diff --git a/Girocco/Util.pm b/Girocco/Util.pm
index a0c9799..c6ec2fa 100644
--- a/Girocco/Util.pm
+++ b/Girocco/Util.pm
@@ -21,7 +21,8 @@ BEGIN {
 			calc_windowmemory to_utf8 capture_command human_size
 			calc_bigfilethreshold has_reserved_suffix noFatalsToBrowser
 			calc_redeltathreshold
-			clean_email_multi);
+			clean_email_multi read_config_file
+			read_config_file_hash);
 }
 
 my $encoder;
@@ -998,4 +999,166 @@ sub noFatalsToBrowser {
 	};
 }
 
+# Decodes the backslash escapes Git accepts in config values: \b, \t,
+# \n, \" and \\.  The sub edits its argument in place (through the @_
+# alias) and returns the decoded string.
+my $cf_unesc;
+BEGIN {
+	my %escvals = (
+		b => "\b",
+		t => "\t",
+		n => "\n",
+		'"' => '"',
+		'\\' => '\\'
+	);
+	$cf_unesc = sub {
+		$_[0] =~ s/\\([btn\042\\])/$escvals{$1}/g;
+		$_[0];
+	};
+}
+
+# mimics Git's config.c git_parse_source function behavior
+#
+#   $fn    name of the config file to read
+#   $warn  if true, problems are reported with warn
+#
+# returns array of arrayref of key and value (in file order, duplicates
+# kept) except that valueless booleans have a value of undef
+# section and variable names are lowercased, subsection names are not
+# returns undef if the file cannot be opened or a line cannot be parsed
+sub read_config_file {
+	local $_;
+	my ($fn, $warn) = @_;
+	# number of physical lines read so far
+	my $li = 0;
+	# "section." or "section.subsection." prefix for following keys
+	my $section = "";
+	my @vals = ();
+	my $fh;
+	unless (open $fh, '<', $fn) {
+		warn "could not open \"$fn\": $!\n" if $warn;
+		return undef;
+	}
+	binmode($fh);
+	# shared failure exit used for every malformed line
+	my $bad = sub {
+		close $fh;
+		warn "bad config line $li in file $fn\n" if $warn;
+		return undef;
+	};
+	while (<$fh>) {
+		++$li;
+		s/(?:\r\n|\n)$//;
+		# strip a leading BOM; this assumes to_utf8 (defined elsewhere in
+		# this file) returns decoded characters, which are then turned
+		# back into UTF-8 bytes
+		$_ = to_utf8($_);
+		s/^\x{feff}// if $li == 1;
+		utf8::encode($_);
+		if (/^\s*\[(.*)$/) {
+			my $hdr = $1;
+			if ($hdr =~ /^([.a-zA-Z0-9-]+)\](.*)$/) {
+				# [section]
+				$section = lc($1) . ".";
+				$_ = $2;
+			} elsif ($hdr =~ /^([.a-zA-Z0-9-]*)\s+"((?:[^\042\\\n]|\\.)*)"\](.*)$/) {
+				# [section "subsection"]
+				my ($name, $subsection, $rest) = ($1, $2, $3);
+				# in a subsection a backslash just quotes the next character
+				$subsection =~ s/\\(.)/$1/g;
+				$section = lc($name) . "." . $subsection . ".";
+				$_ = $rest;
+			} else {
+				return $bad->();
+			}
+		}
+		# nothing (or only a comment) left on the line
+		next if /^\s*(?:[;#]|$)/;
+		s/^\s+//;
+		if (/^([a-zA-Z][a-zA-Z0-9-]*)[ \t]*(.*)$/) {
+			my $k = $section . lc($1);
+			my $v = $2;
+			if ($v eq "") {
+				# name without "=" is a valueless boolean
+				$v = undef;
+			} elsif ($v =~ /^=\s*(.*)$/) {
+				# $pd is the value text not yet consumed and $qt is
+				# true while inside double quotes
+				my $pd = $1;
+				$v = "";
+				my $qt = 0;
+				# bare block used as a loop: "redo" handles the next
+				# piece of $pd and "last" finishes the value
+				{
+					if ($pd eq "") {
+						last if !$qt;
+						# the line ended inside double quotes
+						return $bad->();
+					}
+					if (!$qt && $pd =~ /^((?:[^"\\\n;#]|\\[btn"\\])+)/) {
+						my $piece = $1;
+						$pd = substr($pd, length($piece));
+						# like Git, drop unquoted whitespace while the
+						# value is still empty ...
+						$piece =~ s/^\s+// if $v eq "";
+						# ... and at the end of the value, whether that
+						# is the end of the line or the start of a comment
+						if ($pd eq "" || $pd =~ /^[;#]/) {
+							$pd = "";
+							$piece =~ s/\s+$//;
+						}
+						# any other unquoted whitespace becomes a space
+						$piece =~ s/\s/ /g;
+						$v .= $cf_unesc->($piece);
+					} elsif ($qt && $pd =~ /^((?:[^"\\\n]|\\[btn"\\])+)/) {
+						# quoted text is kept as is apart from escapes
+						my $piece = $1;
+						$pd = substr($pd, length($piece));
+						$v .= $cf_unesc->($piece);
+					} elsif ($pd =~ /^\042/) {
+						$qt = !$qt;
+						$pd = substr($pd, 1);
+					} elsif (!$qt && $pd =~ /^[;#]/) {
+						# an unquoted comment runs to the end of the line
+						$pd = "";
+					} elsif ($pd eq "\\") {
+						# trailing backslash: the value continues on the
+						# next line (at end of file nothing more is read)
+						$pd = <$fh>;
+						if (defined($pd)) {
+							++$li;
+							$pd =~ s/(?:\r\n|\n)$//;
+							$pd = to_utf8($pd, 1);
+							$pd =~ s/^\s+// unless $v ne "" || $qt;
+						} else {
+							$pd = "";
+						}
+					} else {
+						# backslash not followed by b, t, n, " or \
+						return $bad->();
+					}
+					redo;
+				}
+			} else {
+				return $bad->();
+			}
+			push(@vals, [$k, $v]);
+		} else {
+			return $bad->();
+		}
+	}
+	close $fh;
+	return \@vals;
+}
+
+# Same as read_config_file except that a hashref is returned and
+# subsequent same-key-name values replace earlier ones.
+# In addition, a valueless boolean (undef value) is stored as 1
+sub read_config_file_hash {
+	defined(my $pairs = read_config_file(@_)) or return undef;
+	my %settings;
+	$settings{$_->[0]} = defined($_->[1]) ? $_->[1] : 1 foreach @$pairs;
+	return \%settings;
+}
+
 1;
-- 
2.11.4.GIT