From ad17537098cbf97f744dc05e136490afa0d6661a Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 7 Jul 2020 13:21:44 -0700 Subject: [PATCH] ConfigUtil.pm: extract config file reading code In order to reuse the Git config file reading code with minimal overhead, extract it into a separate ConfigUtil.pm file and have Util.pm continue to use and export the same routines as before. Signed-off-by: Kyle J. McKay --- Girocco/ConfigUtil.pm | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++ Girocco/Util.pm | 147 +--------------------------------------------- 2 files changed, 160 insertions(+), 146 deletions(-) create mode 100644 Girocco/ConfigUtil.pm diff --git a/Girocco/ConfigUtil.pm b/Girocco/ConfigUtil.pm new file mode 100644 index 0000000..f3552fa --- /dev/null +++ b/Girocco/ConfigUtil.pm @@ -0,0 +1,159 @@ +package Girocco::ConfigUtil; + +use 5.008; +use strict; +use warnings; + +use Encode; + +BEGIN { + use base qw(Exporter); + our @EXPORT = qw(to_utf8 read_config_file read_config_file_hash git_bool); +} + +my $encoder; +BEGIN { + $encoder = Encode::find_encoding('Windows-1252') || + Encode::find_encoding('ISO-8859-1') or + die "failed to load ISO-8859-1 encoder\n"; +} + +sub to_utf8($;$) { + my ($str, $encode) = @_; + return undef unless defined $str; + my $ans; + if (Encode::is_utf8($str) || utf8::decode($str)) { + $ans = $str; + } else { + $ans = $encoder->decode($str, Encode::FB_DEFAULT); + } + utf8::encode($ans) if $encode; + return $ans; +} + +my $cf_unesc; +BEGIN { + my %escvals = ( + b => "\b", + t => "\t", + n => "\n", + '"' => '"', + '\\' => '\\' + ); + $cf_unesc = sub { + $_[0] =~ s/\\([btn\042\\])/$escvals{$1}/g; + $_[0]; + }; +} + +# mimics Git's config.c git_parse_source function behavior +# returns array of arrayref of key and value +# except that valueless booleans have a value of undef +sub read_config_file { + local $_; + my ($fn, $warn) = @_; + my $li = 0; + my $section = ""; + my @vals = (); + open my $fh, '<', $fn or + $warn && warn("could not open \"$fn\": $!\n"), return(undef); + binmode($fh); + my $bad = sub { + close $fh; + warn "bad config line $li in file $fn\n" if $warn; + return undef; + }; + while (<$fh>) { + ++$li; + s/(?:\r\n|\n)$//; + $_ = to_utf8($_); + s/^\x{feff}// if $li == 1; + utf8::encode($_); + if (/^\s*\[/gc) { + if (/\G([.a-zA-Z0-9-]+)\]/gc) { + $section = lc($1) . "."; + } elsif (/\G([.a-zA-Z0-9-]*)\s+"((?:[^\042\\\n]|\\.)*)"\]/gc) { + $section = lc($1) . "." . + &{sub{my $x=shift; $x =~ s/\\(.)/$1/g; $x}}($2) . "."; + } else { + return &$bad; + } + } + /\G\s+/gc; + next if /\G(?:[;#]|$)/; + if (/\G([a-zA-Z][a-zA-Z0-9-]*)[ \t]*/gc) { + my $k = $section . lc($1); + my $v; + if (/\G$/) { + $v = undef; + } elsif (/\G=\s*/gc) { + $v = ""; + my $qt = 0; + { + if (/\G$/) { + last if !$qt; + return &$bad; + } + if (!$qt && /\G((?:[^"\\\n;#]|\\[btn"\\])+)/gc) { + my $a = $1; + if (/\G[;#]/) { + $_ = ""; + $a =~ s/\s+$//; + } + $a =~ s/\s/ /g; + $v .= &$cf_unesc($a); + } elsif ($qt && /\G((?:[^"\\\n]|\\[btn"\\])+)/gc) { + my $a = $1; + $v .= &$cf_unesc($a); + } elsif (/\G\042/gc) { + $qt = !$qt; + } elsif (!$qt && /\G[;#]/gc) { + $_ = ""; + } elsif (/\G\\$/) { + $_ = <$fh>; + if (defined($_)) { + ++$li; + s/(?:\r\n|\n)$//; + $_ = to_utf8($_, 1); + /^\s+/gc unless $v ne "" || $qt; + } else { + $_ = ""; + } + } else { + return &$bad; + } + redo; + } + } else { + return &$bad; + } + push(@vals, [$k, $v]); + } else { + return &$bad; + } + } + close $fh; + return \@vals; +} + +# Same as read_config_file except that a hashref is returned and +# subsequent same-key-name values replace earlier ones. +# Also valueless booleans are given the value 1 +sub read_config_file_hash { + my $result = read_config_file(@_); + return undef unless defined($result); + my %config = map {($$_[0], defined($$_[1])?$$_[1]:1)} @$result; + return \%config; +} + +# Returns 0 for false, 1 for true, undef for unrecognized or undef +# Unless the optional second argument is true in which case undef returns 1 +sub git_bool { + defined($_[0]) or return $_[1] ? 1 : undef; + my $v = lc($_[0]); + return 0 if $v eq 'false' || $v eq 'off' || $v eq 'no' || $v eq '' || $v =~ /^[-+]?0+$/; + return 1 if $v eq 'true' || $v eq 'on' || $v eq 'yes' || $v =~ /^[-+]?0*[1-9][0-9]*$/; + return undef; +} + +1; diff --git a/Girocco/Util.pm b/Girocco/Util.pm index c140044..61834d3 100644 --- a/Girocco/Util.pm +++ b/Girocco/Util.pm @@ -5,8 +5,8 @@ use strict; use warnings; use Girocco::Config; +use Girocco::ConfigUtil; use Time::Local; -use Encode; BEGIN { use base qw(Exporter); @@ -26,26 +26,6 @@ BEGIN { is_shellish read_HEAD_ref git_add_config); } -my $encoder; -BEGIN { - $encoder = Encode::find_encoding('Windows-1252') || - Encode::find_encoding('ISO-8859-1') or - die "failed to load ISO-8859-1 encoder\n"; -} - -sub to_utf8($;$) { - my ($str, $encode) = @_; - return undef unless defined $str; - my $ans; - if (Encode::is_utf8($str) || utf8::decode($str)) { - $ans = $str; - } else { - $ans = $encoder->decode($str, Encode::FB_DEFAULT); - } - utf8::encode($ans) if $encode; - return $ans; -} - BEGIN {require "Girocco/extra/capture_command.pl"} # Return the entire output sent to stdout from running a command @@ -1060,121 +1040,6 @@ sub read_HEAD_symref { return defined($hv) && $hv =~ m,^refs/., ? $hv : undef; } -my $cf_unesc; -BEGIN { - my %escvals = ( - b => "\b", - t => "\t", - n => "\n", - '"' => '"', - '\\' => '\\' - ); - $cf_unesc = sub { - $_[0] =~ s/\\([btn\042\\])/$escvals{$1}/g; - $_[0]; - }; -} - -# mimics Git's config.c git_parse_source function behavior -# returns array of arrayref of key and value -# except that valueless booleans have a value of undef -sub read_config_file { - local $_; - my ($fn, $warn) = @_; - my $li = 0; - my $section = ""; - my @vals = (); - open my $fh, '<', $fn or - $warn && warn("could not open \"$fn\": $!\n"), return(undef); - binmode($fh); - my $bad = sub { - close $fh; - warn "bad config line $li in file $fn\n" if $warn; - return undef; - }; - while (<$fh>) { - ++$li; - s/(?:\r\n|\n)$//; - $_ = to_utf8($_); - s/^\x{feff}// if $li == 1; - utf8::encode($_); - if (/^\s*\[/gc) { - if (/\G([.a-zA-Z0-9-]+)\]/gc) { - $section = lc($1) . "."; - } elsif (/\G([.a-zA-Z0-9-]*)\s+"((?:[^\042\\\n]|\\.)*)"\]/gc) { - $section = lc($1) . "." . - &{sub{my $x=shift; $x =~ s/\\(.)/$1/g; $x}}($2) . "."; - } else { - return &$bad; - } - } - /\G\s+/gc; - next if /\G(?:[;#]|$)/; - if (/\G([a-zA-Z][a-zA-Z0-9-]*)[ \t]*/gc) { - my $k = $section . lc($1); - my $v; - if (/\G$/) { - $v = undef; - } elsif (/\G=\s*/gc) { - $v = ""; - my $qt = 0; - { - if (/\G$/) { - last if !$qt; - return &$bad; - } - if (!$qt && /\G((?:[^"\\\n;#]|\\[btn"\\])+)/gc) { - my $a = $1; - if (/\G[;#]/) { - $_ = ""; - $a =~ s/\s+$//; - } - $a =~ s/\s/ /g; - $v .= &$cf_unesc($a); - } elsif ($qt && /\G((?:[^"\\\n]|\\[btn"\\])+)/gc) { - my $a = $1; - $v .= &$cf_unesc($a); - } elsif (/\G\042/gc) { - $qt = !$qt; - } elsif (!$qt && /\G[;#]/gc) { - $_ = ""; - } elsif (/\G\\$/) { - $_ = <$fh>; - if (defined($_)) { - ++$li; - s/(?:\r\n|\n)$//; - $_ = to_utf8($_, 1); - /^\s+/gc unless $v ne "" || $qt; - } else { - $_ = ""; - } - } else { - return &$bad; - } - redo; - } - } else { - return &$bad; - } - push(@vals, [$k, $v]); - } else { - return &$bad; - } - } - close $fh; - return \@vals; -} - -# Same as read_config_file except that a hashref is returned and -# subsequent same-key-name values replace earlier ones. -# Also valueless booleans are given the value 1 -sub read_config_file_hash { - my $result = read_config_file(@_); - return undef unless defined($result); - my %config = map {($$_[0], defined($$_[1])?$$_[1]:1)} @$result; - return \%config; -} - # similar to Git's test except that GIT_OBJECT_DIRECTORY is ignored sub is_git_dir { my $gd = shift; @@ -1199,16 +1064,6 @@ sub is_git_dir { return $hv =~ /^[0-9a-f]{40}/; } -# Returns 0 for false, 1 for true, undef for unrecognized or undef -# Unless the optional second argument is true in which case undef returns 1 -sub git_bool { - defined($_[0]) or return $_[1] ? 1 : undef; - my $v = lc($_[0]); - return 0 if $v eq 'false' || $v eq 'off' || $v eq 'no' || $v eq '' || $v =~ /^[-+]?0+$/; - return 1 if $v eq 'true' || $v eq 'on' || $v eq 'yes' || $v =~ /^[-+]?0*[1-9][0-9]*$/; - return undef; -} - # Returns a PATH properly prefixed which guarantees that Git is found and the # basedir/bin utilities are found as intended. $ENV{PATH} is LEFT UNCHANGED! # Caller is responsible for assigning result to $ENV{PATH} or otherwise -- 2.11.4.GIT