From eea0e91db739f65ff6ceea880560f9d48c85487a Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Mon, 8 Feb 2021 00:26:02 -0700 Subject: [PATCH] Girocco/Util.pm: add from_json function With the recent addition of the to_json function, might as well have a matching inverse from_json function. Go ahead and add a from_json function that's the inverse of to_json. Signed-off-by: Kyle J. McKay --- Girocco/Util.pm | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/Girocco/Util.pm b/Girocco/Util.pm index 96c7c7d..165bc1a 100644 --- a/Girocco/Util.pm +++ b/Girocco/Util.pm @@ -26,7 +26,7 @@ BEGIN { clean_email_multi read_HEAD_symref read_config_file read_config_file_hash is_git_dir git_bool util_path is_shellish read_HEAD_ref git_add_config to_json - json_bool); + json_bool from_json); } BEGIN {require "Girocco/extra/capture_command.pl"} @@ -1243,4 +1243,136 @@ sub _json_hash { return $ans; } +# returns undef on error and sets $@ (otherwise $@ cleared) +# if the JSON string to decode is "null" then undef is returned and $@ eq "" +# $_[0] -> string value to decode from JSON +# $_[1] -> if true return integers instead of json_bool for true/false +# $_[2] -> if true strings are utf8::encode'd (i.e. they're bytes not chars) +# returns scalar which will be an ARRAY or HASH ref for JSON array or hash values +# using to_json(from_json($json_value)) will somewhat "normalize" $json_value +# (and optionally pretty it up) and always recombine valid surrogate pairs +sub from_json { + my $ans = undef; + eval {$ans = _from_jsonx(@_)}; + return $ans; +} + +# will die on bad input +sub _from_jsonx { + my ($val, $nobool, $enc) = @_; + defined($val) or return undef; + my $l = length($val); + pos($val) = 0; + my $atom = _from_json_value(\$val, $l, $nobool, $enc); + $val =~ /\G\s+/gc; + pos($val) >= $l or + die "garbage found at offset ".pos($val); + return $atom; +} + +sub _from_json_value { + my ($val, $l, $nobool, $enc) = @_; + $$val =~ /\G\s+/gc; + my $c = substr($$val, pos($$val), 1); + $c eq "" and die "unexpected end of input at offset ".pos($$val); + $c eq "{" and return _from_json_hash($val, $l, $nobool, $enc); + $c eq "[" and return _from_json_array($val, $l, $nobool, $enc); + $c eq '"' and return _from_json_str($val, $enc); + index("-0123456789", $c) >= 0 and do { + $$val =~ /\G(-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][-+]?\d+)?)/gc and + return int($1) == $1 ? int($1) : $1; + die "invalid JSON number at offset ".pos($$val); + }; + $$val =~ /\Gnull\b/gc and return undef; + $$val =~ /\Gtrue\b/gc and return $nobool?1:json_bool(1); + $$val =~ /\Gfalse\b/gc and return $nobool?0:json_bool(0); + die "invalid JSON value at offset ".pos($$val); +} + +my %json_unesc; BEGIN {%json_unesc=( + '\\' => "\\", + '"' => '"', + 'b' => "\b", + 't' => "\t", + 'n' => "\n", + 'f' => "\f", + 'r' => "\r" +)} + +sub _from_json_str { + my ($val, $enc) = @_; + my $opos = pos($$val); + $$val =~ /\G\042((?:[^\\\042]|\\.)*)\042/gsc and + return _from_json_strval($1, $opos+1, $enc); + die "invalid JSON string starting at offset $opos"; +} + +sub _from_json_strval { + my ($val, $pos, $enc) = @_; + Encode::is_utf8($val) || utf8::decode($val) or + die "invalid UTF-8 string starting at offset $pos"; + $val =~ s{\\([\\\042btnfr]|u[0-9a-fA-F]{4})}{ + substr($1,0,1) eq "u" ? &{sub{ + my $c = hex(substr($1,1,4)); + 0xD800 <= $c && $c <= 0xDFFF ? + "\\" . $1 : + chr(hex(substr($1,1,4))) + }} : $json_unesc{$1} + }goxe; + $val =~ s{\\u([Dd][89AaBb][0-9a-fA-F]{2})\\u([Dd][CcDdEeFf][0-9a-fA-F]{2})}{ + chr(( ((hex($1)&0x03FF)<<10) | (hex($2)&0x03FF) ) + 0x10000) + }goxe; + !Encode::is_utf8($val) || utf8::encode($val) if $enc; + return $val; +} + +sub _from_json_array { + my ($val, $l, $nobool, $enc) = @_; + my @a = (); + $$val =~ /\G\[/gc or die "expected '[' at offset ".pos($$val); + my $wantcomma = 0; + while (pos($$val) < $l && substr($$val, pos($$val), 1) ne "]") { + $$val =~ /\G\s+/gc and next; + !$wantcomma && substr($$val, pos($$val), 1) eq "," and + die "unexpected comma (,) in JSON array at offset ".pos($$val); + $wantcomma && !($$val =~ /\G,/gc) and + die "expected comma (,) or right-bracket (]) in JSON array at offset ".pos($$val); + push(@a, _from_json_value($val, $l, $nobool, $enc)); + $wantcomma = 1; + } + $$val =~ /\G\]/gc or die "expected ']' at offset ".pos($$val); + return \@a; +} + +sub _from_json_hash { + my ($val, $l, $nobool, $enc) = @_; + my %h = (); + $$val =~ /\G\{/gc or die "expected '{' at offset ".pos($$val); + my $wantc = ""; + my $k = undef; + while (pos($$val) < $l && substr($$val, pos($$val), 1) ne "}") { + $$val =~ /\G\s+/gc and next; + !$wantc && index(":,", substr($$val, pos($$val), 1)) >= 0 and + die "unexpected colon (:) or comma (,) in JSON hash at offset ".pos($$val); + $wantc eq ":" && !($$val =~ /\G:/gc) and + die "expected colon (:) in JSON hash at offset ".pos($$val); + $wantc eq "," && !($$val =~ /\G,/gc) and + die "expected comma (,) or right-brace (}) in JSON hash at offset ".pos($$val); + $wantc and $$val =~ /\G\s+/gc; + $wantc eq "," and $wantc = ""; + !$wantc && substr($$val, pos($$val), 1) ne '"' and + die "expected double-quote (\") in JSON hash at offset ".pos($$val); + !$wantc and do { + $k = _from_json_str($val, $enc); + $wantc = ":"; + next; + }; + $h{$k} = _from_json_value($val, $l, $nobool, $enc); + $wantc = ","; + } + $wantc ne ":" or die "expected ':' at offset ".pos($$val); + $$val =~ /\G\}/gc or die "expected '}' at offset ".pos($$val); + return \%h; +} + 1; -- 2.11.4.GIT