repo-parse: only increase counter on matches
[aurutils.git] / lib / aur-repo-parse
blob71f5009de4c24c87f1676b9302349193f269a547
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use v5.20;
6 use open ":std", ":encoding(UTF-8)";
7 use Cwd 'abs_path';
8 use File::Basename;
10 use AUR::Json qw(write_json);
11 my $argv0 = 'repo-parse';
13 # Attributes which (where applicable) match AurJson, such that `aur-repo-parse --json`
14 # can be piped to `aur-format`.
15 my %repo_add_attributes = (
16 'ARCH' => 'Arch', 'BASE' => 'PackageBase', 'BUILDDATE' => 'BuildDate',
17 'CONFLICTS' => 'Conflicts', 'CHECKDEPENDS' => 'CheckDepends', 'CSIZE' => 'CSize',
18 'DEPENDS' => 'Depends', 'DESC' => 'Description', 'FILENAME' => 'FileName',
19 'ISIZE' => 'ISize', 'LICENSE' => 'License', 'MAKEDEPENDS' => 'MakeDepends',
20 'MD5SUM' => 'Md5Sum', 'NAME' => 'Name', 'OPTDEPENDS' => 'OptDepends',
21 'PACKAGER' => 'Packager', 'PROVIDES' => 'Provides', 'REPLACES' => 'Replaces',
22 'SHA256SUM' => 'Sha256Sum', 'URL' => 'URL', 'VERSION' => 'Version',
23 'PGPSIG' => 'PgpSig', 'GROUPS' => 'Groups', 'FILES' => 'Files'
26 my %repo_add_array = (
27 'GROUPS' => 1, 'LICENSE' => 1, 'REPLACES' => 1, 'CONFLICTS' => 1, 'PROVIDES' => 1,
28 'DEPENDS' => 1, 'OPTDEPENDS' => 1, 'MAKEDEPENDS' => 1, 'CHECKDEPENDS' => 1, 'FILES' => 1
31 # md5sum and sha256sum are numeric values, but too large to be represented in int32/64.
32 my %repo_add_numeric = (
33 'BUILDDATE' => 1, 'CSIZE' => 1, 'ISIZE' => 1
37 sub entry_search {
38 my ($expr, $entry_data) = @_;
40 if (length($expr) and defined($entry_data)) {
41 # Search entry-by-entry on arrays
42 if (ref($entry_data) eq 'ARRAY') {
43 return grep(/$expr/, @{$entry_data});
44 } else {
45 return $entry_data =~ /$expr/;
47 } else {
48 return defined($entry_data);
52 sub parse_db {
53 my ($fh, $db_path, $db_name, $header, $handler, $search_expr, $search_label, @varargs) = @_;
54 my $count = 0;
55 my ($entry, $filename, $attr, $attr_label);
57 while (my $row = <$fh>) {
58 chomp($row);
60 if ($row =~ /^%\Q$header\E%$/) {
61 $filename = readline($fh);
62 chomp($filename);
64 # Evaluate condition on previous entry and run handler
65 if ($count > 0) {
66 if (entry_search($search_expr, $entry->{$search_label})) {
67 $handler->($entry, $count++, 0, @varargs);
69 } else {
70 $count++;
72 # New entry in the database (hashref)
73 %{$entry} = ();
74 $entry->{'DBPath'} = $db_path;
75 $entry->{'Repository'} = $db_name;
76 $entry->{$repo_add_attributes{$header}} = $filename;
78 elsif ($row =~ /^%.+%$/) {
79 if (not length($filename)) {
80 die "$argv0: attribute '$header' not set";
82 $attr = substr($row, 1, -1);
83 $attr_label = $repo_add_attributes{$attr};
85 if (not defined $attr_label) {
86 warn "$argv0: unknown attribute '$attr'";
87 $attr_label = ucfirst(lc($attr));
90 elsif ($row eq "") {
91 next;
93 else {
94 die unless length($attr) and length($attr_label);
96 if (defined($repo_add_numeric{$attr})) {
97 $entry->{$attr_label} = $row + 0; # integer type
98 } elsif (defined($repo_add_array{$attr})) {
99 push(@{$entry->{$attr_label}}, $row); # array type
100 } else {
101 $entry->{$attr_label} = $row; # string type
105 # Process last entry
106 if ($count > 0) {
107 if (entry_search($search_expr, $entry->{$search_label})) {
108 $handler->($entry, $count, 1, @varargs);
109 } else {
110 # Run handler with empty input to ensure correct termination for --json (#1120)
111 $handler->(undef, $count, 1, @varargs);
114 return $count;
117 sub repo_json {
118 my ($pkg, $count, $last) = @_;
120 if (defined $pkg) {
121 print '[' if $count == 1;
122 print ',' if $count > 1;
124 my $json_text = write_json($pkg);
125 print $json_text;
127 print "]\n" if $last == 1;
130 sub repo_jsonl {
131 my ($pkg, undef, undef) = @_;
132 return if not defined $pkg;
134 my $json_text = write_json($pkg);
135 print $json_text . "\n";
138 sub repo_list {
139 my ($pkg, undef, undef, $delim, $quiet) = @_;
140 return if not defined $pkg;
142 my $name = $pkg->{'Name'};
143 my $pver = $pkg->{'Version'};
145 if ($quiet) {
146 say $name;
147 } else {
148 say join($delim, $name, $pver);
152 sub repo_table {
153 my ($pkg, undef, undef, $delim) = @_;
154 return if not defined $pkg;
156 my $name = $pkg->{'Name'};
157 my $base = $pkg->{'PackageBase'};
158 my $pver = $pkg->{'Version'};
160 say join($delim, $name, $name, $base, $pver, 'Self');
162 for my $key ('Depends', 'MakeDepends', 'CheckDepends') {
163 if (ref($pkg->{$key}) eq 'ARRAY') {
164 map { say join($delim, $name, $_, $base, $pver, $key) } @{$pkg->{$key}};
169 sub repo_attr {
170 my ($pkg, undef, undef, $attr) = @_;
171 return if not defined $pkg;
173 my $value = $pkg->{$repo_add_attributes{$attr}};
175 if (defined($value) and ref($value) eq 'ARRAY') {
176 say join("\n", @{$value});
177 } elsif (defined($value)) {
178 say $value;
182 unless (caller) {
183 use Getopt::Long;
184 my $opt_json = 0;
185 my $opt_jsonl = 0;
186 my $opt_list = 0;
187 my $opt_list_attr = 0;
188 my $opt_table = 0;
189 my $opt_quiet = 0;
190 my $opt_delim;
191 my $opt_attr = "";
192 my @opt_db_path;
193 my $opt_stdin = 0;
194 my $opt_search = "";
195 my $opt_search_by = 'Name';
197 # optional arguments
198 GetOptions('J|json' => \$opt_json,
199 'jsonl' => \$opt_jsonl,
200 'F|attr=s' => \$opt_attr,
201 'l|list' => \$opt_list,
202 'q|quiet' => \$opt_quiet,
203 't|table' => \$opt_table,
204 'd|delim' => \$opt_delim,
205 'list-attr' => \$opt_list_attr,
206 'p|path=s' => \@opt_db_path,
207 's|search=s' => \$opt_search,
208 'search-by=s' => \$opt_search_by)
209 or exit(1);
211 if (scalar(@ARGV) > 0 and ($ARGV[0] eq "-" or $ARGV[0] eq "/dev/stdin")) {
212 $opt_stdin = 1;
214 elsif ($opt_list_attr) {
215 say(join("\n", sort(keys(%repo_add_attributes))));
216 exit(0);
218 elsif (scalar(@opt_db_path) < 1 and not $opt_stdin) {
219 say STDERR $argv0 . ': repository path must be specified';
220 exit(1);
223 # Callback function run on each entry in the database
224 my ($handler, @varargs);
226 if ($opt_json) {
227 $handler = \&repo_json;
229 elsif ($opt_jsonl) {
230 $handler = \&repo_jsonl;
232 elsif (length($opt_attr) and defined($repo_add_attributes{$opt_attr})) {
233 $handler = \&repo_attr;
234 @varargs = $opt_attr;
236 elsif (length($opt_attr)) {
237 say STDERR "$argv0: unknown attribute '$opt_attr'";
238 exit(2);
240 elsif ($opt_list) {
241 $handler = \&repo_list;
242 @varargs = (length($opt_delim) ? $opt_delim : "\t", $opt_quiet);
244 elsif ($opt_table) {
245 $handler = \&repo_table;
246 @varargs = length($opt_delim) ? $opt_delim : "\t";
248 else {
249 say STDERR $argv0 . ': no mode specified';
250 exit(1);
253 # Verify search field
254 my $opt_search_attr = $repo_add_attributes{uc($opt_search_by)};
256 if (not defined $opt_search_attr) {
257 say STDERR "$argv0: unknown attribute '$opt_search_by'";
258 exit(2);
260 my @parse_db_args = ($handler, $opt_search, $opt_search_attr, @varargs);
262 # Take input from stdin instead of a pacman database
263 if ($opt_stdin) {
264 parse_db(*STDIN, "/dev/stdin", 'local', 'FILENAME', @parse_db_args);
265 exit(0);
268 # bsdtar(1) does not support extracting multiple files in a single invocation,
269 # so fork a new process for each specified path.
270 for my $db_path (@opt_db_path) {
271 my $db_abs_path = abs_path($db_path);
272 my $db_name = basename($db_path);
274 if (not length($db_abs_path)) {
275 say STDERR $argv0 . ": file path '$db_path' not found";
276 exit(2);
279 # repo-add(8) only accepts *.db or *.db.tar* extensions
280 if ($db_name =~ /\.(db|files)(\.tar(\.\w+)?)?$/g) {
281 $db_name = substr $db_name, 0, $-[0];
282 } else {
283 say STDERR "$argv0: $db_name does not have a valid database archive extension";
284 exit(1);
287 # When parsing the database, do not require a full extraction to either memory or disk
288 # by reading `tar` output line-by-line. It is not strictly necessary to depend on
289 # attribute order (i.e. %FILENAME% occuring in first place) while doing so; however,
290 # the `--verbose` flag printing file names has different behavior for different `tar`
291 # versions. Specifically, `bsdtar -xv` does not add a newline after the file path,
292 # while `tar -xv` does.
293 my @extract = ('bsdtar', '-Oxf', $db_abs_path);
294 my $child_pid = open(my $fh, "-|", @extract) or die $!;
296 if ($child_pid) { # parent process
297 my $count = parse_db($fh, $db_abs_path, $db_name, 'FILENAME', @parse_db_args);
299 waitpid($child_pid, 0);
301 exit(2) if $?;
305 # vim: set et sw=4 sts=4 ft=perl: