installed_progs.t: Python checks stdout too, 150 ok
[sunny256-utils.git] / src / fldb / fldb
blobf0293ce82c8bab242bb5215a0d064a9379a3d8b4
1 #!/usr/bin/env perl
3 #=======================================================================
4 # fldb
5 # File ID: 2285858a-f9f1-11dd-8b2b-000475e441b9
6 # File Library Database
8 # Character set: UTF-8
9 # ©opyleft 2008– Øyvind A. Holm <sunny@sunbase.org>
10 # License: GNU General Public License version 2 or later, see end of
11 # file for legal stuff.
12 #=======================================================================
14 use strict;
15 use warnings;
16 use Getopt::Long;
17 use DBI;
19 use lib "$ENV{'HOME'}/bin/src/fldb";
20 use FLDButf;
21 use FLDBsum;
22 use FLDBdebug;
23 use FLDBpg;
25 $| = 1;
27 our $Debug = 0;
28 my $STD_DATABASE = "fldb";
30 our %Opt = (
32 'add' => 0,
33 'crc32' => 0,
34 'database' => $STD_DATABASE,
35 'debug' => 0,
36 'description' => "",
37 'files-from' => "",
38 'help' => 0,
39 'json' => 1,
40 'long' => 0,
41 'postgres' => 0,
42 'quiet' => 0,
43 'verbose' => 0,
44 'version' => 0,
45 'xml' => 0,
46 'zero' => 0,
50 our $progname = $0;
51 $progname =~ s/^.*\/(.*?)$/$1/;
52 our $VERSION = "0.3.0";
54 Getopt::Long::Configure("bundling");
55 GetOptions(
57 "add|a" => \$Opt{'add'},
58 "crc32" => \$Opt{'crc32'},
59 "database|D=s" => \$Opt{'database'},
60 "debug" => \$Opt{'debug'},
61 "description|d=s" => \$Opt{'description'},
62 "files-from|f=s" => \$Opt{'files-from'},
63 "help|h" => \$Opt{'help'},
64 "json|j" => \$Opt{'json'},
65 "long|l" => \$Opt{'long'},
66 "postgres" => \$Opt{'postgres'},
67 "quiet|q+" => \$Opt{'quiet'},
68 "verbose|v+" => \$Opt{'verbose'},
69 "version" => \$Opt{'version'},
70 "xml|x" => \$Opt{'xml'},
71 "zero|z" => \$Opt{'zero'},
73 ) || die("$progname: Option error. Use -h for help.\n");
75 $Opt{'debug'} && ($Debug = 1);
76 $Opt{'verbose'} -= $Opt{'quiet'};
77 ($Opt{'postgres'} || $Opt{'xml'}) && ($Opt{'json'} = 0);
78 ($Opt{'json'}+$Opt{'postgres'}+$Opt{'xml'} > 1) && die("$progname: Cannot mix --json, --postgres and --xml options\n");
79 $Opt{'help'} && usage(0);
80 if ($Opt{'version'}) {
81 print_version();
82 exit(0);
85 my $postgresql_database = $Opt{'database'};
86 my $postgresql_host="localhost";
87 my ($dbh, $sth);
88 chomp(my $Hostname = `/bin/hostname`); # FIXME
89 if (!valid_utf8($Hostname)) {
90 $Hostname = latin1_to_utf8($Hostname);
92 my $safe_hostname = safe_string($Hostname);
93 my $has_printed = 0;
94 my $use_stdin = ($Opt{'files-from'} eq '-') ? 1 : 0;
96 if ($Opt{'postgres'} && $Opt{'add'}) {
97 $dbh = DBI->connect("DBI:Pg:dbname=$postgresql_database;host=$postgresql_host")
98 or die("connect: På trynet: $!");
101 my $Sql; # How ironic.
103 $Opt{'zero'} && ($/ = "\x00");
105 $Opt{'json'} && print("{\n \"files\":[");
106 $Opt{'xml'} && print("<fldb>\n");
107 if (length($Opt{'files-from'})) {
108 D("Opt{files-from} = '$Opt{'files-from'}'");
109 if ($use_stdin || open(FP, "<", $Opt{'files-from'})) {
110 while (my $Filename = $use_stdin ? <STDIN> : <FP>) {
111 chomp($Filename);
112 process_file($Filename);
114 close(FP);
115 } else {
116 msg(-1, "$Opt{'files-from'}: Cannot open file for read: $!");
118 } else {
119 for (@ARGV) {
120 chomp;
121 process_file($_);
124 $Opt{'json'} && print("\n ]\n}\n");
125 $Opt{'xml'} && print("</fldb>\n");
127 exit 0;
129 sub process_file {
130 # {{{
131 my $Filename = shift;
132 D("process_file('$Filename')");
133 if (!-f $Filename) {
134 msg(0, "$Filename: Ignoring non-file");
135 return;
137 $Sql = add_entry($Filename);
138 if (defined($Sql)) {
139 if ($Opt{'add'}) {
140 $Opt{'verbose'} && print("$Filename\n");
141 $dbh->do($Sql) || msg(-1, "$Filename: Cannot INSERT");
142 } else {
143 $Opt{'json'} && $has_printed && print(',');
144 print($Sql);
145 $has_printed = 1;
148 # }}}
149 } # process_file()
151 sub add_entry {
152 # {{{
153 my $Filename = shift;
154 if ($Filename =~ /\0/) {
155 msg(-1, "$Filename: Ignoring filename containing zero byte. Did you forget the --zero option?");
156 return(undef);
158 my $safe_filename = safe_string($Filename);
159 D("add_entry(\"$Filename\")");
160 my $Retval = "";
161 my @stat_array = ();
162 if (@stat_array = stat($Filename)) {
163 # {{{
164 my ($Dev, $Inode, $Perm, $Nlinks, $Uid, $Gid, $Rdev, $Size,
165 $Atime, $Mtime, $Ctime, $Blksize, $Blocks) = @stat_array;
166 $Mtime = sec_to_string($Mtime);
167 $Ctime = sec_to_string($Ctime);
168 D("Perm før: '$Perm'");
169 $Perm = sprintf("%04o", $Perm & 07777);
170 D("Perm etter: '$Perm'");
171 my %Sum = checksum($Filename, $Opt{'crc32'});
172 if (scalar(%Sum)) {
173 # {{{
174 my $crc32_str;
175 if ($Opt{'xml'}){
176 $crc32_str = $Opt{'crc32'} ? "<crc32>$Sum{crc32}</crc32> " : "";
177 } elsif ($Opt{'json'}) {
178 $crc32_str = $Opt{'crc32'} ? "\"crc32\":\"$Sum{crc32}\"" : "";
179 } elsif ($Opt{'postgres'}) {
180 $crc32_str = $Opt{'crc32'} ? "'$Sum{crc32}'" : "NULL";
182 D("crc32_str = '$crc32_str'");
183 my $latin1_str;
184 if (valid_utf8($safe_filename)) {
185 $latin1_str = $Opt{'postgres'} ? "FALSE" : "";
186 } else {
187 if ($Opt{'xml'}) {
188 $latin1_str = '<latin1>1</latin1>';
189 } elsif ($Opt{'json'}) {
190 $latin1_str = '"latin1":"1"';
191 } elsif ($Opt{'postgres'}) {
192 $latin1_str = 'TRUE';
194 $safe_filename = latin1_to_utf8($safe_filename);
196 D("latin1_str = '$latin1_str'");
197 my $base_filename = $safe_filename;
198 $base_filename =~ s/^.*\/(.*?)$/$1/;
199 D("base_filename = '$base_filename'");
200 if ($Opt{'xml'}) {
201 my $descr_str = length($Opt{'description'})
202 ? "<descr>" . txt_to_xml($Opt{'description'}) . "</descr> "
203 : "";
204 D("descr_str = \"$descr_str\"");
205 if (!$Opt{'long'}) {
206 # {{{
207 $Retval = sprintf(
208 "<file> " .
209 "<size>%u</size> " .
210 "<sha256>%s</sha256> " .
211 "<sha1>%s</sha1> " .
212 "<gitblob>%s</gitblob> " .
213 "<md5>%s</md5> " .
214 "%s" . # $crc32_str
215 "<filename>%s</filename> " .
216 "<mtime>%s</mtime> " .
217 "<perm>%s</perm> " .
218 "%s" . # $descr_str
219 "%s" . # $latin_str
220 "</file>\n",
221 $Size,
222 $Sum{'sha256'},
223 $Sum{'sha1'},
224 $Sum{'gitblob'},
225 $Sum{'md5'},
226 $crc32_str,
227 $safe_filename,
228 $Mtime,
229 $Perm,
230 $descr_str,
231 $latin1_str,
233 # }}}
234 } else {
235 # {{{
236 $Retval = sprintf(
237 "<file> " .
238 "<size>%s</size> " .
239 "<sha256>%s</sha256> " .
240 "<sha1>%s</sha1> " .
241 "<gitblob>%s</gitblob> " .
242 "<md5>%s</md5> " .
243 "%s" . # $crc32_str
244 "<filename>%s</filename> " .
245 "<mtime>%s</mtime> " .
246 "<perm>%s</perm> " .
247 "%s" .
248 "<ctime>%s</ctime> " .
249 "<path>%s</path> " .
250 "<inode>%s</inode> " .
251 "<links>%s</links> " .
252 "<device>%s</device> " .
253 "<hostname>%s</hostname> " .
254 "<uid>%s</uid> " .
255 "<gid>%s</gid> " .
256 # "<lastver>%s</lastver> " .
257 # "<nextver>%s</nextver> " .
258 "%s" . # $latin1_str
259 "</file>\n",
260 $Size,
261 $Sum{sha256},
262 $Sum{sha1},
263 $Sum{gitblob},
264 $Sum{md5},
265 $crc32_str,
266 $base_filename,
267 $Mtime,
268 $Perm,
269 $descr_str,
270 $Ctime,
271 $safe_filename,
272 $Inode,
273 $Nlinks,
274 txt_to_xml($Dev),
275 $safe_hostname,
276 $Uid,
277 $Gid,
278 # "",
279 # "",
280 $latin1_str,
282 # }}}
284 } elsif ($Opt{'json'}) {
285 my @json = ();
286 my $descr_str = length($Opt{'description'})
287 ? '"descr":"' . txt_to_json($Opt{'description'}) . '"'
288 : "";
289 push(@json,
290 sprintf(
291 '"filename":"%s",' .
292 '"size":%u,' .
293 '"sha256":"%s",' .
294 '"sha1":"%s",' .
295 '"gitblob":"%s",' .
296 '"md5":"%s"',
297 $Opt{'long'}
298 ? $base_filename
299 : $safe_filename,
300 $Size,
301 $Sum{'sha256'},
302 $Sum{'sha1'},
303 $Sum{'gitblob'},
304 $Sum{'md5'},
307 length($crc32_str) && push(@json, $crc32_str);
308 push(@json,
309 sprintf(
310 '"mtime":"%s",' .
311 '"perm":"%s"',
312 $Mtime,
313 $Perm,
316 length($descr_str) && push(@json, $descr_str);
317 if ($Opt{'long'}) {
318 # {{{
319 push(@json,
320 sprintf(
321 '"ctime":"%s",' .
322 '"path":"%s",' .
323 '"inode":%u,' .
324 '"links":%u,' .
325 '"device":%u,' .
326 '"hostname":"%s",' .
327 '"uid":%u,' .
328 '"gid":%u',
329 $Ctime,
330 $safe_filename,
331 $Inode,
332 $Nlinks,
333 txt_to_json($Dev),
334 $safe_hostname,
335 $Uid,
336 $Gid,
339 # }}}
341 length($latin1_str) && push(@json, $latin1_str);
342 $Retval = "\n {" . join(',', @json) . "}";
343 } elsif ($Opt{'postgres'}) {
344 my $descr_str = length($Opt{'description'})
345 ? "E'" . safe_sql($Opt{'description'}) . "'"
346 : "NULL";
347 D("descr_str = \"$descr_str\"");
348 if (!$Opt{'long'}) {
349 # {{{
350 $Retval = sprintf(<<END,
351 INSERT INTO files (
352 sha256, sha1, gitblob, md5, crc32,
353 size, filename, mtime, perm,
354 descr,
355 latin1
356 ) VALUES (
357 '%s', '%s', '%s', '%s', %s,
358 %s, E'%s', '%s', '%s',
363 $Sum{sha256}, $Sum{sha1}, $Sum{gitblob}, $Sum{md5}, $crc32_str,
364 $Size, $base_filename, $Mtime, $Perm,
365 $descr_str,
366 $latin1_str,
368 # }}}
369 } else {
370 # {{{
371 $Retval = sprintf(<<END,
372 INSERT INTO files (
373 sha256, sha1, gitblob, md5, crc32,
374 size, filename, mtime, perm, descr, ctime,
375 path,
376 inode, links, device, hostname,
377 uid, gid,
378 lastver, nextver,
379 latin1
380 ) VALUES (
381 '%s', '%s', '%s', '%s', %s,
382 %s, E'%s', '%s', '%s', %s, '%s',
383 E'%s',
384 %s, %s, E'%s', E'%s',
385 %s, %s,
386 %s, %s,
390 $Sum{sha256}, $Sum{sha1}, $Sum{gitblob}, $Sum{md5}, $crc32_str,
391 $Size, $base_filename, $Mtime, $Perm, $descr_str, $Ctime,
392 $safe_filename,
393 $Inode, $Nlinks, safe_sql($Dev), $safe_hostname,
394 $Uid, $Gid,
395 'NULL', 'NULL',
396 $latin1_str,
398 # }}}
401 D("=== \$Retval \x7B\x7B\x7B\n$Retval=== \x7D\x7D\x7D");
402 # }}}
403 } else {
404 msg(-1, "$Filename: Cannot read file: $!");
405 $Retval = undef;
407 # }}}
408 } else {
409 msg(-1, "$Filename: Cannot stat file: $!");
410 $Retval = undef;
412 return($Retval);
413 # }}}
414 } # add_entry()
416 sub safe_string {
417 # {{{
418 my $Str = shift;
420 if ($Opt{'xml'}) {
421 $Str = txt_to_xml($Str);
422 } elsif ($Opt{'json'}) {
423 $Str = txt_to_json($Str);
424 } elsif ($Opt{'postgres'}) {
425 $Str = safe_sql($Str);
427 return($Str);
428 # }}}
429 } # safe_string()
431 sub txt_to_json {
432 # Convert plain text to JSON {{{
433 my $Txt = shift;
434 $Txt =~ s/\\/\\\\/gs;
435 $Txt =~ s/"/\\"/gs;
436 $Txt =~ s/\x08/\\b/gs;
437 $Txt =~ s/\x09/\\t/gs;
438 $Txt =~ s/\x0a/\\n/gs;
439 $Txt =~ s/\x0c/\\f/gs;
440 $Txt =~ s/\x0d/\\r/gs;
441 $Txt =~ s/([\x00-\x1f])/sprintf('\u%04X', ord($1))/gse;
442 return($Txt);
443 # }}}
444 } # txt_to_json()
446 sub txt_to_xml {
447 # Convert plain text to XML {{{
448 my $Txt = shift;
449 $Txt =~ s/&/&amp;/gs;
450 $Txt =~ s/</&lt;/gs;
451 $Txt =~ s/>/&gt;/gs;
452 return($Txt);
453 # }}}
454 } # txt_to_xml()
456 sub print_version {
457 # Print program version {{{
458 print("$progname $VERSION\n");
459 # }}}
460 } # print_version()
462 sub usage {
463 # Send the help message to stdout {{{
464 my $Retval = shift;
466 if ($Opt{'verbose'}) {
467 print("\n");
468 print_version();
470 print(<<END);
472 Usage: $progname [options] [file [files [...]]]
474 Options:
476 -a, --add
477 Add file information to database.
478 --crc32
479 Also calculate CRC32. Reads the whole file into memory, so it’s not
480 suitable for big files. Maybe fixed in newer Perl versions.
481 -d x, --description x
482 Use x as file description.
483 -D x, --database x
484 Use database x.
485 -f x, --files-from x
486 Read filenames from x. Use - (hyphen) to read list from stdin.
487 -h, --help
488 Show this help.
489 -j, --json
490 Generate JSON output. This is the default output format.
491 -l, --long
492 Use long format, include local information.
493 --postgres
494 Generate SQL for use with Postgres.
495 -q, --quiet
496 Be quiet, suppress messages. Can be repeated.
497 -v, --verbose
498 Increase level of verbosity. Can be repeated.
499 --version
500 Print version information.
501 -x, --xml
502 Use XML output.
503 -z, --zero
504 Filenames are separated by a zero byte (\\0x00) instead of newline
505 (\\n). This makes it possible to read files containing newlines.
506 --debug
507 Print debugging messages.
510 exit($Retval);
511 # }}}
512 } # usage()
514 sub sec_to_string {
515 # Convert seconds since 1970 to "yyyy-mm-ddThh:mm:ssZ" {{{
516 my ($Seconds) = shift;
517 ($Seconds =~ /^-?(\d*)(\.\d+)?$/) || return(undef);
519 my @TA = gmtime($Seconds);
520 my($DateString) = sprintf("%04u-%02u-%02uT%02u:%02u:%02uZ",
521 $TA[5]+1900, $TA[4]+1, $TA[3],
522 $TA[2], $TA[1], $TA[0]);
523 return($DateString);
524 # }}}
525 } # sec_to_string()
527 sub msg {
528 # Print a status message to stderr based on verbosity level {{{
529 my ($verbose_level, $Txt) = @_;
531 if ($Opt{'verbose'} >= $verbose_level) {
532 print(STDERR "$progname: $Txt\n");
534 # }}}
535 } # msg()
537 __END__
539 # Plain Old Documentation (POD) {{{
541 =pod
543 =head1 NAME
547 =head1 SYNOPSIS
549 [options] [file [files [...]]]
551 =head1 DESCRIPTION
555 =head1 OPTIONS
557 =over 4
559 =item B<-h>, B<--help>
561 Print a brief help summary.
563 =item B<-v>, B<--verbose>
565 Increase level of verbosity. Can be repeated.
567 =item B<--version>
569 Print version information.
571 =item B<--debug>
573 Print debugging messages.
575 =back
577 =head1 BUGS
581 =head1 AUTHOR
583 Made by Øyvind A. Holm S<E<lt>sunny@sunbase.orgE<gt>>.
585 =head1 COPYRIGHT
587 Copyleft © Øyvind A. Holm E<lt>sunny@sunbase.orgE<gt>
588 This is free software; see the file F<COPYING> for legalese stuff.
590 =head1 LICENCE
592 This program is free software: you can redistribute it and/or modify it
593 under the terms of the GNU General Public License as published by the
594 Free Software Foundation, either version 2 of the License, or (at your
595 option) any later version.
597 This program is distributed in the hope that it will be useful, but
598 WITHOUT ANY WARRANTY; without even the implied warranty of
599 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
600 See the GNU General Public License for more details.
602 You should have received a copy of the GNU General Public License along
603 with this program.
604 If not, see L<http://www.gnu.org/licenses/>.
606 =head1 SEE ALSO
608 =cut
610 # }}}
612 # vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :