mktar: Use `wc` instead of `du` in summary message
[sunny256-utils.git] / ga-sjekk
blob409cb41bb1db5c0f5f8e1c4fc80514f0fbfbf122
1 #!/usr/bin/env bash
3 #=======================================================================
4 # ga-sjekk
5 # File ID: 4d412868-f13b-11e4-be3f-000df06acc56
7 # Check if the specified files are already archived in git-annex. Files
8 # that are already archived are moved to "$founddir/$dir/".
10 # Author: Øyvind A. Holm <sunny@sunbase.org>
11 # License: GNU General Public License version 2 or later.
12 #=======================================================================
progname=ga-sjekk
VERSION=0.5.0

# Parse the command line with getopt(1) (long options via -l). The short
# option letters correspond to the short forms handled by the case arms
# below. On a parse error getopt has already printed a message, so just
# exit.
ARGS="$(getopt -o "hnquv" \
    -l "help,dry-run,quiet,update,verbose,version" \
    -n "$progname" -- "$@")"
test "$?" = "0" || exit 1
eval set -- "$ARGS"

# Every option flag gets an explicit default so that a variable of the
# same name inherited from the environment cannot change behaviour.
opt_dry_run=0
opt_help=0
opt_quiet=0
opt_update=0
opt_verbose=0
while :; do
    case "$1" in
        -h|--help) opt_help=1; shift ;;
        -n|--dry-run) opt_dry_run=1; shift ;;
        -q|--quiet) opt_quiet=$((opt_quiet + 1)); shift ;;
        -u|--update) opt_update=1; shift ;;
        -v|--verbose) opt_verbose=$((opt_verbose + 1)); shift ;;
        --version) echo "$progname $VERSION"; exit 0 ;;
        --) shift; break ;;
        *) echo "$progname: Internal error" >&2; exit 1 ;;
    esac
done

# Effective verbosity: every -q cancels one -v, so the result may be
# negative.
opt_verbose=$((opt_verbose - opt_quiet))
# Relative directory that receives files which were found in an annex.
52 founddir=".ga-found"
# --help: print the usage text to stdout and exit successfully.
54 if test "$opt_help" = "1"; then
# With at least one net -v, print a blank line and the program name and
# version before the help text.
55 test $opt_verbose -gt 0 && { echo; echo $progname $VERSION; }
# The here-document delimiter is unquoted, so $founddir and $progname are
# expanded in the text while \$GASJEKK_DIR is printed literally.
56 cat <<END
58 Check if the specified files are already archived in git-annex. The file
59 information is stored in the directory defined in the GASJEKK_DIR
60 environment variable.
62 Files that already exist in any of the annexes are moved to "$founddir/"
63 while keeping the original directory structure. Files with size 0 are
64 ignored.
66 Usage: $progname [options] FILE [FILE [...]]
68 Options:
70 -n, --dry-run
71 Don't move the files to $founddir/, only show if they exist in any
72 repos.
73 -h, --help
74 Show this help.
75 -q, --quiet
76 Be more quiet. Can be repeated to increase silence.
77 -u, --update
78 Update the SQLite database in \$GASJEKK_DIR. Use the directory name
79 as name of the repo. No action is taken if the Git config variable
80 $progname.disable is set to "true".
81 -v, --verbose
82 Increase level of verbosity. Can be repeated. One -v lists file
83 information of every file on the command line if they exist in the
84 annex.
85 --version
86 Print version information.
88 END
89 exit 0
# msg LEVEL TEXT...
#
# Print "$progname: TEXT" to stderr when LEVEL does not exceed the
# effective verbosity in $opt_verbose; otherwise print nothing and
# return 0.
msg() {
    local level="$1"
    if [[ "$level" -gt "$opt_verbose" ]]; then
        return 0
    fi
    shift
    printf '%s: %s\n' "$progname" "$*" >&2
}
# err TEXT...
#
# Print "$progname: TEXT" to stderr and terminate the script with exit
# status 1.
err() {
    printf '%s: %s\n' "$progname" "$*" >&2
    exit 1
}
# is_annex
#
# Succeed (status 0) when the Git config variable annex.uuid has a
# non-empty value in the current repository, fail (status 1) otherwise.
is_annex() {
    local uuid
    uuid="$(git config --get annex.uuid)"
    [[ -n "$uuid" ]]
}
# The database lives in the directory named by the GASJEKK_DIR
# environment variable; refuse to continue when it is unset or empty.
dbdir="$GASJEKK_DIR"
if [[ -z "$dbdir" ]]; then
    err "GASJEKK_DIR environment variable not defined"
fi

db="$dbdir/ga-sjekk.sqlite"
# --update: replace this repository's entries in the database with the
# output of `git annex find`, then exit without checking any files.
112 if test "$opt_update" = "1"; then
113 is_annex || err Current dir is not an annex, annex.uuid is empty
# A repo opts out by setting the Git config variable $progname.disable
# to "true"; that is reported on stderr and treated as success.
114 if test "$(git config --get $progname.disable)" = "true"; then
115 echo "$progname: $progname.disable = true, ignoring repo" >&2
116 exit 0
# An already existing database file must be writable.
118 if test -e "$db"; then
119 test -w "$db" || err $db: Database is not writable
# Work from the top of the Git repo; the basename of its physical path is
# used as the repo name.
# NOTE(review): $(pwd -P) is unquoted, so a path containing whitespace is
# word-split before it reaches basename.
121 toplevel="$(git rev-parse --show-toplevel)"
122 test -z "$toplevel" && err Could not find top of current Git repo
123 cd "$toplevel" || err $toplevel: Could not chdir to top of current Git repo
124 repo=$(basename $(pwd -P))
125 test -z "$repo" && err Could not find repo name, basename returned empty
126 msg 0 Import from repo \"$repo\"
# Per-repo dump of `git annex find --json`, stored next to the database
# and removed after the import.
# NOTE(review): the redirection target $json is unquoted.
128 json="$dbdir/ga-sjekk.$repo.mjson"
129 msg 1 Import JSON from git annex find
130 git annex find --json --include="*" >$json ||
131 err git annex find in $(pwd) failed
132 msg 1 Update $db
# Feed the statements below to sqlite3. The here-document delimiter is
# unquoted, so $json and $repo are expanded by the shell while \$ and \\
# reach sqlite3 as $ and \. The dump is imported into a temporary table,
# the rows for this repo in "files" are deleted and re-created from it,
# and rows with size 0 are removed.
# NOTE(review): $repo is inserted into the SQL text without escaping.
133 cat <<SQL_END | sqlite3 "$db"
134 BEGIN;
135 CREATE TEMPORARY TABLE incoming (
136 j JSON
137 UNIQUE
138 ON CONFLICT IGNORE
140 CREATE TABLE IF NOT EXISTS json (
141 repo TEXT,
142 j JSON
143 UNIQUE
144 ON CONFLICT IGNORE
146 .separator "\\t"
147 .import $json incoming
148 CREATE TABLE IF NOT EXISTS files (
149 repo TEXT,
150 key TEXT,
151 size INTEGER,
152 file TEXT,
153 UNIQUE (repo, key, size, file)
154 ON CONFLICT IGNORE
156 DELETE FROM files WHERE repo = '$repo';
157 INSERT INTO json (repo, j)
158 SELECT '$repo', j FROM incoming;
159 INSERT INTO files (repo, key, size, file)
160 SELECT
161 '$repo',
162 json_extract(j, '\$.key'),
163 json_extract(j, '\$.bytesize') AS size,
164 json_extract(j, '\$.file')
165 FROM incoming;
166 DROP TABLE incoming;
167 DELETE FROM files WHERE size = 0;
168 CREATE INDEX IF NOT EXISTS idx_files_key ON files (key);
169 CREATE INDEX IF NOT EXISTS idx_files_size ON files (size);
170 COMMIT;
171 SQL_END
172 rm -f "$json"
# Update mode ends here; the per-file check further down is not run.
174 exit
test -e "$db" || err "$db: Database not found"

# Look up every non-directory argument in the database. Files whose key is
# known to at least one repo are reported and, unless --dry-run is active,
# moved to "$founddir/<repo names joined by +>/<original directory>/".
for f in "$@"; do
    test -d "$f" && continue
    test -e "$f" || continue

    # Byte count of the file. Reading via stdin keeps the file name out of
    # wc's arguments and output; strip any padding around the number.
    size=$(wc -c <"$f")
    size=${size//[[:space:]]/}
    test -z "$size" && continue

    # Only ask ga-key for the key when some stored file has the same size.
    numfound=$(sqlite3 "$db" "SELECT count(*) FROM files WHERE size = $size;")
    test -z "$numfound" && continue
    test "$numfound" -eq 0 && continue

    key=$(ga-key -q "$f")
    test -z "$key" && continue

    # Double any single quote so the key cannot break out of the SQL string
    # literal.
    sq="'"
    key_sql=${key//$sq/$sq$sq}

    found_in="$(
        sqlite3 "$db" "
            SELECT distinct repo FROM files
              WHERE key = '$key_sql'
              ORDER BY repo;"
    )"
    if test -n "$found_in"; then
        # sqlite3 prints one repo per line; show them space-separated.
        repos=${found_in//$'\n'/ }
        printf '%s: Found in %s\n' "$f" "$repos"
        if test "$opt_verbose" -ge 1; then
            sqlite3 "$db" ".mode tabs" \
                "SELECT '', repo, file FROM files
                   WHERE key = '$key_sql'
                   ORDER BY repo, file;"
        fi
        if test "$opt_dry_run" != "1"; then
            dir=${repos// /+}
            dest="$founddir/$dir/$(dirname -- "$f")"
            # "--" protects against names starting with "-"; only move the
            # file when the destination directory could be created.
            mkdir -p -- "$dest" && mv -i -- "$f" "$dest"
        fi
    fi
done