Add ssdiff to examples
[Spreadsheet-Read.git] / examples / ss-dups-tk.pl
1 #!/pro/bin/perl
2
3 # ss-dup-tk.pl: Find dups in spreadsheet
4 # (m)'09 [23-01-2009] Copyright H.M.Brand 2005-2014
5
6 use strict;
7 use warnings;
8
9 sub usage
10 {
11 my $err = shift and select STDERR;
12 print
13 "usage: $0 [-t] [-S <sheets>] [-R <rows>] [-C columns] [-F <fields>]\n",
14 "\t-t Only check on true values\n",
15 "\t-S sheets Check sheet(s). Defaul = 1, 1,3-5,all\n",
16 "\t-R rows Check row(s). Defaul = all, 6,19-66\n",
17 "\t-C columns Check column(s). Defaul = all, 2,5-9\n",
18 "\t-F fields Check field(s). Defaul = all, A1,A2,B15,C23\n";
19 exit $err;
20 } # usage
21
22 use Spreadsheet::Read;
23
24 use Getopt::Long qw(:config bundling nopermute noignorecase);
25 my $opt_v = 0;
26 my $opt_t = 0; # Only check on true values
27 my @opt_S; # Sheets to print
28 my @opt_R; # Rows to print
29 my @opt_C; # Columns to print
30 my @opt_F;
31 GetOptions (
32 "help|?" => sub { usage (0); },
33
34 "S|sheets=s" => \@opt_S,
35 "R|rows=s" => \@opt_R,
36 "C|columns=s" => \@opt_C,
37 "F|fields=s" => \@opt_F,
38
39 "t|true" => \$opt_t,
40
41 "v|verbose:1" => \$opt_v,
42 ) or usage (1);
43
44 @opt_S or @opt_S = (1);
45
46
47 use Tk;
48 use Tk::ROText;
49
50 my $file = shift || (sort { -M $b <=> -M $a } glob "*.xls")[0];
51 my ($mw, $is, $ss, $dt) = (MainWindow->new, "1.0");
52
53 sub ReadFile
54 {
55 $file or return;
56
57 $dt->delete ("1.0", "end");
58 unless ($ss = ReadData ($file)) {
59 $dt->insert ("end", "Cannot read $file as spreadsheet\n");
60 return;
61 }
62
63 my @ss = map { qq{"$ss->[$_]{label}"} } 1 .. $ss->[0]{sheets};
64
65 my @finfo = (
66 "File: $file", ( map {
67 "Sheet $_: '$ss->[$_]{label}'\t($ss->[$_]{maxcol} x $ss->[$_]{maxrow})"
68 } 1 .. $ss->[0]{sheets} ),
69 "==============================================================");
70 $dt->insert ("end", join "\n", @finfo, "");
71 $is = (@finfo + 1).".0";
72 return $ss;
73 } # ReadFile
74
75 my $tf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both ));
76 $tf->Entry (
77 -textvariable => \$file,
78 -width => 40,
79 -vcmd => \&ReadFile,
80 )->pack (qw(-side left -expand 1 -fill both));
81
82 my %ftyp;
83 for ([ xls => [ "Excel Files", [qw( .xls .XLS )] ] ],
84 [ xlsx => [ "Excel Files", [qw( .xlsx .XLSX )] ] ],
85 [ sxc => [ "OpenOffice Files", [qw( .sxc .SXC )] ] ],
86 [ ods => [ "OpenOffice Files", [qw( .ods .ODS )] ] ],
87 [ csv => [ "CSV Files", [qw( .csv .CSV )] ] ],
88 ) {
89 my ($ft, $r) = @$_;
90 Spreadsheet::Read::parses ($ft) or next;
91 push @{$ftyp{$r->[0]}}, @{$r->[1]};
92 push @{$ftyp{"All spreadsheet types"}}, @{$r->[1]};
93 }
94 $tf->Button (
95 -text => "Select file",
96 -command => sub {
97 $ss = undef;
98 $file = $mw->getOpenFile (
99 -filetypes => [ ( map { [ $_, $ftyp{$_} ] } sort keys %ftyp ),
100 [ "All files", "*" ],
101 ],
102 );
103 ReadFile ();
104 },
105 )->pack (qw(-side left -expand 1 -fill both));
106 $tf->Button (
107 -text => "Detect",
108 -command => \&Detect,
109 )->pack (qw(-side left -expand 1 -fill both));
110 $tf->Button (
111 -text => "Show",
112 -command => \&Show,
113 )->pack (qw(-side left -expand 1 -fill both));
114 $tf->Button (
115 -text => "Exit",
116 -command => \&exit,
117 )->pack (qw(-side left -expand 1 -fill both));
118
119 my $mf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both ));
120 my $sw = $mf->Scrolled ("ROText",
121 -scrollbars => "osoe",
122 -height => 40,
123 -width => 85,
124 -foreground => "Black",
125 -background => "White",
126 -highlightthickness => 0,
127 -setgrid => 1)->pack (qw(-expand 1 -fill both));
128 $dt = $sw->Subwidget ("scrolled");
129 #$sw->Subwidget ("xscrollbar")->packForget;
130 $dt->configure (
131 -wrap => "none",
132 -font => "mono 12",
133 );
134
135 my $bf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both ));
136 $bf->Checkbutton (
137 -variable => \$opt_t,
138 -text => "True values only",
139 )->pack (qw(-side left -expand 1 -fill both));
140 { my $opt_S = @opt_S ? join ",", @opt_S : 1;
141 $bf->Label (
142 -text => "Sheet(s)",
143 )->pack (qw(-side left -expand 1 -fill both));
144 $bf->Entry (
145 -textvariable => \$opt_S,
146 -width => 10,
147 -validate => "focusout",
148 -vcmd => sub {
149 @opt_S = grep m/\S/, split m/\s*,\s*/ => $opt_S;
150 1;
151 },
152 )->pack (qw(-side left -expand 1 -fill both));
153 }
154 { my $opt_R = join ",", @opt_R;
155 $bf->Label (
156 -text => "Rows(s)",
157 )->pack (qw(-side left -expand 1 -fill both));
158 $bf->Entry (
159 -textvariable => \$opt_R,
160 -width => 10,
161 -validate => "focusout",
162 -vcmd => sub {
163 @opt_R = grep m/\S/, split m/\s*,\s*/ => $opt_R;
164 1;
165 },
166 )->pack (qw(-side left -expand 1 -fill both));
167 }
168 { my $opt_C = join ",", @opt_C;
169 $bf->Label (
170 -text => "Columns(s)",
171 )->pack (qw(-side left -expand 1 -fill both));
172 $bf->Entry (
173 -textvariable => \$opt_C,
174 -width => 10,
175 -validate => "focusout",
176 -vcmd => sub {
177 @opt_C = grep m/\S/, split m/\s*,\s*/ => $opt_C;
178 1;
179 },
180 )->pack (qw(-side left -expand 1 -fill both));
181 }
182
183 sub ranges (@)
184 {
185 my @g;
186 foreach my $arg (@_) {
187 for (split m/,/, $arg) {
188 if (m/^(\w+)\.\.(\w+)$/) {
189 my ($s, $e) = ($1, $2);
190 $s =~ m/^[1-9]\d*$/ or ($s, $e) = (qq("$s"), qq("$e"));
191 eval "push \@g, $s .. $e";
192 }
193 else {
194 push @g, $_;
195 }
196 }
197 }
198 $opt_v and print STDERR "( @g )\n";
199 @g;
200 } # ranges
201
202 sub Detect
203 {
204 $ss or ReadFile ();
205
206 $dt->delete ($is, "end");
207 $dt->insert ("end", join "\n", "",
208 "Shts: @opt_S",
209 "Rows: @opt_R",
210 "Cols: @opt_C",
211 "--------------------------------------------------------------",
212 "");
213 my %done;
214 my @S = $opt_S[0] eq "all" ? (1 .. $ss->[0]{sheets}) : ranges (@opt_S);
215 my @R = ranges (@opt_R);
216 my @C = ranges (@opt_C);
217 my %f = map { uc $_ => 1 } ("@opt_F" =~ m/(\b[A-Z]\d+\b)/ig);
218
219 foreach my $s (@S) {
220 my $xls = $ss->[$s] or die "Cannot read sheet $s\n";
221
222 my @r = @R ? @R : (1 .. $xls->{maxrow});
223 my @c = @C ? @C : (1 .. $xls->{maxcol});
224
225 foreach my $r (@r) {
226 foreach my $c (@c) {
227 defined $xls->{cell}[$c][$r] or next;
228 my $v = uc $xls->{cell}[$c][$r];
229 my $cell = cr2cell ($c, $r);
230 @S > 1 and $cell = $xls->{label} . "[$cell]";
231
232 $opt_t && !$v and next;
233 @opt_F && !exists $f{$cell} and next;
234
235 if (exists $done{$v}) {
236 $dt->insert ("end", sprintf "Cell %-5s is dup of %-5s '%s'\n", $cell, $done{$v}, $v);
237 next;
238 }
239 $done{$v} = $cell;
240 }
241 }
242 }
243 } # Detect
244
245 sub Show
246 {
247 $ss or ReadFile ();
248
249 $dt->delete ($is, "end");
250 $dt->insert ("end", join "\n", "",
251 "Shts: @opt_S",
252 "Rows: @opt_R",
253 "Cols: @opt_C");
254 my @S = $opt_S[0] eq "all" ? (1 .. $ss->[0]{sheets}) : ranges (@opt_S);
255 my @R = ranges (@opt_R);
256 my @C = ranges (@opt_C);
257 my %f = map { uc $_ => 1 } ("@opt_F" =~ m/(\b[A-Z]\d+\b)/ig);
258
259 foreach my $s (@S) {
260 my $xls = $ss->[$s] or die "Cannot read sheet $s\n";
261
262 $dt->insert ("end",
263 "\n--------------------------------------------------------------".
264 "\nSheet $s: '$xls->{label}'\t($xls->{maxcol} x $xls->{maxrow})\n");
265
266 my @r = @R ? @R : (1 .. $xls->{maxrow});
267 my @c = @C ? @C : (1 .. $xls->{maxcol});
268
269 $dt->insert ("end", " |");
270 for (@c) {
271 (my $ch = cr2cell ($_, 1)) =~ s/1$//;
272 $dt->insert ("end", sprintf "%11s |", $ch);
273 }
274 $dt->insert ("end", "\n-----+");
275 $dt->insert ("end", "------------+") for @c;
276 foreach my $r (@r) {
277 $dt->insert ("end", sprintf "\n%4d |", $r);
278 foreach my $c (@c) {
279 my $cell = cr2cell ($c, $r);
280 my $v = defined $xls->{cell}[$c][$r]
281 ? $xls->{$cell}
282 : "--";
283 length ($v) < 12 and substr $v, 0, 0, " " x (12 - length $v);
284 $dt->insert ("end", substr ($v, 0, 12). "|");
285 }
286 }
287 }
288 } # Show
289
290 MainLoop;