2 * ssgrep.c: Search spreadsheets of selected strings
4 * Copyright (C) 2008 Jody Goldberg
5 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
7 #include <gnumeric-config.h>
9 #include <libgnumeric.h>
10 #include <goffice/goffice.h>
11 #include <command-context-stderr.h>
12 #include <workbook-view.h>
14 #include <application.h>
16 #include <gnm-plugin.h>
22 #include <parse-util.h>
23 #include <sheet-object-cell-comment.h>
24 #include <gnumeric-conf.h>
26 #include <gsf/gsf-input-stdio.h>
27 #include <gsf/gsf-input-textline.h>
28 #include <glib/gi18n.h>
31 static gboolean ssgrep_locus_values
= TRUE
;
32 static gboolean ssgrep_locus_expressions
= TRUE
;
33 static gboolean ssgrep_locus_results
= FALSE
;
34 static gboolean ssgrep_locus_comments
= TRUE
;
35 static gboolean ssgrep_locus_scripts
= TRUE
;
36 static gboolean ssgrep_ignore_case
= FALSE
;
37 static gboolean ssgrep_match_words
= FALSE
;
38 static gboolean ssgrep_quiet
= FALSE
;
39 static gboolean ssgrep_count
= FALSE
;
40 static gboolean ssgrep_print_filenames
= (gboolean
)2;
41 static gboolean ssgrep_print_matching_filenames
= FALSE
;
42 static gboolean ssgrep_print_nonmatching_filenames
= FALSE
;
43 static gboolean ssgrep_print_locus
= FALSE
;
44 static gboolean ssgrep_print_type
= FALSE
;
45 static char *ssgrep_pattern
= NULL
;
46 static gboolean ssgrep_fixed_strings
= FALSE
;
47 static gboolean ssgrep_recalc
= FALSE
;
48 static gboolean ssgrep_invert_match
= FALSE
;
49 static gboolean ssgrep_string_table
= FALSE
;
51 static gboolean ssgrep_show_version
= FALSE
;
52 static char *ssgrep_pattern_file
= NULL
;
54 static gboolean ssgrep_error
= FALSE
;
55 static gboolean ssgrep_any_matches
= FALSE
;
/*
 * Command-line option table; main() registers it with
 * g_option_context_add_main_entries().  Each entry binds a switch to one of
 * the file-scope ssgrep_* variables and carries a translatable help string.
 *
 * NOTE(review): in this listing several entries' long/short names, the
 * per-entry braces and the terminating entry are not visible -- confirm
 * against the complete source before editing the table.
 */
57 static GOptionEntry
const ssgrep_options
[] = {
60 0, G_OPTION_ARG_NONE
, &ssgrep_count
,
61 N_("Only print a count of matches per file"),
65 "string-table-count", 'C',
66 0, G_OPTION_ARG_NONE
, &ssgrep_string_table
,
67 N_("Search only via the string table, display a count of the references."),
/* The only visible entry that takes an argument (G_OPTION_ARG_STRING). */
73 0, G_OPTION_ARG_STRING
, &ssgrep_pattern_file
,
74 N_("Get patterns from a file, one per line"),
80 0, G_OPTION_ARG_NONE
, &ssgrep_fixed_strings
,
81 N_("Pattern is a set of fixed strings"),
/* This entry and the next one both write to ssgrep_print_filenames. */
87 0, G_OPTION_ARG_NONE
, &ssgrep_print_filenames
,
88 N_("Print the filename for each match"),
93 "without-filename", 'h',
/* G_OPTION_FLAG_REVERSE inverts the sense of a G_OPTION_ARG_NONE switch. */
94 G_OPTION_FLAG_REVERSE
, G_OPTION_ARG_NONE
, &ssgrep_print_filenames
,
95 N_("Do not print the filename for each match"),
101 0, G_OPTION_ARG_NONE
, &ssgrep_ignore_case
,
102 N_("Ignore differences in letter case"),
107 "files-with-matches", 'l',
108 0, G_OPTION_ARG_NONE
, &ssgrep_print_matching_filenames
,
109 N_("Print filenames with matches"),
114 "files-without-matches", 'L',
115 0, G_OPTION_ARG_NONE
, &ssgrep_print_nonmatching_filenames
,
116 N_("Print filenames without matches"),
122 0, G_OPTION_ARG_NONE
, &ssgrep_print_locus
,
123 N_("Print the location of each match"),
129 0, G_OPTION_ARG_NONE
, &ssgrep_quiet
,
130 N_("Suppress all normal output"),
135 "search-results", 'R',
136 0, G_OPTION_ARG_NONE
, &ssgrep_locus_results
,
137 N_("Search results of expressions too"),
143 0, G_OPTION_ARG_NONE
, &ssgrep_print_type
,
144 N_("Print the location type of each match"),
150 0, G_OPTION_ARG_NONE
, &ssgrep_invert_match
,
151 N_("Search for cells that do not match"),
157 0, G_OPTION_ARG_NONE
, &ssgrep_show_version
,
158 N_("Display program version"),
164 0, G_OPTION_ARG_NONE
, &ssgrep_match_words
,
165 N_("Match only whole words"),
171 0, G_OPTION_ARG_NONE
, &ssgrep_recalc
,
172 N_("Recalculate all cells"),
176 /* ---------------------------------------- */
189 add_result (StringTableSearch
*state
, char const *clean
, unsigned int n
)
193 if (NULL
== state
->results
)
194 state
->results
= g_hash_table_new (g_str_hash
, g_str_equal
);
195 else if (NULL
!= (prev
= g_hash_table_lookup (state
->results
, clean
)))
196 n
+= GPOINTER_TO_UINT (prev
);
197 g_hash_table_replace (state
->results
, (gpointer
) clean
, GUINT_TO_POINTER (n
));
/*
 * cb_check_strings: callback given to go_string_foreach_base() by
 * search_string_table().
 * @key: string-table text; it is lower-cased with g_utf8_strdown() below,
 *       so the G_GNUC_UNUSED marker on it is misleading.
 * @str: the string object; its reference count is what gets recorded.
 * @user_data: the StringTableSearch state.
 *
 * Looks the lower-cased text up in state->targets and reports the string's
 * reference count through add_result().
 *
 * NOTE(review): lines of this function are missing from the listing; any
 * test of `orig' before the add_result() call and any release of `clean'
 * are not visible.  add_result() stores the `clean' pointer as a hash key
 * without copying it, so confirm the ownership of this allocation.
 */
201 cb_check_strings (G_GNUC_UNUSED gpointer key
, gpointer str
, gpointer user_data
)
203 StringTableSearch
*state
= user_data
;
204 char *clean
= g_utf8_strdown (key
, -1);
205 char const *orig
= g_hash_table_lookup (state
->targets
, clean
);
207 add_result (state
, clean
, go_string_get_ref_count (str
));
212 cb_check_func (gpointer clean
, gpointer orig
, gpointer user_data
)
214 StringTableSearch
*state
= user_data
;
215 GnmFunc
*func
= gnm_func_lookup (clean
, state
->wb
);
216 if (func
&& gnm_func_get_in_use (func
))
217 add_result (state
, clean
, 1);
/*
 * cb_find_target_in_module: GHFunc run over the targets table by
 * cb_check_module().
 * @clean: lower-cased target (the table key).
 * @orig: target as originally given (the table value).
 * @user_data: the StringTableSearch state; state->lc_code is the text that
 *             is scanned.
 *
 * Repeatedly calls strstr() on state->lc_code looking for @clean and then
 * reports a count `n' through add_result().
 *
 * NOTE(review): the declaration of `n' and the whole loop body are missing
 * from this listing, so how occurrences are counted and how `ptr' advances
 * cannot be confirmed here.
 */
221 cb_find_target_in_module (gpointer clean
, gpointer orig
, gpointer user_data
)
223 StringTableSearch
*state
= user_data
;
225 char const *ptr
= state
->lc_code
;
227 while (NULL
!= (ptr
= strstr (ptr
, clean
))) {
233 add_result (state
, clean
, n
);
237 cb_check_module (gpointer name
, gpointer code
, gpointer user_data
)
239 StringTableSearch
*state
= user_data
;
240 state
->lc_code
= g_utf8_strdown (code
, -1);
241 g_hash_table_foreach (state
->targets
, &cb_find_target_in_module
, state
);
242 g_free ((gpointer
)state
->lc_code
);
243 state
->lc_code
= NULL
;
247 cb_dump_results (gpointer name
, gpointer count
)
249 g_print ("\t%s : %u\n", (char const *)name
, GPOINTER_TO_UINT (count
));
/*
 * search_string_table: string-table search of one workbook.
 * @wb: workbook being examined.
 * @file_name: name printed ahead of the results.
 * @targets: table of targets to look for.
 *
 * Gathers hits from three sources -- the string table
 * (go_string_foreach_base), function names (cb_check_func over @targets)
 * and, when the workbook carries a "VBA" data item, its modules -- and, if
 * anything was found, prints @file_name followed by one line per result.
 *
 * NOTE(review): the declaration of `modules' and any initialisation of the
 * remaining `state' fields (cb_check_func reads state->wb) are not visible
 * in this listing -- confirm against the complete source.
 */
253 search_string_table (Workbook
*wb
, char const *file_name
, GHashTable
*targets
)
255 StringTableSearch state
;
259 state
.targets
= targets
;
/* results is created on demand by add_result(); NULL means "no hits". */
260 state
.results
= NULL
;
261 go_string_foreach_base (&cb_check_strings
, &state
);
262 g_hash_table_foreach (targets
, &cb_check_func
, &state
);
264 if (NULL
!= (modules
= g_object_get_data (G_OBJECT (wb
), "VBA")))
265 g_hash_table_foreach (modules
, &cb_check_module
, &state
);
266 if (NULL
!= state
.results
) {
267 g_print ("%s\n", file_name
);
/* NOTE(review): cb_dump_results takes two arguments but is cast to GHFunc. */
268 g_hash_table_foreach (state
.results
, (GHFunc
)&cb_dump_results
, NULL
);
269 g_hash_table_destroy (state
.results
);
/*
 * ssgrep: load one input and search it.
 * @arg: file name as given by the user; used as the prefix in output.
 * @uri: URI form of @arg, passed to workbook_view_new_from_uri().
 * @ioc: I/O context used while loading.
 * @targets: handed to search_string_table() when ssgrep_string_table is set.
 * @pattern: NOTE(review): not referenced in the visible lines; the search
 *           object is built from the global ssgrep_pattern -- confirm.
 *
 * NOTE(review): this listing omits a number of the function's lines (return
 * type, local declarations, several conditions and closing braces).  The
 * comments below describe only what the visible lines show.
 */
274 ssgrep (const char *arg
, char const *uri
, GOIOContext
*ioc
, GHashTable
*targets
, char const *pattern
)
278 GnmSearchReplace
*search
;
283 wbv
= workbook_view_new_from_uri (uri
, NULL
, ioc
, NULL
);
288 wb
= wb_view_get_workbook (wbv
);
290 if (ssgrep_locus_results
) {
292 workbook_recalc_all (wb
);
296 if (ssgrep_string_table
) {
297 search_string_table (wb
, arg
, targets
);
/* Build the search object from the global option flags. */
302 search
= (GnmSearchReplace
*)
303 g_object_new (GNM_SEARCH_REPLACE_TYPE
,
304 "search-text", ssgrep_pattern
,
306 "invert", ssgrep_invert_match
,
307 "ignore-case", ssgrep_ignore_case
,
308 "match-words", ssgrep_match_words
,
/* ssgrep_locus_values drives both the string and the other-value loci. */
309 "search-strings", ssgrep_locus_values
,
310 "search-other-values", ssgrep_locus_values
,
311 "search-expressions", ssgrep_locus_expressions
,
312 "search-expression-results", ssgrep_locus_results
,
313 "search-comments", ssgrep_locus_comments
,
314 "search-scripts", ssgrep_locus_scripts
,
315 "sheet", workbook_sheet_by_index (wb
, 0),
316 "scope", GNM_SRS_WORKBOOK
,
/* Collect the candidate cells, then keep those that match. */
319 cells
= gnm_search_collect_cells (search
);
320 matches
= gnm_search_filter_matching (search
, cells
);
321 has_match
= (matches
->len
> 0);
/* Recorded globally; main() derives the exit status from it. */
324 ssgrep_any_matches
= TRUE
;
/* Output mode: filename-only modes, then the count mode, then per-match lines. */
328 } else if (ssgrep_print_nonmatching_filenames
) {
330 g_print ("%s\n", arg
);
331 } else if (ssgrep_print_matching_filenames
) {
333 g_print ("%s\n", arg
);
334 } else if (ssgrep_count
) {
335 if (ssgrep_print_filenames
)
336 g_print ("%s:", arg
);
337 g_print ("%u\n", matches
->len
);
/* One output line per match. */
340 for (ui
= 0; ui
< matches
->len
; ui
++) {
341 const GnmSearchFilterResult
*item
= g_ptr_array_index (matches
, ui
);
343 const char *locus_type
= "";
/* Pick the text to print and a translated label for where it was found. */
345 switch (item
->locus
) {
346 case GNM_SRL_CONTENTS
: {
347 GnmCell
const *cell
=
348 sheet_cell_get (item
->ep
.sheet
,
351 txt
= gnm_cell_get_entered_text (cell
);
352 locus_type
= _("cell");
356 case GNM_SRL_VALUE
: {
357 GnmCell
const *cell
=
358 sheet_cell_get (item
->ep
.sheet
,
361 if (cell
&& cell
->value
)
362 txt
= value_get_as_string (cell
->value
);
363 locus_type
= _("result");
367 case GNM_SRL_COMMENT
: {
368 GnmComment
*comment
= sheet_get_comment (item
->ep
.sheet
, &item
->ep
.eval
);
/* Duplicated with g_strdup(), so txt is an owned string in this case. */
369 txt
= g_strdup (cell_comment_text_get (comment
));
370 locus_type
= _("comment");
374 ; /* Probably should not happen. */
/* Optional prefixes: file name, locus type, location. */
377 if (ssgrep_print_filenames
)
378 g_print ("%s:", arg
);
380 if (ssgrep_print_type
)
381 g_print ("%s:", locus_type
);
383 if (ssgrep_print_locus
)
385 item
->ep
.sheet
->name_quoted
,
386 cellpos_as_string (&item
->ep
.eval
));
389 g_print ("%s\n", txt
);
/* Release the search results and the search object. */
396 gnm_search_filter_matching_free (matches
);
397 gnm_search_collect_cells_free (cells
);
398 g_object_unref (search
);
402 /* simple stripped down hash of lower case target, only used for string table
405 add_target (GHashTable
*ssgrep_targets
, char const *target
)
407 char *orig
= g_strstrip (g_strdup (target
));
408 char *clean
= g_utf8_strdown (orig
, -1);
409 g_hash_table_insert (ssgrep_targets
, clean
, orig
);
/*
 * main: program entry point.
 *
 * Parses the command line, builds the search pattern (from a pattern file
 * or from argv[1]) and the targets table, initialises plugins and the I/O
 * context, runs ssgrep() on every remaining argument and returns 0 when
 * anything matched, 1 otherwise.
 *
 * NOTE(review): this listing omits a number of the function's lines (return
 * type, several local declarations such as i, N, cc, ioc, input, err and
 * pat, plus some conditions, early exits and braces).  The comments below
 * describe only what the visible lines show.
 */
413 main (int argc
, char const **argv
)
415 GHashTable
*ssgrep_targets
;
416 GOErrorInfo
*plugin_errs
;
419 GOptionContext
*ocontext
;
420 GError
*error
= NULL
;
/*
 * NOTE(review): the name says stdin, but "fd://1" designates descriptor 1,
 * which is conventionally standard output (stdin is 0) -- confirm.
 */
422 const char *argv_stdin
[] = { "fd://1", NULL
};
424 /* No code before here, we need to init threads */
425 argv
= gnm_pre_parse_init (argc
, argv
);
427 gnm_conf_set_persistence (FALSE
);
429 ocontext
= g_option_context_new (_("PATTERN INFILE..."));
430 g_option_context_add_main_entries (ocontext
, ssgrep_options
, GETTEXT_PACKAGE
);
431 g_option_context_add_group (ocontext
, gnm_get_option_group ());
/* The return value is not used; failure is reported through `error'. */
432 g_option_context_parse (ocontext
, &argc
, (gchar
***)&argv
, &error
);
433 g_option_context_free (ocontext
);
436 g_printerr (_("%s\nRun '%s --help' to see a full list of available command line options.\n"),
437 error
->message
, g_get_prgname ());
438 g_error_free (error
);
442 if (ssgrep_show_version
) {
443 g_printerr (_("version '%s'\ndatadir := '%s'\nlibdir := '%s'\n"),
444 GNM_VERSION_FULL
, gnm_sys_data_dir (), gnm_sys_lib_dir ());
/* The table owns its keys and values; both are released with g_free. */
450 ssgrep_targets
= g_hash_table_new_full (g_str_hash
, g_str_equal
, g_free
, g_free
);
/* Pattern source 1: a file with one pattern per line. */
451 if (ssgrep_pattern_file
) {
452 char *uri
= go_shell_arg_to_uri (ssgrep_pattern_file
);
454 GsfInputTextline
*textline
;
/*
 * NOTE(review): `line' is unsigned char * but is passed below to functions
 * that are given plain char strings elsewhere in this file -- check for
 * pointer-signedness warnings.
 */
456 const unsigned char *line
;
459 input
= go_file_open (uri
, &err
);
463 g_printerr (_("%s: Cannot read %s: %s\n"),
464 g_get_prgname (), ssgrep_pattern_file
, err
->message
);
469 textline
= (GsfInputTextline
*)gsf_input_textline_new (input
);
470 g_object_unref (input
);
/* Each line is appended to one pattern, with '|' between alternatives. */
472 pat
= g_string_new (NULL
);
473 while (NULL
!= (line
= gsf_input_textline_ascii_gets (textline
))) {
475 g_string_append_c (pat
, '|');
477 if (ssgrep_fixed_strings
)
478 go_regexp_quote (pat
, line
);
480 g_string_append (pat
, line
);
482 add_target (ssgrep_targets
, line
);
/* FALSE keeps the character data, which becomes ssgrep_pattern. */
485 ssgrep_pattern
= g_string_free (pat
, FALSE
);
487 g_object_unref (textline
);
/* Pattern source 2: the first positional argument. */
493 g_printerr (_("%s: Missing pattern\n"), g_get_prgname ());
497 if (ssgrep_fixed_strings
) {
498 GString
*pat
= g_string_new (NULL
);
499 go_regexp_quote (pat
, argv
[1]);
500 ssgrep_pattern
= g_string_free (pat
, FALSE
);
502 ssgrep_pattern
= g_strdup (argv
[1]);
503 add_target (ssgrep_targets
, argv
[1]);
509 if (argv
[i
] == NULL
) {
/* Command context, plugins and I/O context needed to load workbooks. */
515 cc
= gnm_cmd_context_stderr_new ();
516 gnm_plugins_init (GO_CMD_CONTEXT (cc
));
517 go_plugin_db_activate_plugin_list (
518 go_plugins_get_available_plugins (), &plugin_errs
);
520 /* FIXME: What do we want to do here? */
521 go_error_info_free (plugin_errs
);
524 ioc
= go_io_context_new (cc
);
525 go_io_context_set_num_files (ioc
, N
);
526 go_component_set_default_command_context (cc
);
/* 2 is the "not specified" sentinel: default to printing names when N > 1. */
528 if (ssgrep_print_filenames
== (gboolean
)2)
529 ssgrep_print_filenames
= (N
> 1);
/* Search every remaining argument. */
531 for (; argv
[i
]; i
++) {
532 const char *arg
= argv
[i
];
533 char *uri
= go_shell_arg_to_uri (arg
);
534 go_io_context_processing_file (ioc
, uri
);
535 ssgrep (arg
, uri
, ioc
, ssgrep_targets
, ssgrep_pattern
);
/* Tear down in roughly the reverse order of set-up. */
539 g_hash_table_destroy (ssgrep_targets
);
541 go_component_set_default_command_context (NULL
);
542 g_object_unref (ioc
);
546 gnm_pre_parse_shutdown ();
548 /* This special case matches what "man grep" says. */
549 if (ssgrep_quiet
&& ssgrep_any_matches
)
/* Exit status: 0 when at least one match was seen, 1 otherwise. */
555 return ssgrep_any_matches
? 0 : 1;