Update Spanish translation
[gnumeric.git] / src / ssgrep.c
blob5bc3258238aa08e9f63357d976931bbee6cb54d1
1 /*
2 * ssgrep.c: Search spreadsheets for selected strings
4 * Copyright (C) 2008 Jody Goldberg
5 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
6 */
7 #include <gnumeric-config.h>
8 #include <gnumeric.h>
9 #include <libgnumeric.h>
10 #include <goffice/goffice.h>
11 #include <command-context-stderr.h>
12 #include <workbook-view.h>
13 #include <workbook.h>
14 #include <application.h>
15 #include <gutils.h>
16 #include <gnm-plugin.h>
17 #include <search.h>
18 #include <sheet.h>
19 #include <cell.h>
20 #include <value.h>
21 #include <func.h>
22 #include <parse-util.h>
23 #include <sheet-object-cell-comment.h>
24 #include <gnumeric-conf.h>
26 #include <gsf/gsf-input-stdio.h>
27 #include <gsf/gsf-input-textline.h>
28 #include <glib/gi18n.h>
29 #include <string.h>
31 static gboolean ssgrep_locus_values = TRUE;
32 static gboolean ssgrep_locus_expressions = TRUE;
33 static gboolean ssgrep_locus_results = FALSE;
34 static gboolean ssgrep_locus_comments = TRUE;
35 static gboolean ssgrep_locus_scripts = TRUE;
36 static gboolean ssgrep_ignore_case = FALSE;
37 static gboolean ssgrep_match_words = FALSE;
38 static gboolean ssgrep_quiet = FALSE;
39 static gboolean ssgrep_count = FALSE;
40 static gboolean ssgrep_print_filenames = (gboolean)2;
41 static gboolean ssgrep_print_matching_filenames = FALSE;
42 static gboolean ssgrep_print_nonmatching_filenames = FALSE;
43 static gboolean ssgrep_print_locus = FALSE;
44 static gboolean ssgrep_print_type = FALSE;
45 static char *ssgrep_pattern = NULL;
46 static gboolean ssgrep_fixed_strings = FALSE;
47 static gboolean ssgrep_recalc = FALSE;
48 static gboolean ssgrep_invert_match = FALSE;
49 static gboolean ssgrep_string_table = FALSE;
51 static gboolean ssgrep_show_version = FALSE;
52 static char *ssgrep_pattern_file = NULL;
54 static gboolean ssgrep_error = FALSE;
55 static gboolean ssgrep_any_matches = FALSE;
57 static GOptionEntry const ssgrep_options [] = {
59 "count", 'c',
60 0, G_OPTION_ARG_NONE, &ssgrep_count,
61 N_("Only print a count of matches per file"),
62 NULL
65 "string-table-count", 'C',
66 0, G_OPTION_ARG_NONE, &ssgrep_string_table,
67 N_("Search only via the string table, display a count of the references."),
68 NULL
72 "pattern-file", 'f',
73 0, G_OPTION_ARG_STRING, &ssgrep_pattern_file,
74 N_("Get patterns from a file, one per line"),
75 N_("FILE")
79 "fixed-strings", 'F',
80 0, G_OPTION_ARG_NONE, &ssgrep_fixed_strings,
81 N_("Pattern is a set of fixed strings"),
82 NULL
86 "with-filename", 'H',
87 0, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
88 N_("Print the filename for each match"),
89 NULL
93 "without-filename", 'h',
94 G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
95 N_("Do not print the filename for each match"),
96 NULL
100 "ignore-case", 'i',
101 0, G_OPTION_ARG_NONE, &ssgrep_ignore_case,
102 N_("Ignore differences in letter case"),
103 NULL
107 "files-with-matches", 'l',
108 0, G_OPTION_ARG_NONE, &ssgrep_print_matching_filenames,
109 N_("Print filenames with matches"),
110 NULL
114 "files-without-matches", 'L',
115 0, G_OPTION_ARG_NONE, &ssgrep_print_nonmatching_filenames,
116 N_("Print filenames without matches"),
117 NULL
121 "print-locus", 'n',
122 0, G_OPTION_ARG_NONE, &ssgrep_print_locus,
123 N_("Print the location of each match"),
124 NULL
128 "quiet", 'q',
129 0, G_OPTION_ARG_NONE, &ssgrep_quiet,
130 N_("Suppress all normal output"),
131 NULL
135 "search-results", 'R',
136 0, G_OPTION_ARG_NONE, &ssgrep_locus_results,
137 N_("Search results of expressions too"),
138 NULL
142 "print-type", 'T',
143 0, G_OPTION_ARG_NONE, &ssgrep_print_type,
144 N_("Print the location type of each match"),
145 NULL
149 "invert-match", 'v',
150 0, G_OPTION_ARG_NONE, &ssgrep_invert_match,
151 N_("Search for cells that do not match"),
152 NULL
156 "version", 'V',
157 0, G_OPTION_ARG_NONE, &ssgrep_show_version,
158 N_("Display program version"),
159 NULL
163 "word-regexp", 'w',
164 0, G_OPTION_ARG_NONE, &ssgrep_match_words,
165 N_("Match only whole words"),
166 NULL
170 "recalc", 0,
171 0, G_OPTION_ARG_NONE, &ssgrep_recalc,
172 N_("Recalculate all cells"),
173 NULL
176 /* ---------------------------------------- */
178 { NULL }
181 typedef struct {
182 Workbook *wb;
183 GHashTable *targets;
184 GHashTable *results;
185 char const *lc_code;
186 } StringTableSearch;
188 static void
189 add_result (StringTableSearch *state, char const *clean, unsigned int n)
191 gpointer prev;
193 if (NULL == state->results)
194 state->results = g_hash_table_new (g_str_hash, g_str_equal);
195 else if (NULL != (prev = g_hash_table_lookup (state->results, clean)))
196 n += GPOINTER_TO_UINT (prev);
197 g_hash_table_replace (state->results, (gpointer) clean, GUINT_TO_POINTER (n));
200 static void
201 cb_check_strings (G_GNUC_UNUSED gpointer key, gpointer str, gpointer user_data)
203 StringTableSearch *state = user_data;
204 char *clean = g_utf8_strdown (key, -1);
205 char const *orig = g_hash_table_lookup (state->targets, clean);
206 if (NULL != orig)
207 add_result (state, clean, go_string_get_ref_count (str));
208 g_free (clean);
211 static void
212 cb_check_func (gpointer clean, gpointer orig, gpointer user_data)
214 StringTableSearch *state = user_data;
215 GnmFunc *func = gnm_func_lookup (clean, state->wb);
216 if (func && gnm_func_get_in_use (func))
217 add_result (state, clean, 1);
220 static void
221 cb_find_target_in_module (gpointer clean, gpointer orig, gpointer user_data)
223 StringTableSearch *state = user_data;
224 unsigned n = 0;
225 char const *ptr = state->lc_code;
227 while (NULL != (ptr = strstr (ptr, clean))) {
228 n++;
229 ptr++;
232 if (n > 0)
233 add_result (state, clean, n);
236 static void
237 cb_check_module (gpointer name, gpointer code, gpointer user_data)
239 StringTableSearch *state = user_data;
240 state->lc_code = g_utf8_strdown (code, -1);
241 g_hash_table_foreach (state->targets, &cb_find_target_in_module, state);
242 g_free ((gpointer)state->lc_code);
243 state->lc_code = NULL;
246 static void
247 cb_dump_results (gpointer name, gpointer count)
249 g_print ("\t%s : %u\n", (char const *)name, GPOINTER_TO_UINT (count));
252 static void
253 search_string_table (Workbook *wb, char const *file_name, GHashTable *targets)
255 StringTableSearch state;
256 GHashTable *modules;
258 state.wb = wb;
259 state.targets = targets;
260 state.results = NULL;
261 go_string_foreach_base (&cb_check_strings, &state);
262 g_hash_table_foreach (targets, &cb_check_func, &state);
264 if (NULL != (modules = g_object_get_data (G_OBJECT (wb), "VBA")))
265 g_hash_table_foreach (modules, &cb_check_module, &state);
266 if (NULL != state.results) {
267 g_print ("%s\n", file_name);
268 g_hash_table_foreach (state.results, (GHFunc)&cb_dump_results, NULL);
269 g_hash_table_destroy (state.results);
273 static void
274 ssgrep (const char *arg, char const *uri, GOIOContext *ioc, GHashTable *targets, char const *pattern)
276 WorkbookView *wbv;
277 Workbook *wb;
278 GnmSearchReplace *search;
279 GPtrArray *cells;
280 GPtrArray *matches;
281 gboolean has_match;
283 wbv = workbook_view_new_from_uri (uri, NULL, ioc, NULL);
284 if (wbv == NULL) {
285 ssgrep_error = TRUE;
286 return;
288 wb = wb_view_get_workbook (wbv);
290 if (ssgrep_locus_results) {
291 if (ssgrep_recalc)
292 workbook_recalc_all (wb);
293 gnm_app_recalc ();
296 if (ssgrep_string_table) {
297 search_string_table (wb, arg, targets);
298 g_object_unref (wb);
299 return;
302 search = (GnmSearchReplace*)
303 g_object_new (GNM_SEARCH_REPLACE_TYPE,
304 "search-text", ssgrep_pattern,
305 "is-regexp", TRUE,
306 "invert", ssgrep_invert_match,
307 "ignore-case", ssgrep_ignore_case,
308 "match-words", ssgrep_match_words,
309 "search-strings", ssgrep_locus_values,
310 "search-other-values", ssgrep_locus_values,
311 "search-expressions", ssgrep_locus_expressions,
312 "search-expression-results", ssgrep_locus_results,
313 "search-comments", ssgrep_locus_comments,
314 "search-scripts", ssgrep_locus_scripts,
315 "sheet", workbook_sheet_by_index (wb, 0),
316 "scope", GNM_SRS_WORKBOOK,
317 NULL);
319 cells = gnm_search_collect_cells (search);
320 matches = gnm_search_filter_matching (search, cells);
321 has_match = (matches->len > 0);
323 if (has_match)
324 ssgrep_any_matches = TRUE;
326 if (ssgrep_quiet) {
327 /* Nothing */
328 } else if (ssgrep_print_nonmatching_filenames) {
329 if (!has_match)
330 g_print ("%s\n", arg);
331 } else if (ssgrep_print_matching_filenames) {
332 if (has_match)
333 g_print ("%s\n", arg);
334 } else if (ssgrep_count) {
335 if (ssgrep_print_filenames)
336 g_print ("%s:", arg);
337 g_print ("%u\n", matches->len);
338 } else {
339 unsigned ui;
340 for (ui = 0; ui < matches->len; ui++) {
341 const GnmSearchFilterResult *item = g_ptr_array_index (matches, ui);
342 char *txt = NULL;
343 const char *locus_type = "";
345 switch (item->locus) {
346 case GNM_SRL_CONTENTS: {
347 GnmCell const *cell =
348 sheet_cell_get (item->ep.sheet,
349 item->ep.eval.col,
350 item->ep.eval.row);
351 txt = gnm_cell_get_entered_text (cell);
352 locus_type = _("cell");
353 break;
356 case GNM_SRL_VALUE: {
357 GnmCell const *cell =
358 sheet_cell_get (item->ep.sheet,
359 item->ep.eval.col,
360 item->ep.eval.row);
361 if (cell && cell->value)
362 txt = value_get_as_string (cell->value);
363 locus_type = _("result");
364 break;
367 case GNM_SRL_COMMENT: {
368 GnmComment *comment = sheet_get_comment (item->ep.sheet, &item->ep.eval);
369 txt = g_strdup (cell_comment_text_get (comment));
370 locus_type = _("comment");
371 break;
373 default:
374 ; /* Probably should not happen. */
377 if (ssgrep_print_filenames)
378 g_print ("%s:", arg);
380 if (ssgrep_print_type)
381 g_print ("%s:", locus_type);
383 if (ssgrep_print_locus)
384 g_print ("%s!%s:",
385 item->ep.sheet->name_quoted,
386 cellpos_as_string (&item->ep.eval));
388 if (txt) {
389 g_print ("%s\n", txt);
390 g_free (txt);
391 } else
392 g_print ("\n");
396 gnm_search_filter_matching_free (matches);
397 gnm_search_collect_cells_free (cells);
398 g_object_unref (search);
399 g_object_unref (wb);
402 /* simple stripped down hash of lower case target, only used for string table
403 * searches */
404 static void
405 add_target (GHashTable *ssgrep_targets, char const *target)
407 char *orig = g_strstrip (g_strdup (target));
408 char *clean = g_utf8_strdown (orig, -1);
409 g_hash_table_insert (ssgrep_targets, clean, orig);
413 main (int argc, char const **argv)
415 GHashTable *ssgrep_targets;
416 GOErrorInfo *plugin_errs;
417 GOIOContext *ioc;
418 GOCmdContext *cc;
419 GOptionContext *ocontext;
420 GError *error = NULL;
421 int i, N;
422 const char *argv_stdin[] = { "fd://1", NULL };
424 /* No code before here, we need to init threads */
425 argv = gnm_pre_parse_init (argc, argv);
427 gnm_conf_set_persistence (FALSE);
429 ocontext = g_option_context_new (_("PATTERN INFILE..."));
430 g_option_context_add_main_entries (ocontext, ssgrep_options, GETTEXT_PACKAGE);
431 g_option_context_add_group (ocontext, gnm_get_option_group ());
432 g_option_context_parse (ocontext, &argc, (gchar ***)&argv, &error);
433 g_option_context_free (ocontext);
435 if (error) {
436 g_printerr (_("%s\nRun '%s --help' to see a full list of available command line options.\n"),
437 error->message, g_get_prgname ());
438 g_error_free (error);
439 return 1;
442 if (ssgrep_show_version) {
443 g_printerr (_("version '%s'\ndatadir := '%s'\nlibdir := '%s'\n"),
444 GNM_VERSION_FULL, gnm_sys_data_dir (), gnm_sys_lib_dir ());
445 return 0;
448 gnm_init ();
450 ssgrep_targets = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
451 if (ssgrep_pattern_file) {
452 char *uri = go_shell_arg_to_uri (ssgrep_pattern_file);
453 GsfInput *input;
454 GsfInputTextline *textline;
455 GError *err = NULL;
456 const unsigned char *line;
457 GString *pat;
459 input = go_file_open (uri, &err);
460 g_free (uri);
462 if (!input) {
463 g_printerr (_("%s: Cannot read %s: %s\n"),
464 g_get_prgname (), ssgrep_pattern_file, err->message);
465 g_error_free (err);
466 return 1;
469 textline = (GsfInputTextline *)gsf_input_textline_new (input);
470 g_object_unref (input);
472 pat = g_string_new (NULL);
473 while (NULL != (line = gsf_input_textline_ascii_gets (textline))) {
474 if (pat->len)
475 g_string_append_c (pat, '|');
477 if (ssgrep_fixed_strings)
478 go_regexp_quote (pat, line);
479 else
480 g_string_append (pat, line);
482 add_target (ssgrep_targets, line);
485 ssgrep_pattern = g_string_free (pat, FALSE);
487 g_object_unref (textline);
489 i = 1;
490 N = argc - i;
491 } else {
492 if (argc < 2) {
493 g_printerr (_("%s: Missing pattern\n"), g_get_prgname ());
494 return 1;
497 if (ssgrep_fixed_strings) {
498 GString *pat = g_string_new (NULL);
499 go_regexp_quote (pat, argv[1]);
500 ssgrep_pattern = g_string_free (pat, FALSE);
501 } else
502 ssgrep_pattern = g_strdup (argv[1]);
503 add_target (ssgrep_targets, argv[1]);
505 i = 2;
506 N = argc - i;
509 if (argv[i] == NULL) {
510 argv = argv_stdin;
511 i = 0;
512 N = 1;
515 cc = gnm_cmd_context_stderr_new ();
516 gnm_plugins_init (GO_CMD_CONTEXT (cc));
517 go_plugin_db_activate_plugin_list (
518 go_plugins_get_available_plugins (), &plugin_errs);
519 if (plugin_errs) {
520 /* FIXME: What do we want to do here? */
521 go_error_info_free (plugin_errs);
524 ioc = go_io_context_new (cc);
525 go_io_context_set_num_files (ioc, N);
526 go_component_set_default_command_context (cc);
528 if (ssgrep_print_filenames == (gboolean)2)
529 ssgrep_print_filenames = (N > 1);
531 for (; argv[i]; i++) {
532 const char *arg = argv[i];
533 char *uri = go_shell_arg_to_uri (arg);
534 go_io_context_processing_file (ioc, uri);
535 ssgrep (arg, uri, ioc, ssgrep_targets, ssgrep_pattern);
536 g_free (uri);
539 g_hash_table_destroy (ssgrep_targets);
541 go_component_set_default_command_context (NULL);
542 g_object_unref (ioc);
544 g_object_unref (cc);
545 gnm_shutdown ();
546 gnm_pre_parse_shutdown ();
548 /* This special case matches what "man grep" says. */
549 if (ssgrep_quiet && ssgrep_any_matches)
550 return 0;
552 if (ssgrep_error)
553 return 2;
555 return ssgrep_any_matches ? 0 : 1;