1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * ssgrep.c: Search spreadsheets for selected strings
5 * Copyright (C) 2008 Jody Goldberg
6 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
8 #include <gnumeric-config.h>
10 #include "libgnumeric.h"
11 #include <goffice/goffice.h>
12 #include "command-context-stderr.h"
13 #include "workbook-view.h"
15 #include "application.h"
17 #include "gnm-plugin.h"
23 #include "parse-util.h"
24 #include "sheet-object-cell-comment.h"
26 #include <gsf/gsf-input-stdio.h>
27 #include <gsf/gsf-input-textline.h>
28 #include <glib/gi18n.h>
31 static gboolean ssgrep_locus_values
= TRUE
;
32 static gboolean ssgrep_locus_expressions
= TRUE
;
33 static gboolean ssgrep_locus_results
= FALSE
;
34 static gboolean ssgrep_locus_comments
= TRUE
;
35 static gboolean ssgrep_locus_scripts
= TRUE
;
36 static gboolean ssgrep_ignore_case
= FALSE
;
37 static gboolean ssgrep_match_words
= FALSE
;
38 static gboolean ssgrep_quiet
= FALSE
;
39 static gboolean ssgrep_count
= FALSE
;
40 static gboolean ssgrep_print_filenames
= (gboolean
)2;
41 static gboolean ssgrep_print_matching_filenames
= FALSE
;
42 static gboolean ssgrep_print_nonmatching_filenames
= FALSE
;
43 static gboolean ssgrep_print_locus
= FALSE
;
44 static gboolean ssgrep_print_type
= FALSE
;
45 static char *ssgrep_pattern
= NULL
;
46 static gboolean ssgrep_fixed_strings
= FALSE
;
47 static gboolean ssgrep_recalc
= FALSE
;
48 static gboolean ssgrep_invert_match
= FALSE
;
49 static gboolean ssgrep_string_table
= FALSE
;
51 static gboolean ssgrep_show_version
= FALSE
;
52 static char *ssgrep_pattern_file
= NULL
;
54 static gboolean ssgrep_error
= FALSE
;
55 static gboolean ssgrep_any_matches
= FALSE
;
/*
 * Command-line option table handed to g_option_context_add_main_entries ()
 * in main ().  Each entry binds a switch to one of the file-scope ssgrep_*
 * variables and carries a translatable (N_) help string.
 *
 * NOTE(review): this listing omits lines of the table (the long/short names
 * of most entries, the entry braces and the terminating entry); only the
 * flag/type/target and description lines are reproduced below.
 */
57 static GOptionEntry
const ssgrep_options
[] = {
60 0, G_OPTION_ARG_NONE
, &ssgrep_count
,
61 N_("Only print a count of matches per file"),
65 "string-table-count", 'C',
66 0, G_OPTION_ARG_NONE
, &ssgrep_string_table
,
67 N_("Search only via the string table, display a count of the references."),
/* The only string-valued option: receives the pattern file name. */
73 0, G_OPTION_ARG_STRING
, &ssgrep_pattern_file
,
74 N_("Get patterns from a file, one per line"),
80 0, G_OPTION_ARG_NONE
, &ssgrep_fixed_strings
,
81 N_("Pattern is a set of fixed strings"),
87 0, G_OPTION_ARG_NONE
, &ssgrep_print_filenames
,
88 N_("Print the filename for each match"),
/*
 * Shares its target variable with the previous entry; with
 * G_OPTION_FLAG_REVERSE the switch stores FALSE instead of TRUE
 * (GLib semantics for G_OPTION_ARG_NONE).
 */
93 "without-filename", 'h',
94 G_OPTION_FLAG_REVERSE
, G_OPTION_ARG_NONE
, &ssgrep_print_filenames
,
95 N_("Do not print the filename for each match"),
101 0, G_OPTION_ARG_NONE
, &ssgrep_ignore_case
,
102 N_("Ignore differences in letter case"),
107 "files-with-matches", 'l',
108 0, G_OPTION_ARG_NONE
, &ssgrep_print_matching_filenames
,
109 N_("Print filenames with matches"),
114 "files-without-matches", 'L',
115 0, G_OPTION_ARG_NONE
, &ssgrep_print_nonmatching_filenames
,
116 N_("Print filenames without matches"),
122 0, G_OPTION_ARG_NONE
, &ssgrep_print_locus
,
123 N_("Print the location of each match"),
129 0, G_OPTION_ARG_NONE
, &ssgrep_quiet
,
130 N_("Suppress all normal output"),
135 "search-results", 'R',
136 0, G_OPTION_ARG_NONE
, &ssgrep_locus_results
,
137 N_("Search results of expressions too"),
143 0, G_OPTION_ARG_NONE
, &ssgrep_print_type
,
144 N_("Print the location type of each match"),
150 0, G_OPTION_ARG_NONE
, &ssgrep_invert_match
,
151 N_("Search for cells that do not match"),
157 0, G_OPTION_ARG_NONE
, &ssgrep_show_version
,
158 N_("Display program version"),
164 0, G_OPTION_ARG_NONE
, &ssgrep_match_words
,
165 N_("Match only whole words"),
171 0, G_OPTION_ARG_NONE
, &ssgrep_recalc
,
172 N_("Recalculate all cells"),
176 /* ---------------------------------------- */
/*
 * add_result:
 * @state: string-table search state; its results table is created on demand.
 * @clean: key under which the count is accumulated.
 * @n: amount to add to the count stored for @clean.
 *
 * Adds @n to the running total kept for @clean in state->results.  The
 * total is stored directly in the value pointer (GUINT_TO_POINTER), not in
 * allocated memory.
 *
 * The table is created with plain g_hash_table_new (), i.e. without key or
 * value destroy functions, and @clean is stored as the key pointer itself
 * rather than a copy, so the string must outlive state->results.
 *
 * NOTE(review): the return type, braces and the declaration of `prev' are
 * on lines not present in this listing.
 */
189 add_result (StringTableSearch
*state
, char const *clean
, unsigned int n
)
/* First result for this workbook: create the table lazily. */
193 if (NULL
== state
->results
)
194 state
->results
= g_hash_table_new (g_str_hash
, g_str_equal
);
/* Otherwise fold in any previously stored count for this key. */
195 else if (NULL
!= (prev
= g_hash_table_lookup (state
->results
, clean
)))
196 n
+= GPOINTER_TO_UINT (prev
);
197 g_hash_table_replace (state
->results
, (gpointer
) clean
, GUINT_TO_POINTER (n
));
/*
 * cb_check_strings:
 * @key: string-table key; lower-cased before the lookup.
 * @str: string-table entry whose reference count is recorded.
 * @user_data: the StringTableSearch being filled in.
 *
 * Callback passed to go_string_foreach_base () by search_string_table ().
 * Lower-cases @key, looks the result up in state->targets and records the
 * entry's reference count via add_result ().
 *
 * NOTE(review): @key is marked G_GNUC_UNUSED although it is read below.
 * NOTE(review): `clean' is a fresh allocation from g_utf8_strdown () and
 * add_result () stores that pointer as a hash key; whether the add_result ()
 * call is guarded by `orig' and whether `clean' is freed afterwards happens
 * on lines not present in this listing -- confirm ownership.
 */
201 cb_check_strings (G_GNUC_UNUSED gpointer key
, gpointer str
, gpointer user_data
)
203 StringTableSearch
*state
= user_data
;
204 char *clean
= g_utf8_strdown (key
, -1);
205 char const *orig
= g_hash_table_lookup (state
->targets
, clean
);
207 add_result (state
, clean
, go_string_get_ref_count (str
));
/*
 * cb_check_func:
 * @clean: lower-cased target (key of the targets table).
 * @orig: original target text (value of the targets table); not read in the
 *        lines shown.
 * @user_data: the StringTableSearch being filled in.
 *
 * GHFunc run over the targets table by search_string_table ().  Looks up a
 * function named @clean in state->wb and records its usage_count via
 * add_result ().
 *
 * NOTE(review): `func' is dereferenced below; any NULL check for a target
 * that is not a function name would be on a line not present in this
 * listing -- confirm.
 */
212 cb_check_func (gpointer clean
, gpointer orig
, gpointer user_data
)
214 StringTableSearch
*state
= user_data
;
215 GnmFunc
*func
= gnm_func_lookup (clean
, state
->wb
);
217 add_result (state
, clean
, func
->usage_count
);
/*
 * cb_find_target_in_module:
 * @clean: lower-cased target to look for.
 * @orig: original target text; not read in the lines shown.
 * @user_data: the StringTableSearch; state->lc_code is the text scanned.
 *
 * GHFunc run over the targets table by cb_check_module ().  Repeatedly
 * applies strstr () to state->lc_code to find occurrences of @clean, then
 * reports a count `n' through add_result ().
 *
 * NOTE(review): the declaration and update of `n', the advance of `ptr'
 * inside the loop, and the closing braces are on lines not present in this
 * listing.
 */
221 cb_find_target_in_module (gpointer clean
, gpointer orig
, gpointer user_data
)
223 StringTableSearch
*state
= user_data
;
225 char const *ptr
= state
->lc_code
;
227 while (NULL
!= (ptr
= strstr (ptr
, clean
))) {
233 add_result (state
, clean
, n
);
/*
 * cb_check_module:
 * @name: module name (hash key); not read in the lines shown.
 * @code: module source text.
 * @user_data: the StringTableSearch being filled in.
 *
 * GHFunc run over the "VBA" module table by search_string_table ().
 * Stores a lower-cased copy of @code in state->lc_code, runs
 * cb_find_target_in_module () for every target, then frees the copy and
 * resets state->lc_code to NULL so no dangling pointer is left in @state.
 */
237 cb_check_module (gpointer name
, gpointer code
, gpointer user_data
)
239 StringTableSearch
*state
= user_data
;
240 state
->lc_code
= g_utf8_strdown (code
, -1);
241 g_hash_table_foreach (state
->targets
, &cb_find_target_in_module
, state
);
/* lc_code is declared const in the state, hence the cast for g_free (). */
242 g_free ((gpointer
)state
->lc_code
);
243 state
->lc_code
= NULL
;
/*
 * cb_dump_results:
 * @name: result key, printed as a string.
 * @count: accumulated count, packed into the pointer by add_result ().
 *
 * Prints one "<tab>name : count" line.  It takes two parameters and is
 * cast to GHFunc where search_string_table () passes it to
 * g_hash_table_foreach ().
 */
247 cb_dump_results (gpointer name
, gpointer count
)
249 g_print ("\t%s : %u\n", (char const *)name
, GPOINTER_TO_UINT (count
));
/*
 * search_string_table:
 * @wb: workbook being examined.
 * @file_name: name printed ahead of the results.
 * @targets: table of lower-cased target -> original target.
 *
 * String-table mode.  Collects counts from three sources into
 * state.results: the string table (cb_check_strings), function usage
 * (cb_check_func) and, when the workbook carries a "VBA" data item, the
 * module sources (cb_check_module).  If anything was recorded, prints
 * @file_name followed by one line per result and destroys the table.
 *
 * NOTE(review): the initialisation of state.wb (read by cb_check_func) and
 * the declaration of `modules' are on lines not present in this listing.
 */
253 search_string_table (Workbook
*wb
, char const *file_name
, GHashTable
*targets
)
255 StringTableSearch state
;
259 state
.targets
= targets
;
/* Left NULL until add_result () records the first hit. */
260 state
.results
= NULL
;
261 go_string_foreach_base (&cb_check_strings
, &state
);
262 g_hash_table_foreach (targets
, &cb_check_func
, &state
);
264 if (NULL
!= (modules
= g_object_get_data (G_OBJECT (wb
), "VBA")))
265 g_hash_table_foreach (modules
, &cb_check_module
, &state
);
/* A non-NULL table means at least one result was added. */
266 if (NULL
!= state
.results
) {
267 g_print ("%s\n", file_name
);
268 g_hash_table_foreach (state
.results
, (GHFunc
)&cb_dump_results
, NULL
);
269 g_hash_table_destroy (state
.results
);
/*
 * ssgrep:
 * @arg: file argument as given on the command line; used in all output.
 * @uri: URI form of @arg, used to load the workbook.
 * @ioc: IO context used while loading.
 * @targets: target table, used only in string-table mode.
 * @pattern: search pattern.
 *
 * Loads one workbook and either runs the string-table search or builds a
 * GnmSearchReplace object from the ssgrep_* flags, collects the matching
 * cells and prints them according to the output options.
 *
 * NOTE(review): in the lines shown the search text comes from the global
 * ssgrep_pattern, not from @pattern; main () passes that same global, so
 * the two currently coincide.
 * NOTE(review): many lines of this function (local declarations, error
 * handling after loading, several conditions and closing braces) are not
 * present in this listing.
 */
274 ssgrep (const char *arg
, char const *uri
, GOIOContext
*ioc
, GHashTable
*targets
, char const *pattern
)
278 GnmSearchReplace
*search
;
283 wbv
= workbook_view_new_from_uri (uri
, NULL
, ioc
, NULL
);
288 wb
= wb_view_get_workbook (wbv
);
/* Expression results are only searched on request, so only then recalc. */
290 if (ssgrep_locus_results
) {
292 workbook_recalc_all (wb
);
296 if (ssgrep_string_table
) {
297 search_string_table (wb
, arg
, targets
);
/* Regular mode: configure a workbook-wide search from the option flags. */
302 search
= (GnmSearchReplace
*)
303 g_object_new (GNM_SEARCH_REPLACE_TYPE
,
304 "search-text", ssgrep_pattern
,
306 "invert", ssgrep_invert_match
,
307 "ignore-case", ssgrep_ignore_case
,
308 "match-words", ssgrep_match_words
,
/* ssgrep_locus_values controls both strings and other values. */
309 "search-strings", ssgrep_locus_values
,
310 "search-other-values", ssgrep_locus_values
,
311 "search-expressions", ssgrep_locus_expressions
,
312 "search-expression-results", ssgrep_locus_results
,
313 "search-comments", ssgrep_locus_comments
,
314 "search-scripts", ssgrep_locus_scripts
,
315 "sheet", workbook_sheet_by_index (wb
, 0),
316 "scope", GNM_SRS_WORKBOOK
,
319 cells
= gnm_search_collect_cells (search
);
320 matches
= gnm_search_filter_matching (search
, cells
);
321 has_match
= (matches
->len
> 0);
/* Remembered globally; main () turns it into the exit status. */
324 ssgrep_any_matches
= TRUE
;
/* Output mode chain; the conditions guarding each g_print are partly not shown. */
328 } else if (ssgrep_print_nonmatching_filenames
) {
330 g_print ("%s\n", arg
);
331 } else if (ssgrep_print_matching_filenames
) {
333 g_print ("%s\n", arg
);
334 } else if (ssgrep_count
) {
335 if (ssgrep_print_filenames
)
336 g_print ("%s:", arg
);
337 g_print ("%u\n", matches
->len
);
/* Default mode: one output line per match. */
340 for (ui
= 0; ui
< matches
->len
; ui
++) {
341 const GnmSearchFilterResult
*item
= g_ptr_array_index (matches
, ui
);
343 const char *locus_type
= "";
/* Fetch the text to print and a translated label for the match kind. */
345 switch (item
->locus
) {
346 case GNM_SRL_CONTENTS
: {
347 GnmCell
const *cell
=
348 sheet_cell_get (item
->ep
.sheet
,
351 txt
= gnm_cell_get_entered_text (cell
);
352 locus_type
= _("cell");
356 case GNM_SRL_VALUE
: {
357 GnmCell
const *cell
=
358 sheet_cell_get (item
->ep
.sheet
,
361 if (cell
&& cell
->value
)
362 txt
= value_get_as_string (cell
->value
);
363 locus_type
= _("result");
367 case GNM_SRL_COMMENT
: {
368 GnmComment
*comment
= sheet_get_comment (item
->ep
.sheet
, &item
->ep
.eval
);
369 txt
= g_strdup (cell_comment_text_get (comment
));
370 locus_type
= _("comment");
374 ; /* Probably should not happen. */
/* Optional "file:", "type:" and location prefixes, then the text itself. */
377 if (ssgrep_print_filenames
)
378 g_print ("%s:", arg
);
380 if (ssgrep_print_type
)
381 g_print ("%s:", locus_type
);
383 if (ssgrep_print_locus
)
385 item
->ep
.sheet
->name_quoted
,
386 cellpos_as_string (&item
->ep
.eval
));
389 g_print ("%s\n", txt
);
/* Release search results, the collected cells and the search object. */
396 gnm_search_filter_matching_free (matches
);
397 gnm_search_collect_cells_free (cells
);
398 g_object_unref (search
);
402 /* simple stripped down hash of lower case target, only used for string table
/*
 * add_target:
 * @ssgrep_targets: table receiving the entry.
 * @target: pattern text as supplied by the user.
 *
 * Inserts @target into the table, keyed by its lower-cased form.  `orig'
 * is a stripped copy of @target (g_strstrip on a g_strdup) and `clean' is
 * a lower-cased copy of `orig'; both allocations are handed to the table,
 * which main () creates with g_free for keys and values.
 */
405 add_target (GHashTable
*ssgrep_targets
, char const *target
)
407 char *orig
= g_strstrip (g_strdup (target
));
408 char *clean
= g_utf8_strdown (orig
, -1);
409 g_hash_table_insert (ssgrep_targets
, clean
, orig
);
/*
 * main:
 * Parses the command line, builds the search pattern (from a pattern file
 * or from argv[1]), initialises plugins and the IO context, runs ssgrep ()
 * on every remaining argument and returns 0 if anything matched, else 1.
 *
 * NOTE(review): many lines of this function (local declarations such as
 * `i', `N', `cc', `ioc', `input', `err', `pat'; several conditions, early
 * returns and closing braces) are not present in this listing.
 */
413 main (int argc
, char const **argv
)
415 GHashTable
*ssgrep_targets
;
416 GOErrorInfo
*plugin_errs
;
419 GOptionContext
*ocontext
;
420 GError
*error
= NULL
;
/*
 * NOTE(review): the variable is named for stdin, but "fd://1" names file
 * descriptor 1, conventionally stdout -- confirm whether "fd://0" was
 * intended.  Its use is on lines not shown.
 */
422 const char *argv_stdin
[] = { "fd://1", NULL
};
424 /* No code before here, we need to init threads */
425 argv
= gnm_pre_parse_init (argc
, argv
);
427 ocontext
= g_option_context_new (_("PATTERN INFILE..."));
428 g_option_context_add_main_entries (ocontext
, ssgrep_options
, GETTEXT_PACKAGE
);
429 g_option_context_add_group (ocontext
, gnm_get_option_group ());
/* The boolean result is not used; failure is reported through `error'. */
430 g_option_context_parse (ocontext
, &argc
, (gchar
***)&argv
, &error
);
431 g_option_context_free (ocontext
);
434 g_printerr (_("%s\nRun '%s --help' to see a full list of available command line options.\n"),
435 error
->message
, g_get_prgname ());
436 g_error_free (error
);
440 if (ssgrep_show_version
) {
441 g_printerr (_("version '%s'\ndatadir := '%s'\nlibdir := '%s'\n"),
442 GNM_VERSION_FULL
, gnm_sys_data_dir (), gnm_sys_lib_dir ());
/* The table owns both key and value strings (see add_target ()). */
448 ssgrep_targets
= g_hash_table_new_full (g_str_hash
, g_str_equal
, g_free
, g_free
);
/* Pattern source 1: a file with one pattern per line. */
449 if (ssgrep_pattern_file
) {
450 char *uri
= go_shell_arg_to_uri (ssgrep_pattern_file
);
452 GsfInputTextline
*textline
;
454 const unsigned char *line
;
457 input
= go_file_open (uri
, &err
);
461 g_printerr (_("%s: Cannot read %s: %s\n"),
462 g_get_prgname (), ssgrep_pattern_file
, err
->message
);
/* The textline wrapper is used after our own reference to `input' is dropped. */
467 textline
= (GsfInputTextline
*)gsf_input_textline_new (input
);
468 g_object_unref (input
);
/*
 * Build one pattern out of all lines, separated by '|'.
 * NOTE(review): `line' is const unsigned char * but is passed to
 * parameters declared as char pointers (g_string_append, add_target)
 * without a visible cast.
 */
470 pat
= g_string_new (NULL
);
471 while (NULL
!= (line
= gsf_input_textline_ascii_gets (textline
))) {
473 g_string_append_c (pat
, '|');
475 if (ssgrep_fixed_strings
)
476 go_regexp_quote (pat
, line
);
478 g_string_append (pat
, line
);
480 add_target (ssgrep_targets
, line
);
/* Keep the character data; only the GString wrapper is freed. */
483 ssgrep_pattern
= g_string_free (pat
, FALSE
);
485 g_object_unref (textline
);
491 g_printerr (_("%s: Missing pattern\n"), g_get_prgname ());
/* Pattern source 2: argv[1], regexp-quoted when fixed strings are requested. */
495 if (ssgrep_fixed_strings
) {
496 GString
*pat
= g_string_new (NULL
);
497 go_regexp_quote (pat
, argv
[1]);
498 ssgrep_pattern
= g_string_free (pat
, FALSE
);
500 ssgrep_pattern
= g_strdup (argv
[1]);
501 add_target (ssgrep_targets
, argv
[1]);
507 if (argv
[i
] == NULL
) {
513 cc
= gnm_cmd_context_stderr_new ();
514 gnm_plugins_init (GO_CMD_CONTEXT (cc
));
515 go_plugin_db_activate_plugin_list (
516 go_plugins_get_available_plugins (), &plugin_errs
);
518 /* FIXME: What do we want to do here? */
519 go_error_info_free (plugin_errs
);
522 ioc
= go_io_context_new (cc
);
523 go_io_context_set_num_files (ioc
, N
);
524 go_component_set_default_command_context (cc
);
/* Resolve the "unset" sentinel: print file names only for multiple files. */
526 if (ssgrep_print_filenames
== (gboolean
)2)
527 ssgrep_print_filenames
= (N
> 1);
/* NOTE(review): the release of `uri' after each file is not visible here. */
529 for (; argv
[i
]; i
++) {
530 const char *arg
= argv
[i
];
531 char *uri
= go_shell_arg_to_uri (arg
);
532 go_io_context_processing_file (ioc
, uri
);
533 ssgrep (arg
, uri
, ioc
, ssgrep_targets
, ssgrep_pattern
);
537 g_hash_table_destroy (ssgrep_targets
);
539 go_component_set_default_command_context (NULL
);
540 g_object_unref (ioc
);
544 gnm_pre_parse_shutdown ();
546 /* This special case matches what "man grep" says. */
547 if (ssgrep_quiet
&& ssgrep_any_matches
)
/* Exit status: 0 when at least one match was found, 1 otherwise. */
553 return ssgrep_any_matches
? 0 : 1;