Editor: sync with new global config location (user menu and syntax files).
[midnight-commander.git] / src / search / regex.c
blob70c05ae6fd6c79d303de578c008cac8619f1d198
1 /*
2 Search text engine.
3 Regex search
5 Copyright (C) 2009 The Free Software Foundation, Inc.
7 Written by:
8 Slava Zanko <slavazanko@gmail.com>, 2009.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software; you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be
18 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
19 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
25 MA 02110-1301, USA.
28 #include <config.h>
31 #include "../src/global.h"
32 #include "../src/search/search.h"
33 #include "../src/search/internal.h"
34 #include "../src/strutil.h"
35 #include "../src/charsets.h"
37 /*** global variables ****************************************************************************/
39 /*** file scope macro definitions ****************************************************************/
41 /*** file scope type declarations ****************************************************************/
43 /*** file scope variables ************************************************************************/
45 /*** file scope functions ************************************************************************/
47 static gboolean
48 mc_search__regex_str_append_if_special (GString * copy_to, GString * regex_str, gsize * offset)
50 char *tmp_regex_str;
51 gsize spec_chr_len;
52 char **spec_chr;
53 char *special_chars[] = {
54 "\\s", "\\S",
55 "\\d", "\\D",
56 "\\B", "\\B",
57 "\\w", "\\W",
58 "\\t", "\\n",
59 "\\r", "\\f",
60 "\\a", "\\e",
61 "\\x", "\\X",
62 "\\c", "\\C",
63 "\\l", "\\L",
64 "\\u", "\\U",
65 "\\E", "\\Q",
66 NULL
68 spec_chr = special_chars;
70 tmp_regex_str = &(regex_str->str[*offset]);
72 while (*spec_chr) {
73 spec_chr_len = strlen (*spec_chr);
74 if (!strncmp (tmp_regex_str, *spec_chr, spec_chr_len)) {
75 if (!mc_search__regex_is_char_escaped (regex_str->str, tmp_regex_str - 1)) {
76 if (!strncmp ("\\x", *spec_chr, spec_chr_len)) {
77 if (*(tmp_regex_str + spec_chr_len) == '{') {
78 while ((spec_chr_len < regex_str->len - *offset)
79 && *(tmp_regex_str + spec_chr_len) != '}')
80 spec_chr_len++;
81 if (*(tmp_regex_str + spec_chr_len) == '}')
82 spec_chr_len++;
83 } else
84 spec_chr_len += 2;
86 g_string_append_len (copy_to, tmp_regex_str, spec_chr_len);
87 *offset += spec_chr_len;
88 return TRUE;
91 spec_chr++;
93 return FALSE;
97 /* --------------------------------------------------------------------------------------------- */
98 static void
99 mc_search__cond_struct_new_regex_hex_add (const char *charset, GString * str_to,
100 const char *one_char, gsize str_len)
102 GString *upp, *low;
103 gchar *tmp_str;
104 gsize loop;
106 upp = mc_search__toupper_case_str (charset, one_char, str_len);
107 low = mc_search__tolower_case_str (charset, one_char, str_len);
109 for (loop = 0; loop < upp->len; loop++) {
111 if (loop < low->len) {
112 if (upp->str[loop] == low->str[loop])
113 tmp_str = g_strdup_printf ("\\x%02X", (unsigned char) upp->str[loop]);
114 else
115 tmp_str =
116 g_strdup_printf ("[\\x%02X\\x%02X]", (unsigned char) upp->str[loop],
117 (unsigned char) low->str[loop]);
118 } else {
119 tmp_str = g_strdup_printf ("\\x%02X", (unsigned char) upp->str[loop]);
121 g_string_append (str_to, tmp_str);
122 g_free (tmp_str);
124 g_string_free (upp, TRUE);
125 g_string_free (low, TRUE);
128 /* --------------------------------------------------------------------------------------------- */
130 static void
131 mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * str_to,
132 GString * str_from)
134 GString *recoded_part;
135 gchar *one_char;
136 gsize loop;
137 gboolean just_letters;
139 loop = 0;
140 recoded_part = g_string_new ("");
142 while (loop < str_from->len) {
143 one_char =
144 mc_search__get_one_symbol (charset, &(str_from->str[loop]),
145 (str_from->len - loop > 6) ? 6 : str_from->len - loop,
146 &just_letters);
147 if (!strlen (one_char)) {
148 loop++;
149 continue;
151 if (just_letters) {
152 mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char,
153 strlen (one_char));
154 } else {
155 g_string_append (recoded_part, one_char);
157 loop += strlen (one_char);
158 if (!strlen (one_char))
159 loop++;
160 g_free (one_char);
163 g_string_append (str_to, recoded_part->str);
164 g_string_free (recoded_part, TRUE);
165 g_string_set_size (str_from, 0);
168 /* --------------------------------------------------------------------------------------------- */
170 static GString *
171 mc_search__cond_struct_new_regex_ci_str (const char *charset, const char *str, gsize str_len)
173 GString *accumulator, *spec_char, *ret_str;
174 gsize loop;
175 GString *tmp;
176 tmp = g_string_new_len (str, str_len);
179 ret_str = g_string_new ("");
180 accumulator = g_string_new ("");
181 spec_char = g_string_new ("");
182 loop = 0;
184 while (loop <= str_len) {
185 if (mc_search__regex_str_append_if_special (spec_char, tmp, &loop)) {
186 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
187 g_string_append_len (ret_str, spec_char->str, spec_char->len);
188 g_string_set_size (spec_char, 0);
189 continue;
192 if (tmp->str[loop] == '['
193 && !mc_search__regex_is_char_escaped (tmp->str, &(tmp->str[loop]) - 1)) {
194 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
196 while (loop < str_len && !(tmp->str[loop] == ']'
197 && !mc_search__regex_is_char_escaped (tmp->str,
198 &(tmp->str[loop]) -
199 1))) {
200 g_string_append_c (ret_str, tmp->str[loop]);
201 loop++;
204 g_string_append_c (ret_str, tmp->str[loop]);
205 loop++;
206 continue;
209 TODO: handle [ and ]
211 g_string_append_c (accumulator, tmp->str[loop]);
212 loop++;
214 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
216 g_string_free (accumulator, TRUE);
217 g_string_free (spec_char, TRUE);
218 g_string_free (tmp, TRUE);
219 return ret_str;
222 /* --------------------------------------------------------------------------------------------- */
224 static mc_search__found_cond_t
225 mc_search__regex_found_cond_one (mc_search_t * mc_search, mc_search_regex_t * regex,
226 GString * search_str)
228 #if GLIB_CHECK_VERSION (2, 14, 0)
229 GError *error = NULL;
231 if (!g_regex_match_full
232 (regex, search_str->str, -1, 0, 0, &mc_search->regex_match_info, &error)) {
233 g_match_info_free (mc_search->regex_match_info);
234 mc_search->regex_match_info = NULL;
235 if (error) {
236 mc_search->error = MC_SEARCH_E_REGEX;
237 mc_search->error_str = str_conv_gerror_message (error, _(" Regular expression error "));
238 g_error_free (error);
239 return COND__FOUND_ERROR;
241 return COND__NOT_FOUND;
243 #else
244 #if HAVE_LIBPCRE
245 mc_search->num_rezults = pcre_exec (regex, mc_search->regex_match_info,
246 search_str->str, search_str->len, 0, 0, mc_search->iovector,
247 MC_SEARCH__PCRE_MAX_MATCHES);
248 if (mc_search->num_rezults < 0) {
249 return COND__NOT_FOUND;
251 #else /* HAVE_LIBPCRE */
253 if (regexec (regex, search_str->str, MC_SEARCH__NUM_REPL_ARGS, mc_search->regex_match_info, 0))
254 return COND__NOT_FOUND;
256 for (mc_search->num_rezults = 0; mc_search->num_rezults < MC_SEARCH__NUM_REPL_ARGS;
257 mc_search->num_rezults++) {
258 if (mc_search->regex_match_info[mc_search->num_rezults].rm_eo == 0)
259 break;
262 #endif /* HAVE_LIBPCRE */
263 #endif /* GLIB_CHECK_VERSION (2, 14, 0) */
264 return COND__FOUND_OK;
268 /* --------------------------------------------------------------------------------------------- */
270 static mc_search__found_cond_t
271 mc_search__regex_found_cond (mc_search_t * mc_search, GString * search_str)
273 gsize loop1;
274 mc_search_cond_t *mc_search_cond;
275 mc_search__found_cond_t ret;
277 for (loop1 = 0; loop1 < mc_search->conditions->len; loop1++) {
278 mc_search_cond = (mc_search_cond_t *) g_ptr_array_index (mc_search->conditions, loop1);
280 if (!mc_search_cond->regex_handle)
281 continue;
283 ret = mc_search__regex_found_cond_one (mc_search, mc_search_cond->regex_handle, search_str);
285 if (ret != COND__NOT_FOUND)
286 return ret;
288 return COND__NOT_ALL_FOUND;
291 /* --------------------------------------------------------------------------------------------- */
293 #if ! GLIB_CHECK_VERSION (2, 14, 0)
294 static int
295 mc_search_regex__get_num_replace_tokens (const gchar * str, gsize len)
297 int count_tokens = 0;
298 gsize loop;
299 for (loop = 0; loop < len - 1; loop++) {
300 if (str[loop] == '\\' && (str[loop + 1] & (char) 0xf0) == 0x30 /* 0-9 */ ) {
301 if (mc_search__regex_is_char_escaped (str, &str[loop - 1]))
302 continue;
303 count_tokens++;
306 return count_tokens;
309 /* --------------------------------------------------------------------------------------------- */
311 static void
312 mc_search_regex__append_found_token_by_num (const mc_search_t * mc_search, const gchar * fnd_str,
313 GString * str, gsize index)
315 #if HAVE_LIBPCRE
316 int fnd_start = mc_search->iovector[index * 2 + 0];
317 int fnd_end = mc_search->iovector[index * 2 + 1];
318 #else /* HAVE_LIBPCRE */
319 int fnd_start = mc_search->regex_match_info[index].rm_so;
320 int fnd_end = mc_search->regex_match_info[index].rm_eo;
321 #endif /* HAVE_LIBPCRE */
323 int fnd_len = fnd_end - fnd_start;
324 gchar *start_str = fnd_str + fnd_start;
326 if (fnd_len == 0)
327 return;
329 g_string_append_len (str, start_str, fnd_len);
333 #endif /* GLIB_CHECK_VERSION (2, 14, 0) */
336 /*** public functions ****************************************************************************/
338 void
339 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * mc_search,
340 mc_search_cond_t * mc_search_cond)
342 GString *tmp = NULL;
343 #if GLIB_CHECK_VERSION (2, 14, 0)
344 GError *error = NULL;
345 #else
346 const char *error;
347 int erroffset;
348 #endif
350 if (!mc_search->is_case_sentitive) {
351 tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len);
352 g_string_free (mc_search_cond->str, TRUE);
353 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp->str, tmp->len);
354 g_string_free (tmp, TRUE);
356 #if GLIB_CHECK_VERSION (2, 14, 0)
357 mc_search_cond->regex_handle =
358 g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, &error);
360 if (error != NULL) {
361 mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
362 mc_search->error_str = str_conv_gerror_message (error, _(" Regular expression error "));
363 g_error_free (error);
364 return;
366 #else /* GLIB_CHECK_VERSION (2, 14, 0) */
367 #if HAVE_LIBPCRE
368 mc_search_cond->regex_handle =
369 pcre_compile (mc_search_cond->str->str, PCRE_EXTRA, &error, &erroffset, NULL);
370 if (mc_search_cond->regex_handle == NULL) {
371 mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
372 mc_search->error_str = g_strdup (error);
373 return;
375 mc_search->regex_match_info = pcre_study (mc_search_cond->regex_handle, 0, &error);
376 if (mc_search->regex_match_info == NULL) {
377 if (error) {
378 mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
379 mc_search->error_str = g_strdup (error);
380 free (mc_search_cond->regex_handle);
381 mc_search_cond->regex_handle = NULL;
382 return;
385 #else /* HAVE_LIBPCRE */
386 mc_search_cond->regex_handle = g_malloc0 (sizeof (regex_t));
387 erroffset = regcomp (mc_search_cond->regex_handle, mc_search_cond->str->str, REG_EXTENDED);
388 if (erroffset) {
389 size_t err_len = regerror (erroffset, mc_search_cond->regex_handle, NULL, 0);
390 error = g_malloc (err_len + 1);
391 regerror (erroffset, mc_search_cond->regex_handle, error, err_len);
392 mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
393 mc_search->error_str = error;
394 regfree (mc_search_cond->regex_handle);
395 mc_search_cond->regex_handle = NULL;
396 return;
398 mc_search->regex_match_info = g_new0 (mc_search_matchinfo_t, MC_SEARCH__NUM_REPL_ARGS);
399 #endif /* HAVE_LIBPCRE */
400 #endif /* GLIB_CHECK_VERSION (2, 14, 0) */
403 /* --------------------------------------------------------------------------------------------- */
405 gboolean
406 mc_search__run_regex (mc_search_t * mc_search, const void *user_data,
407 gsize start_search, gsize end_search, gsize * found_len)
409 gsize current_pos, start_buffer;
410 int current_chr = 0;
411 gint start_pos;
412 gint end_pos;
414 if (mc_search->regex_buffer != NULL)
415 g_string_free (mc_search->regex_buffer, TRUE);
417 mc_search->regex_buffer = g_string_new ("");
419 current_pos = start_search;
420 while (current_pos <= end_search) {
421 g_string_set_size (mc_search->regex_buffer, 0);
422 start_buffer = current_pos;
424 while (1) {
425 current_chr = mc_search__get_char (mc_search, user_data, current_pos);
426 if (current_chr == -1)
427 break;
429 g_string_append_c (mc_search->regex_buffer, (char) current_chr);
431 current_pos++;
433 if (current_chr == 0 || (char) current_chr == '\n')
434 break;
436 if (current_pos > end_search)
437 break;
440 if (current_chr == -1)
441 break;
443 switch (mc_search__regex_found_cond (mc_search, mc_search->regex_buffer)) {
444 case COND__FOUND_OK:
445 #if GLIB_CHECK_VERSION (2, 14, 0)
446 g_match_info_fetch_pos (mc_search->regex_match_info, 0, &start_pos, &end_pos);
447 #else /* GLIB_CHECK_VERSION (2, 14, 0) */
448 #if HAVE_LIBPCRE
449 start_pos = mc_search->iovector[0];
450 end_pos = mc_search->iovector[1];
451 #else /* HAVE_LIBPCRE */
452 start_pos = mc_search->regex_match_info[0].rm_so;
453 end_pos = mc_search->regex_match_info[0].rm_eo;
454 #endif /* HAVE_LIBPCRE */
455 #endif /* GLIB_CHECK_VERSION (2, 14, 0) */
456 if (found_len)
457 *found_len = end_pos - start_pos;
458 mc_search->normal_offset = start_buffer + start_pos;
459 return TRUE;
460 break;
461 case COND__NOT_ALL_FOUND:
462 break;
463 default:
464 g_string_free (mc_search->regex_buffer, TRUE);
465 mc_search->regex_buffer = NULL;
466 return FALSE;
467 break;
470 g_string_free (mc_search->regex_buffer, TRUE);
471 mc_search->regex_buffer = NULL;
472 mc_search->error = MC_SEARCH_E_NOTFOUND;
473 mc_search->error_str = g_strdup (_(STR_E_NOTFOUND));
474 return FALSE;
477 /* --------------------------------------------------------------------------------------------- */
478 GString *
479 mc_search_regex_prepare_replace_str (mc_search_t * mc_search, GString * replace_str)
481 GString *ret;
482 gchar *tmp_str;
483 #if GLIB_CHECK_VERSION (2, 14, 0)
484 GError *error = NULL;
486 tmp_str = g_match_info_expand_references (mc_search->regex_match_info,
487 replace_str->str, &error);
489 if (error) {
490 mc_search->error = MC_SEARCH_E_REGEX_REPLACE;
491 mc_search->error_str = g_strdup (error->message);
492 g_error_free (error);
493 return NULL;
496 ret = g_string_new (tmp_str);
497 g_free (tmp_str);
498 return ret;
499 #else /* GLIB_CHECK_VERSION (2, 14, 0) */
500 int num_replace_tokens;
501 gsize loop;
502 gsize index, len;
504 gchar *prev_str;
506 num_replace_tokens =
507 mc_search_regex__get_num_replace_tokens (replace_str->str, replace_str->len);
509 if (mc_search->num_rezults < 0)
510 return g_string_new_len (replace_str->str, replace_str->len);
512 if (num_replace_tokens > mc_search->num_rezults - 1
513 || num_replace_tokens > MC_SEARCH__NUM_REPL_ARGS) {
514 mc_search->error = MC_SEARCH_E_REGEX_REPLACE;
515 mc_search->error_str = g_strdup (STR_E_RPL_NOT_EQ_TO_FOUND);
516 return NULL;
519 ret = g_string_new ("");
520 prev_str = replace_str->str;
521 for (loop = 0; loop < replace_str->len - 1; loop++) {
522 if (replace_str->str[loop] == '\\'
523 && (replace_str->str[loop + 1] & (char) 0xf0) == 0x30 /* 0-9 */ ) {
524 if (mc_search__regex_is_char_escaped (replace_str->str, &replace_str->str[loop - 1]))
525 continue;
526 len = 0;
527 while (loop + 1 + len < replace_str->len
528 && (replace_str->str[loop + 1 + len] & (char) 0xf0) == 0x30)
529 len++;
530 tmp_str = g_strndup (&(replace_str->str[loop + 1]), len);
531 index = (gsize) atoi (tmp_str);
532 g_free (tmp_str);
533 if (index > mc_search->num_rezults) {
534 g_string_free (ret, TRUE);
535 mc_search->error = MC_SEARCH_E_REGEX_REPLACE;
536 mc_search->error_str = g_strdup_printf (STR_E_RPL_INVALID_TOKEN, index);
537 return NULL;
539 if (loop)
540 g_string_append_len (ret, prev_str, replace_str->str - prev_str + loop);
542 mc_search_regex__append_found_token_by_num (mc_search, mc_search->regex_buffer->str,
543 ret, index);
544 prev_str = replace_str->str + loop + len + 1;
547 g_string_append_len (ret, prev_str, replace_str->str - prev_str + replace_str->len);
549 return ret;
550 #endif /* GLIB_CHECK_VERSION (2, 14, 0) */