5 Copyright (C) 2009 The Free Software Foundation, Inc.
8 Slava Zanko <slavazanko@gmail.com>, 2009.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software; you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be
18 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
19 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
32 #include "../src/global.h"
33 #include "../src/search/search.h"
34 #include "../src/search/internal.h"
35 #include "../src/strutil.h"
36 #include "../src/strescape.h"
37 #include "../src/charsets.h"
39 /*** global variables ****************************************************************************/
41 /*** file scope macro definitions ****************************************************************/
43 /*** file scope type declarations ****************************************************************/
46 REPLACE_T_NO_TRANSFORM
= 0,
47 REPLACE_T_UPP_TRANSFORM_CHAR
= 1,
48 REPLACE_T_LOW_TRANSFORM_CHAR
= 2,
49 REPLACE_T_UPP_TRANSFORM
= 4,
50 REPLACE_T_LOW_TRANSFORM
= 8
51 } replace_transform_type_t
;
53 /*** file scope variables ************************************************************************/
55 /*** file scope functions ************************************************************************/
58 mc_search__regex_str_append_if_special (GString
* copy_to
, GString
* regex_str
, gsize
* offset
)
62 const char **spec_chr
;
63 const char *special_chars
[] = {
78 spec_chr
= special_chars
;
80 tmp_regex_str
= &(regex_str
->str
[*offset
]);
83 spec_chr_len
= strlen (*spec_chr
);
84 if (!strncmp (tmp_regex_str
, *spec_chr
, spec_chr_len
)) {
85 if (!strutils_is_char_escaped (regex_str
->str
, tmp_regex_str
)) {
86 if (!strncmp ("\\x", *spec_chr
, spec_chr_len
)) {
87 if (*(tmp_regex_str
+ spec_chr_len
) == '{') {
88 while ((spec_chr_len
< regex_str
->len
- *offset
)
89 && *(tmp_regex_str
+ spec_chr_len
) != '}')
91 if (*(tmp_regex_str
+ spec_chr_len
) == '}')
96 g_string_append_len (copy_to
, tmp_regex_str
, spec_chr_len
);
97 *offset
+= spec_chr_len
;
107 /* --------------------------------------------------------------------------------------------- */
109 mc_search__cond_struct_new_regex_hex_add (const char *charset
, GString
* str_to
,
110 const char *one_char
, gsize str_len
)
116 upp
= mc_search__toupper_case_str (charset
, one_char
, str_len
);
117 low
= mc_search__tolower_case_str (charset
, one_char
, str_len
);
119 for (loop
= 0; loop
< upp
->len
; loop
++) {
121 if (loop
< low
->len
) {
122 if (upp
->str
[loop
] == low
->str
[loop
])
123 tmp_str
= g_strdup_printf ("\\x%02X", (unsigned char) upp
->str
[loop
]);
126 g_strdup_printf ("[\\x%02X\\x%02X]", (unsigned char) upp
->str
[loop
],
127 (unsigned char) low
->str
[loop
]);
129 tmp_str
= g_strdup_printf ("\\x%02X", (unsigned char) upp
->str
[loop
]);
131 g_string_append (str_to
, tmp_str
);
134 g_string_free (upp
, TRUE
);
135 g_string_free (low
, TRUE
);
138 /* --------------------------------------------------------------------------------------------- */
141 mc_search__cond_struct_new_regex_accum_append (const char *charset
, GString
* str_to
,
144 GString
*recoded_part
;
147 gboolean just_letters
;
150 recoded_part
= g_string_new ("");
152 while (loop
< str_from
->len
) {
154 mc_search__get_one_symbol (charset
, &(str_from
->str
[loop
]),
155 (str_from
->len
- loop
> 6) ? 6 : str_from
->len
- loop
,
157 if (!strlen (one_char
)) {
162 mc_search__cond_struct_new_regex_hex_add (charset
, recoded_part
, one_char
,
165 g_string_append (recoded_part
, one_char
);
167 loop
+= strlen (one_char
);
168 if (!strlen (one_char
))
173 g_string_append (str_to
, recoded_part
->str
);
174 g_string_free (recoded_part
, TRUE
);
175 g_string_set_size (str_from
, 0);
178 /* --------------------------------------------------------------------------------------------- */
181 mc_search__cond_struct_new_regex_ci_str (const char *charset
, const char *str
, gsize str_len
)
183 GString
*accumulator
, *spec_char
, *ret_str
;
186 tmp
= g_string_new_len (str
, str_len
);
189 ret_str
= g_string_new ("");
190 accumulator
= g_string_new ("");
191 spec_char
= g_string_new ("");
194 while (loop
<= str_len
) {
195 if (mc_search__regex_str_append_if_special (spec_char
, tmp
, &loop
)) {
196 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
197 g_string_append_len (ret_str
, spec_char
->str
, spec_char
->len
);
198 g_string_set_size (spec_char
, 0);
202 if (tmp
->str
[loop
] == '[' && !strutils_is_char_escaped (tmp
->str
, &(tmp
->str
[loop
]))) {
203 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
205 while (loop
< str_len
&& !(tmp
->str
[loop
] == ']'
206 && !strutils_is_char_escaped (tmp
->str
,
207 &(tmp
->str
[loop
])))) {
208 g_string_append_c (ret_str
, tmp
->str
[loop
]);
212 g_string_append_c (ret_str
, tmp
->str
[loop
]);
219 g_string_append_c (accumulator
, tmp
->str
[loop
]);
222 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
224 g_string_free (accumulator
, TRUE
);
225 g_string_free (spec_char
, TRUE
);
226 g_string_free (tmp
, TRUE
);
230 /* --------------------------------------------------------------------------------------------- */
232 static mc_search__found_cond_t
233 mc_search__regex_found_cond_one (mc_search_t
* lc_mc_search
, mc_search_regex_t
* regex
,
234 GString
* search_str
)
236 #ifdef SEARCH_TYPE_GLIB
237 GError
*error
= NULL
;
239 if (!g_regex_match_full
240 (regex
, search_str
->str
, -1, 0, G_REGEX_MATCH_NEWLINE_ANY
, &lc_mc_search
->regex_match_info
,
242 g_match_info_free (lc_mc_search
->regex_match_info
);
243 lc_mc_search
->regex_match_info
= NULL
;
245 lc_mc_search
->error
= MC_SEARCH_E_REGEX
;
246 lc_mc_search
->error_str
= str_conv_gerror_message (error
, _(" Regular expression error "));
247 g_error_free (error
);
248 return COND__FOUND_ERROR
;
250 return COND__NOT_FOUND
;
252 lc_mc_search
->num_rezults
= g_match_info_get_match_count (lc_mc_search
->regex_match_info
);
253 #else /* SEARCH_TYPE_GLIB */
254 lc_mc_search
->num_rezults
= pcre_exec (regex
, lc_mc_search
->regex_match_info
,
255 search_str
->str
, search_str
->len
- 1, 0, 0, lc_mc_search
->iovector
,
256 MC_SEARCH__NUM_REPLACE_ARGS
);
257 if (lc_mc_search
->num_rezults
< 0) {
258 return COND__NOT_FOUND
;
260 #endif /* SEARCH_TYPE_GLIB */
261 return COND__FOUND_OK
;
265 /* --------------------------------------------------------------------------------------------- */
267 static mc_search__found_cond_t
268 mc_search__regex_found_cond (mc_search_t
* lc_mc_search
, GString
* search_str
)
271 mc_search_cond_t
*mc_search_cond
;
272 mc_search__found_cond_t ret
;
274 for (loop1
= 0; loop1
< lc_mc_search
->conditions
->len
; loop1
++) {
275 mc_search_cond
= (mc_search_cond_t
*) g_ptr_array_index (lc_mc_search
->conditions
, loop1
);
277 if (!mc_search_cond
->regex_handle
)
280 ret
= mc_search__regex_found_cond_one (lc_mc_search
, mc_search_cond
->regex_handle
, search_str
);
282 if (ret
!= COND__NOT_FOUND
)
285 return COND__NOT_ALL_FOUND
;
288 /* --------------------------------------------------------------------------------------------- */
291 mc_search_regex__get_max_num_of_replace_tokens (const gchar
* str
, gsize len
)
295 for (loop
= 0; loop
< len
- 1; loop
++) {
296 if (str
[loop
] == '\\' && (str
[loop
+ 1] & (char) 0xf0) == 0x30 /* 0-9 */ ) {
297 if (strutils_is_char_escaped (str
, &str
[loop
]))
299 if (max_token
< str
[loop
+ 1] - '0')
300 max_token
= str
[loop
+ 1] - '0';
303 if (str
[loop
] == '$' && str
[loop
+ 1] == '{') {
307 if (strutils_is_char_escaped (str
, &str
[loop
]))
311 loop
+ tmp_len
+ 2 < len
&& (str
[loop
+ 2 + tmp_len
] & (char) 0xf0) == 0x30;
313 if (str
[loop
+ 2 + tmp_len
] == '}') {
314 tmp_str
= g_strndup (&str
[loop
+ 2], tmp_len
);
315 tmp_token
= atoi (tmp_str
);
316 if (max_token
< tmp_token
)
317 max_token
= tmp_token
;
325 /* --------------------------------------------------------------------------------------------- */
328 mc_search_regex__get_token_by_num (const mc_search_t
* lc_mc_search
, gsize lc_index
)
330 int fnd_start
= 0, fnd_end
= 0;
332 #ifdef SEARCH_TYPE_GLIB
333 g_match_info_fetch_pos (lc_mc_search
->regex_match_info
, lc_index
, &fnd_start
, &fnd_end
);
334 #else /* SEARCH_TYPE_GLIB */
335 fnd_start
= lc_mc_search
->iovector
[lc_index
* 2 + 0];
336 fnd_end
= lc_mc_search
->iovector
[lc_index
* 2 + 1];
337 #endif /* SEARCH_TYPE_GLIB */
339 if (fnd_end
- fnd_start
== 0)
342 return g_strndup (lc_mc_search
->regex_buffer
->str
+ fnd_start
, fnd_end
- fnd_start
);
346 /* --------------------------------------------------------------------------------------------- */
348 mc_search_regex__process_replace_str (const GString
* replace_str
, const gsize current_pos
,
349 gsize
* skip_len
, replace_transform_type_t
* replace_flags
)
353 const char *curr_str
= &(replace_str
->str
[current_pos
]);
355 if (current_pos
> replace_str
->len
)
360 if (*curr_str
== '$' && *(curr_str
+ 1) == '{' && (*(curr_str
+ 2) & (char) 0xf0) == 0x30) {
361 if (strutils_is_char_escaped (replace_str
->str
, curr_str
)) {
367 current_pos
+ *skip_len
+ 2 < replace_str
->len
368 && (*(curr_str
+ 2 + *skip_len
) & (char) 0xf0) == 0x30; (*skip_len
)++);
370 if (*(curr_str
+ 2 + *skip_len
) != '}')
373 tmp_str
= g_strndup (curr_str
+ 2, *skip_len
);
377 ret
= atoi (tmp_str
);
380 *skip_len
+= 3; /* ${} */
384 if (*curr_str
== '\\') {
385 if (strutils_is_char_escaped (replace_str
->str
, curr_str
)) {
390 if ((*(curr_str
+ 1) & (char) 0xf0) == 0x30) {
391 ret
= *(curr_str
+ 1) - '0';
392 *skip_len
= 2; /* \\ and one digit */
397 switch (*(curr_str
+ 1)) {
399 *replace_flags
|= REPLACE_T_UPP_TRANSFORM
;
400 *replace_flags
&= ~REPLACE_T_LOW_TRANSFORM
;
403 *replace_flags
|= REPLACE_T_UPP_TRANSFORM_CHAR
;
406 *replace_flags
|= REPLACE_T_LOW_TRANSFORM
;
407 *replace_flags
&= ~REPLACE_T_UPP_TRANSFORM
;
410 *replace_flags
|= REPLACE_T_LOW_TRANSFORM_CHAR
;
413 *replace_flags
= REPLACE_T_NO_TRANSFORM
;
423 mc_search_regex__process_append_str (GString
* dest_str
, const char *from
, gsize len
,
424 replace_transform_type_t
* replace_flags
)
431 if (len
== (gsize
) -1)
434 if (*replace_flags
== REPLACE_T_NO_TRANSFORM
) {
435 g_string_append_len (dest_str
, from
, len
);
439 tmp_str
= mc_search__get_one_symbol (NULL
, from
+ loop
, len
- loop
, NULL
);
440 char_len
= strlen (tmp_str
);
441 if (*replace_flags
& REPLACE_T_UPP_TRANSFORM_CHAR
) {
442 *replace_flags
&= !REPLACE_T_UPP_TRANSFORM_CHAR
;
443 tmp_string
= mc_search__toupper_case_str (NULL
, tmp_str
, char_len
);
444 g_string_append (dest_str
, tmp_string
->str
);
445 g_string_free (tmp_string
, TRUE
);
447 } else if (*replace_flags
& REPLACE_T_LOW_TRANSFORM_CHAR
) {
448 *replace_flags
&= !REPLACE_T_LOW_TRANSFORM_CHAR
;
449 tmp_string
= mc_search__toupper_case_str (NULL
, tmp_str
, char_len
);
450 g_string_append (dest_str
, tmp_string
->str
);
451 g_string_free (tmp_string
, TRUE
);
453 } else if (*replace_flags
& REPLACE_T_UPP_TRANSFORM
) {
454 tmp_string
= mc_search__toupper_case_str (NULL
, tmp_str
, char_len
);
455 g_string_append (dest_str
, tmp_string
->str
);
456 g_string_free (tmp_string
, TRUE
);
458 } else if (*replace_flags
& REPLACE_T_LOW_TRANSFORM
) {
459 tmp_string
= mc_search__tolower_case_str (NULL
, tmp_str
, char_len
);
460 g_string_append (dest_str
, tmp_string
->str
);
461 g_string_free (tmp_string
, TRUE
);
464 g_string_append (dest_str
, tmp_str
);
472 /*** public functions ****************************************************************************/
475 mc_search__cond_struct_new_init_regex (const char *charset
, mc_search_t
* lc_mc_search
,
476 mc_search_cond_t
* mc_search_cond
)
479 #ifdef SEARCH_TYPE_GLIB
480 GError
*error
= NULL
;
481 #else /* SEARCH_TYPE_GLIB */
484 #endif /* SEARCH_TYPE_GLIB */
486 if (!lc_mc_search
->is_case_sentitive
) {
487 tmp
= g_string_new_len (mc_search_cond
->str
->str
, mc_search_cond
->str
->len
);
488 g_string_free (mc_search_cond
->str
, TRUE
);
489 mc_search_cond
->str
= mc_search__cond_struct_new_regex_ci_str (charset
, tmp
->str
, tmp
->len
);
490 g_string_free (tmp
, TRUE
);
492 #ifdef SEARCH_TYPE_GLIB
493 mc_search_cond
->regex_handle
=
494 g_regex_new (mc_search_cond
->str
->str
, G_REGEX_OPTIMIZE
| G_REGEX_RAW
| G_REGEX_DOTALL
, 0,
498 lc_mc_search
->error
= MC_SEARCH_E_REGEX_COMPILE
;
499 lc_mc_search
->error_str
= str_conv_gerror_message (error
, _(" Regular expression error "));
500 g_error_free (error
);
503 #else /* SEARCH_TYPE_GLIB */
504 mc_search_cond
->regex_handle
=
505 pcre_compile (mc_search_cond
->str
->str
, PCRE_EXTRA
, &error
, &erroffset
, NULL
);
506 if (mc_search_cond
->regex_handle
== NULL
) {
507 lc_mc_search
->error
= MC_SEARCH_E_REGEX_COMPILE
;
508 lc_mc_search
->error_str
= g_strdup (error
);
511 lc_mc_search
->regex_match_info
= pcre_study (mc_search_cond
->regex_handle
, 0, &error
);
512 if (lc_mc_search
->regex_match_info
== NULL
) {
514 lc_mc_search
->error
= MC_SEARCH_E_REGEX_COMPILE
;
515 lc_mc_search
->error_str
= g_strdup (error
);
516 g_free (mc_search_cond
->regex_handle
);
517 mc_search_cond
->regex_handle
= NULL
;
521 #endif /* SEARCH_TYPE_GLIB */
524 /* --------------------------------------------------------------------------------------------- */
527 mc_search__run_regex (mc_search_t
* lc_mc_search
, const void *user_data
,
528 gsize start_search
, gsize end_search
, gsize
* found_len
)
530 gsize current_pos
, virtual_pos
;
535 if (lc_mc_search
->regex_buffer
!= NULL
)
536 g_string_free (lc_mc_search
->regex_buffer
, TRUE
);
538 lc_mc_search
->regex_buffer
= g_string_new ("");
540 virtual_pos
= current_pos
= start_search
;
541 while (virtual_pos
<= end_search
) {
542 g_string_set_size (lc_mc_search
->regex_buffer
, 0);
543 lc_mc_search
->start_buffer
= current_pos
;
546 current_chr
= mc_search__get_char (lc_mc_search
, user_data
, current_pos
);
547 if (current_chr
== MC_SEARCH_CB_ABORT
)
552 if (current_chr
== MC_SEARCH_CB_SKIP
)
557 g_string_append_c (lc_mc_search
->regex_buffer
, (char) current_chr
);
560 if (current_chr
== 0 || (char) current_chr
== '\n')
563 if (virtual_pos
> end_search
)
567 switch (mc_search__regex_found_cond (lc_mc_search
, lc_mc_search
->regex_buffer
)) {
569 #ifdef SEARCH_TYPE_GLIB
570 g_match_info_fetch_pos (lc_mc_search
->regex_match_info
, 0, &start_pos
, &end_pos
);
571 #else /* SEARCH_TYPE_GLIB */
572 start_pos
= lc_mc_search
->iovector
[0];
573 end_pos
= lc_mc_search
->iovector
[1];
574 #endif /* SEARCH_TYPE_GLIB */
576 *found_len
= end_pos
- start_pos
;
577 lc_mc_search
->normal_offset
= lc_mc_search
->start_buffer
+ start_pos
;
580 case COND__NOT_ALL_FOUND
:
583 g_string_free (lc_mc_search
->regex_buffer
, TRUE
);
584 lc_mc_search
->regex_buffer
= NULL
;
588 if (( lc_mc_search
->update_fn
!= NULL
) &&
589 ( (lc_mc_search
->update_fn
) (user_data
, current_pos
) == MC_SEARCH_CB_ABORT
))
590 current_chr
= MC_SEARCH_CB_ABORT
;
592 if (current_chr
== MC_SEARCH_CB_ABORT
)
595 g_string_free (lc_mc_search
->regex_buffer
, TRUE
);
596 lc_mc_search
->regex_buffer
= NULL
;
597 lc_mc_search
->error
= MC_SEARCH_E_NOTFOUND
;
599 if (current_chr
!= MC_SEARCH_CB_ABORT
)
600 lc_mc_search
->error_str
= g_strdup (_(STR_E_NOTFOUND
));
602 lc_mc_search
->error_str
= NULL
;
607 /* --------------------------------------------------------------------------------------------- */
609 mc_search_regex_prepare_replace_str (mc_search_t
* lc_mc_search
, GString
* replace_str
)
614 int num_replace_tokens
, lc_index
;
618 replace_transform_type_t replace_flags
= REPLACE_T_NO_TRANSFORM
;
621 mc_search_regex__get_max_num_of_replace_tokens (replace_str
->str
, replace_str
->len
);
623 if (lc_mc_search
->num_rezults
< 0)
624 return g_string_new_len (replace_str
->str
, replace_str
->len
);
626 if (num_replace_tokens
> lc_mc_search
->num_rezults
- 1
627 || num_replace_tokens
> MC_SEARCH__NUM_REPLACE_ARGS
) {
628 lc_mc_search
->error
= MC_SEARCH_E_REGEX_REPLACE
;
629 lc_mc_search
->error_str
= g_strdup (STR_E_RPL_NOT_EQ_TO_FOUND
);
633 ret
= g_string_new ("");
634 prev_str
= replace_str
->str
;
635 for (loop
= 0; loop
< replace_str
->len
- 1; loop
++) {
636 lc_index
= mc_search_regex__process_replace_str (replace_str
, loop
, &len
, &replace_flags
);
638 if (lc_index
== -1) {
640 mc_search_regex__process_append_str (ret
, prev_str
,
641 replace_str
->str
- prev_str
+ loop
,
643 mc_search_regex__process_append_str (ret
, replace_str
->str
+ loop
+ 1, len
- 1,
645 prev_str
= replace_str
->str
+ loop
+ len
;
651 if (lc_index
== -2) {
653 mc_search_regex__process_append_str (ret
, prev_str
,
654 replace_str
->str
- prev_str
+ loop
,
656 prev_str
= replace_str
->str
+ loop
+ len
;
661 if (lc_index
> lc_mc_search
->num_rezults
) {
662 g_string_free (ret
, TRUE
);
663 lc_mc_search
->error
= MC_SEARCH_E_REGEX_REPLACE
;
664 lc_mc_search
->error_str
= g_strdup_printf (STR_E_RPL_INVALID_TOKEN
, lc_index
);
668 tmp_str
= mc_search_regex__get_token_by_num (lc_mc_search
, lc_index
);
673 mc_search_regex__process_append_str (ret
, prev_str
, replace_str
->str
- prev_str
+ loop
,
675 prev_str
= replace_str
->str
+ loop
+ len
;
677 mc_search_regex__process_append_str (ret
, tmp_str
, -1, &replace_flags
);
681 mc_search_regex__process_append_str (ret
, prev_str
,
682 replace_str
->str
- prev_str
+ replace_str
->len
,