5 Copyright (C) 2009-2024
6 Free Software Foundation, Inc.
9 Slava Zanko <slavazanko@gmail.com>, 2009, 2010, 2011, 2013
10 Vitaliy Filippov <vitalif@yourcmc.ru>, 2011
11 Andrew Borodin <aborodin@vmail.ru>, 2013-2015
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
33 #include "lib/global.h"
34 #include "lib/strutil.h"
35 #include "lib/search.h"
36 #include "lib/util.h" /* MC_PTR_FREE */
40 /*** global variables ****************************************************************************/
42 /*** file scope macro definitions ****************************************************************/
44 #define REPLACE_PREPARE_T_NOTHING_SPECIAL -1
45 #define REPLACE_PREPARE_T_REPLACE_FLAG -2
46 #define REPLACE_PREPARE_T_ESCAPE_SEQ -3
48 /*** file scope type declarations ****************************************************************/
52 REPLACE_T_NO_TRANSFORM
= 0,
53 REPLACE_T_UPP_TRANSFORM_CHAR
= 1,
54 REPLACE_T_LOW_TRANSFORM_CHAR
= 2,
55 REPLACE_T_UPP_TRANSFORM
= 4,
56 REPLACE_T_LOW_TRANSFORM
= 8
57 } replace_transform_type_t
;
59 /*** forward declarations (file scope functions) *************************************************/
61 /*** file scope variables ************************************************************************/
63 /* --------------------------------------------------------------------------------------------- */
64 /*** file scope functions ************************************************************************/
65 /* --------------------------------------------------------------------------------------------- */
68 mc_search__regex_str_append_if_special (GString
* copy_to
, const GString
* regex_str
,
71 const char *special_chars
[] = {
88 const char **spec_chr
;
90 tmp_regex_str
= &(regex_str
->str
[*offset
]);
92 for (spec_chr
= special_chars
; *spec_chr
!= NULL
; spec_chr
++)
96 spec_chr_len
= strlen (*spec_chr
);
98 if (strncmp (tmp_regex_str
, *spec_chr
, spec_chr_len
) == 0
99 && !str_is_char_escaped (regex_str
->str
, tmp_regex_str
))
101 if (strncmp ("\\x", *spec_chr
, spec_chr_len
) == 0)
103 if (tmp_regex_str
[spec_chr_len
] != '{')
107 while ((spec_chr_len
< regex_str
->len
- *offset
)
108 && tmp_regex_str
[spec_chr_len
] != '}')
110 if (tmp_regex_str
[spec_chr_len
] == '}')
114 g_string_append_len (copy_to
, tmp_regex_str
, spec_chr_len
);
115 *offset
+= spec_chr_len
;
123 /* --------------------------------------------------------------------------------------------- */
126 mc_search__cond_struct_new_regex_hex_add (const char *charset
, GString
* str_to
,
127 const GString
* one_char
)
132 upp
= mc_search__toupper_case_str (charset
, one_char
);
133 low
= mc_search__tolower_case_str (charset
, one_char
);
135 for (loop
= 0; loop
< upp
->len
; loop
++)
137 gchar tmp_str
[10 + 1]; /* longest content is "[\\x%02X\\x%02X]" */
140 if (loop
>= low
->len
|| upp
->str
[loop
] == low
->str
[loop
])
142 g_snprintf (tmp_str
, sizeof (tmp_str
), "\\x%02X", (unsigned char) upp
->str
[loop
]);
145 g_snprintf (tmp_str
, sizeof (tmp_str
), "[\\x%02X\\x%02X]",
146 (unsigned char) upp
->str
[loop
], (unsigned char) low
->str
[loop
]);
148 g_string_append_len (str_to
, tmp_str
, tmp_len
);
151 g_string_free (upp
, TRUE
);
152 g_string_free (low
, TRUE
);
155 /* --------------------------------------------------------------------------------------------- */
158 mc_search__cond_struct_new_regex_accum_append (const char *charset
, GString
* str_to
,
161 GString
*recoded_part
;
164 recoded_part
= g_string_sized_new (32);
166 while (loop
< str_from
->len
)
169 gboolean just_letters
;
172 mc_search__get_one_symbol (charset
, str_from
->str
+ loop
,
173 MIN (str_from
->len
- loop
, 6), &just_letters
);
175 if (one_char
->len
== 0)
179 loop
+= one_char
->len
;
182 mc_search__cond_struct_new_regex_hex_add (charset
, recoded_part
, one_char
);
184 g_string_append_len (recoded_part
, one_char
->str
, one_char
->len
);
187 g_string_free (one_char
, TRUE
);
190 g_string_append_len (str_to
, recoded_part
->str
, recoded_part
->len
);
191 g_string_free (recoded_part
, TRUE
);
192 g_string_set_size (str_from
, 0);
195 /* --------------------------------------------------------------------------------------------- */
198 * Creates a case-insensitive version of a regex pattern.
200 * For example (assuming ASCII charset): given "\\bHello!\\xAB", returns
201 * "\\b[Hh][Ee][Ll][Ll][Oo]!\\xAB" (this example is for easier reading; in
202 * reality hex codes are used instead of letters).
204 * This function knows not to ruin special regex symbols.
206 * This function is used when working with non-UTF-8 charsets: GLib's
207 * regex engine doesn't understand such charsets and therefore can't do
211 mc_search__cond_struct_new_regex_ci_str (const char *charset
, const GString
* astr
)
213 GString
*accumulator
, *spec_char
, *ret_str
;
216 ret_str
= g_string_sized_new (64);
217 accumulator
= g_string_sized_new (64);
218 spec_char
= g_string_sized_new (64);
221 while (loop
< astr
->len
)
223 if (mc_search__regex_str_append_if_special (spec_char
, astr
, &loop
))
225 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
226 g_string_append_len (ret_str
, spec_char
->str
, spec_char
->len
);
227 g_string_set_size (spec_char
, 0);
231 if (astr
->str
[loop
] == '[' && !str_is_char_escaped (astr
->str
, &(astr
->str
[loop
])))
233 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
235 while (loop
< astr
->len
&& !(astr
->str
[loop
] == ']'
236 && !str_is_char_escaped (astr
->str
, &(astr
->str
[loop
]))))
238 g_string_append_c (ret_str
, astr
->str
[loop
]);
242 g_string_append_c (ret_str
, astr
->str
[loop
]);
249 g_string_append_c (accumulator
, astr
->str
[loop
]);
252 mc_search__cond_struct_new_regex_accum_append (charset
, ret_str
, accumulator
);
254 g_string_free (accumulator
, TRUE
);
255 g_string_free (spec_char
, TRUE
);
260 /* --------------------------------------------------------------------------------------------- */
262 #ifdef SEARCH_TYPE_GLIB
263 /* A thin wrapper above g_regex_match_full that makes sure the string passed
264 * to it is valid UTF-8 (unless G_REGEX_RAW compile flag was set), as it is a
265 * requirement by glib and it might crash otherwise. See: mc ticket 3449.
266 * Be careful: there might be embedded NULs in the strings. */
268 mc_search__g_regex_match_full_safe (const GRegex
* regex
,
269 const gchar
* string
,
272 GRegexMatchFlags match_options
,
273 GMatchInfo
** match_info
, GError
** error
)
275 char *string_safe
, *p
, *end
;
279 string_len
= strlen (string
);
281 if ((g_regex_get_compile_flags (regex
) & G_REGEX_RAW
)
282 || g_utf8_validate (string
, string_len
, NULL
))
284 return g_regex_match_full (regex
, string
, string_len
, start_position
, match_options
,
288 /* Correctly handle embedded NULs while copying */
289 p
= string_safe
= g_malloc (string_len
+ 1);
290 memcpy (string_safe
, string
, string_len
);
291 string_safe
[string_len
] = '\0';
292 end
= p
+ string_len
;
296 gunichar c
= g_utf8_get_char_validated (p
, -1);
297 if (c
!= (gunichar
) (-1) && c
!= (gunichar
) (-2))
299 p
= g_utf8_next_char (p
);
303 /* U+FFFD would be the proper choice, but then we'd have to
304 maintain mapping between old and new offsets.
305 So rather do a byte by byte replacement. */
311 g_regex_match_full (regex
, string_safe
, string_len
, start_position
, match_options
,
313 g_free (string_safe
);
316 #endif /* SEARCH_TYPE_GLIB */
318 /* --------------------------------------------------------------------------------------------- */
320 static mc_search__found_cond_t
321 mc_search__regex_found_cond_one (mc_search_t
* lc_mc_search
, mc_search_regex_t
* regex
,
322 GString
* search_str
)
324 #ifdef SEARCH_TYPE_GLIB
325 GError
*mcerror
= NULL
;
327 if (!mc_search__g_regex_match_full_safe
328 (regex
, search_str
->str
, search_str
->len
, 0, G_REGEX_MATCH_NEWLINE_ANY
,
329 &lc_mc_search
->regex_match_info
, &mcerror
))
331 g_match_info_free (lc_mc_search
->regex_match_info
);
332 lc_mc_search
->regex_match_info
= NULL
;
335 lc_mc_search
->error
= MC_SEARCH_E_REGEX
;
336 g_free (lc_mc_search
->error_str
);
337 lc_mc_search
->error_str
=
338 str_conv_gerror_message (mcerror
, _("Regular expression error"));
339 g_error_free (mcerror
);
340 return COND__FOUND_ERROR
;
342 return COND__NOT_FOUND
;
344 lc_mc_search
->num_results
= g_match_info_get_match_count (lc_mc_search
->regex_match_info
);
345 #else /* SEARCH_TYPE_GLIB */
347 lc_mc_search
->num_results
=
349 pcre2_match (regex
, (unsigned char *) search_str
->str
, search_str
->len
, 0, 0,
350 lc_mc_search
->regex_match_info
, NULL
);
352 pcre_exec (regex
, lc_mc_search
->regex_match_info
, search_str
->str
, search_str
->len
, 0, 0,
353 lc_mc_search
->iovector
, MC_SEARCH__NUM_REPLACE_ARGS
);
355 if (lc_mc_search
->num_results
< 0)
357 return COND__NOT_FOUND
;
359 #endif /* SEARCH_TYPE_GLIB */
360 return COND__FOUND_OK
;
364 /* --------------------------------------------------------------------------------------------- */
366 static mc_search__found_cond_t
367 mc_search__regex_found_cond (mc_search_t
* lc_mc_search
, GString
* search_str
)
371 for (loop1
= 0; loop1
< lc_mc_search
->prepared
.conditions
->len
; loop1
++)
373 mc_search_cond_t
*mc_search_cond
;
374 mc_search__found_cond_t ret
;
377 (mc_search_cond_t
*) g_ptr_array_index (lc_mc_search
->prepared
.conditions
, loop1
);
379 if (!mc_search_cond
->regex_handle
)
383 mc_search__regex_found_cond_one (lc_mc_search
, mc_search_cond
->regex_handle
,
385 if (ret
!= COND__NOT_FOUND
)
388 return COND__NOT_ALL_FOUND
;
391 /* --------------------------------------------------------------------------------------------- */
394 mc_search_regex__get_max_num_of_replace_tokens (const gchar
* str
, gsize len
)
399 for (loop
= 0; loop
< len
- 1; loop
++)
400 if (str
[loop
] == '\\' && g_ascii_isdigit (str
[loop
+ 1]))
402 if (str_is_char_escaped (str
, &str
[loop
]))
404 if (max_token
< str
[loop
+ 1] - '0')
405 max_token
= str
[loop
+ 1] - '0';
407 else if (str
[loop
] == '$' && str
[loop
+ 1] == '{')
411 if (str_is_char_escaped (str
, &str
[loop
]))
415 loop
+ tmp_len
+ 2 < len
&& (str
[loop
+ 2 + tmp_len
] & (char) 0xf0) == 0x30;
418 if (str
[loop
+ 2 + tmp_len
] == '}')
423 tmp_str
= g_strndup (&str
[loop
+ 2], tmp_len
);
424 tmp_token
= atoi (tmp_str
);
425 if (max_token
< tmp_token
)
426 max_token
= tmp_token
;
434 /* --------------------------------------------------------------------------------------------- */
437 mc_search_regex__get_token_by_num (const mc_search_t
* lc_mc_search
, gsize lc_index
)
439 int fnd_start
= 0, fnd_end
= 0;
441 #ifdef SEARCH_TYPE_GLIB
442 g_match_info_fetch_pos (lc_mc_search
->regex_match_info
, lc_index
, &fnd_start
, &fnd_end
);
443 #else /* SEARCH_TYPE_GLIB */
444 fnd_start
= lc_mc_search
->iovector
[lc_index
* 2 + 0];
445 fnd_end
= lc_mc_search
->iovector
[lc_index
* 2 + 1];
446 #endif /* SEARCH_TYPE_GLIB */
448 if (fnd_end
== fnd_start
)
449 return g_strdup ("");
451 return g_strndup (lc_mc_search
->regex_buffer
->str
+ fnd_start
, fnd_end
- fnd_start
);
455 /* --------------------------------------------------------------------------------------------- */
458 mc_search_regex__replace_handle_esc_seq (const GString
* replace_str
, const gsize current_pos
,
459 gsize
* skip_len
, int *ret
)
461 char *curr_str
= &(replace_str
->str
[current_pos
]);
462 char c
= curr_str
[1];
464 if (replace_str
->len
> current_pos
+ 2)
468 for (*skip_len
= 2; /* \{ */
469 current_pos
+ *skip_len
< replace_str
->len
&& curr_str
[*skip_len
] >= '0'
470 && curr_str
[*skip_len
] <= '7'; (*skip_len
)++)
473 if (current_pos
+ *skip_len
< replace_str
->len
&& curr_str
[*skip_len
] == '}')
476 *ret
= REPLACE_PREPARE_T_ESCAPE_SEQ
;
481 *ret
= REPLACE_PREPARE_T_NOTHING_SPECIAL
;
488 *skip_len
= 2; /* \x */
492 for (*skip_len
= 3; /* \x{ */
493 current_pos
+ *skip_len
< replace_str
->len
494 && g_ascii_isxdigit ((guchar
) curr_str
[*skip_len
]); (*skip_len
)++)
497 if (current_pos
+ *skip_len
< replace_str
->len
&& curr_str
[*skip_len
] == '}')
500 *ret
= REPLACE_PREPARE_T_ESCAPE_SEQ
;
505 *ret
= REPLACE_PREPARE_T_NOTHING_SPECIAL
;
509 else if (!g_ascii_isxdigit ((guchar
) c
))
511 *skip_len
= 2; /* \x without number behind */
512 *ret
= REPLACE_PREPARE_T_NOTHING_SPECIAL
;
518 if (!g_ascii_isxdigit ((guchar
) c
))
519 *skip_len
= 3; /* \xH */
521 *skip_len
= 4; /* \xHH */
522 *ret
= REPLACE_PREPARE_T_ESCAPE_SEQ
;
528 if (strchr ("ntvbrfa", c
) != NULL
)
531 *ret
= REPLACE_PREPARE_T_ESCAPE_SEQ
;
537 /* --------------------------------------------------------------------------------------------- */
540 mc_search_regex__process_replace_str (const GString
* replace_str
, const gsize current_pos
,
541 gsize
* skip_len
, replace_transform_type_t
* replace_flags
)
544 const char *curr_str
= &(replace_str
->str
[current_pos
]);
546 if (current_pos
> replace_str
->len
)
547 return REPLACE_PREPARE_T_NOTHING_SPECIAL
;
551 if (replace_str
->len
> current_pos
+ 2 && curr_str
[0] == '$' && curr_str
[1] == '{'
552 && (curr_str
[2] & (char) 0xf0) == 0x30)
556 if (str_is_char_escaped (replace_str
->str
, curr_str
))
559 return REPLACE_PREPARE_T_NOTHING_SPECIAL
;
563 current_pos
+ *skip_len
+ 2 < replace_str
->len
564 && (curr_str
[2 + *skip_len
] & (char) 0xf0) == 0x30; (*skip_len
)++)
567 if (curr_str
[2 + *skip_len
] != '}')
568 return REPLACE_PREPARE_T_NOTHING_SPECIAL
;
570 tmp_str
= g_strndup (curr_str
+ 2, *skip_len
);
572 return REPLACE_PREPARE_T_NOTHING_SPECIAL
;
574 ret
= atoi (tmp_str
);
577 *skip_len
+= 3; /* ${} */
578 return ret
; /* capture buffer index >= 0 */
581 if (curr_str
[0] == '\\' && replace_str
->len
> current_pos
+ 1)
583 if (str_is_char_escaped (replace_str
->str
, curr_str
))
586 return REPLACE_PREPARE_T_NOTHING_SPECIAL
;
589 if (g_ascii_isdigit (curr_str
[1]))
591 ret
= g_ascii_digit_value (curr_str
[1]); /* capture buffer index >= 0 */
592 *skip_len
= 2; /* \\ and one digit */
596 if (!mc_search_regex__replace_handle_esc_seq (replace_str
, current_pos
, skip_len
, &ret
))
599 ret
= REPLACE_PREPARE_T_REPLACE_FLAG
;
605 *replace_flags
|= REPLACE_T_UPP_TRANSFORM
;
606 *replace_flags
&= ~REPLACE_T_LOW_TRANSFORM
;
609 *replace_flags
|= REPLACE_T_UPP_TRANSFORM_CHAR
;
612 *replace_flags
|= REPLACE_T_LOW_TRANSFORM
;
613 *replace_flags
&= ~REPLACE_T_UPP_TRANSFORM
;
616 *replace_flags
|= REPLACE_T_LOW_TRANSFORM_CHAR
;
619 *replace_flags
= REPLACE_T_NO_TRANSFORM
;
622 ret
= REPLACE_PREPARE_T_NOTHING_SPECIAL
;
629 /* --------------------------------------------------------------------------------------------- */
632 mc_search_regex__process_append_str (GString
* dest_str
, const char *from
, gsize len
,
633 replace_transform_type_t
* replace_flags
)
638 if (len
== (gsize
) (-1))
641 if (*replace_flags
== REPLACE_T_NO_TRANSFORM
)
643 g_string_append_len (dest_str
, from
, len
);
647 for (loop
= 0; loop
< len
; loop
+= char_len
)
649 GString
*tmp_string
= NULL
;
652 s
= mc_search__get_one_symbol (NULL
, from
+ loop
, len
- loop
, NULL
);
655 if ((*replace_flags
& REPLACE_T_UPP_TRANSFORM_CHAR
) != 0)
657 *replace_flags
&= ~REPLACE_T_UPP_TRANSFORM_CHAR
;
658 tmp_string
= mc_search__toupper_case_str (NULL
, s
);
659 g_string_append_len (dest_str
, tmp_string
->str
, tmp_string
->len
);
661 else if ((*replace_flags
& REPLACE_T_LOW_TRANSFORM_CHAR
) != 0)
663 *replace_flags
&= ~REPLACE_T_LOW_TRANSFORM_CHAR
;
664 tmp_string
= mc_search__tolower_case_str (NULL
, s
);
665 g_string_append_len (dest_str
, tmp_string
->str
, tmp_string
->len
);
667 else if ((*replace_flags
& REPLACE_T_UPP_TRANSFORM
) != 0)
669 tmp_string
= mc_search__toupper_case_str (NULL
, s
);
670 g_string_append_len (dest_str
, tmp_string
->str
, tmp_string
->len
);
672 else if ((*replace_flags
& REPLACE_T_LOW_TRANSFORM
) != 0)
674 tmp_string
= mc_search__tolower_case_str (NULL
, s
);
675 g_string_append_len (dest_str
, tmp_string
->str
, tmp_string
->len
);
678 g_string_free (s
, TRUE
);
679 if (tmp_string
!= NULL
)
680 g_string_free (tmp_string
, TRUE
);
684 /* --------------------------------------------------------------------------------------------- */
687 mc_search_regex__process_escape_sequence (GString
* dest_str
, const char *from
, gsize len
,
688 replace_transform_type_t
* replace_flags
,
695 if (len
== (gsize
) (-1))
708 if (i
< len
&& from
[i
] == '{')
712 if (from
[i
] >= '0' && from
[i
] <= '9')
713 c
= c
* 16 + from
[i
] - '0';
714 else if (from
[i
] >= 'a' && from
[i
] <= 'f')
715 c
= c
* 16 + 10 + from
[i
] - 'a';
716 else if (from
[i
] >= 'A' && from
[i
] <= 'F')
717 c
= c
* 16 + 10 + from
[i
] - 'A';
722 else if (from
[i
] >= '0' && from
[i
] <= '7')
723 for (; i
< len
&& from
[i
] >= '0' && from
[i
] <= '7'; i
++)
724 c
= c
* 8 + from
[i
] - '0';
751 mc_search_regex__process_append_str (dest_str
, from
, len
, replace_flags
);
756 if (c
< 0x80 || !is_utf8
)
757 g_string_append_c (dest_str
, (char) c
);
761 g_string_append_c (dest_str
, b
);
762 b
= 0x80 | (c
& 0x3F);
763 g_string_append_c (dest_str
, b
);
765 else if (c
< 0x10000)
767 b
= 0xE0 | (c
>> 12);
768 g_string_append_c (dest_str
, b
);
769 b
= 0x80 | ((c
>> 6) & 0x3F);
770 g_string_append_c (dest_str
, b
);
771 b
= 0x80 | (c
& 0x3F);
772 g_string_append_c (dest_str
, b
);
774 else if (c
< 0x10FFFF)
776 b
= 0xF0 | (c
>> 16);
777 g_string_append_c (dest_str
, b
);
778 b
= 0x80 | ((c
>> 12) & 0x3F);
779 g_string_append_c (dest_str
, b
);
780 b
= 0x80 | ((c
>> 6) & 0x3F);
781 g_string_append_c (dest_str
, b
);
782 b
= 0x80 | (c
& 0x3F);
783 g_string_append_c (dest_str
, b
);
787 /* --------------------------------------------------------------------------------------------- */
788 /*** public functions ****************************************************************************/
789 /* --------------------------------------------------------------------------------------------- */
792 mc_search__cond_struct_new_init_regex (const char *charset
, mc_search_t
* lc_mc_search
,
793 mc_search_cond_t
* mc_search_cond
)
795 if (lc_mc_search
->whole_words
&& !lc_mc_search
->is_entire_line
)
797 /* NOTE: \b as word boundary doesn't allow search
798 * whole words with non-ASCII symbols.
799 * Update: Is it still true nowadays? Probably not. #2396, #3524 */
800 g_string_prepend (mc_search_cond
->str
, "(?<![\\p{L}\\p{N}_])");
801 g_string_append (mc_search_cond
->str
, "(?![\\p{L}\\p{N}_])");
805 #ifdef SEARCH_TYPE_GLIB
806 GError
*mcerror
= NULL
;
807 GRegexCompileFlags g_regex_options
= G_REGEX_OPTIMIZE
| G_REGEX_DOTALL
;
809 if (str_isutf8 (charset
) && mc_global
.utf8_display
)
811 if (!lc_mc_search
->is_case_sensitive
)
812 g_regex_options
|= G_REGEX_CASELESS
;
816 g_regex_options
|= G_REGEX_RAW
;
818 if (!lc_mc_search
->is_case_sensitive
)
822 tmp
= mc_search_cond
->str
;
823 mc_search_cond
->str
= mc_search__cond_struct_new_regex_ci_str (charset
, tmp
);
824 g_string_free (tmp
, TRUE
);
828 mc_search_cond
->regex_handle
=
829 g_regex_new (mc_search_cond
->str
->str
, g_regex_options
, 0, &mcerror
);
833 lc_mc_search
->error
= MC_SEARCH_E_REGEX_COMPILE
;
834 g_free (lc_mc_search
->error_str
);
835 lc_mc_search
->error_str
=
836 str_conv_gerror_message (mcerror
, _("Regular expression error"));
837 g_error_free (mcerror
);
840 #else /* SEARCH_TYPE_GLIB */
844 char error
[BUF_SMALL
];
846 int pcre_options
= PCRE2_MULTILINE
;
850 int pcre_options
= PCRE_EXTRA
| PCRE_MULTILINE
;
853 if (str_isutf8 (charset
) && mc_global
.utf8_display
)
856 pcre_options
|= PCRE2_UTF
;
857 if (!lc_mc_search
->is_case_sensitive
)
858 pcre_options
|= PCRE2_CASELESS
;
860 pcre_options
|= PCRE_UTF8
;
861 if (!lc_mc_search
->is_case_sensitive
)
862 pcre_options
|= PCRE_CASELESS
;
865 else if (!lc_mc_search
->is_case_sensitive
)
869 tmp
= mc_search_cond
->str
;
870 mc_search_cond
->str
= mc_search__cond_struct_new_regex_ci_str (charset
, tmp
);
871 g_string_free (tmp
, TRUE
);
874 mc_search_cond
->regex_handle
=
876 pcre2_compile ((unsigned char *) mc_search_cond
->str
->str
, PCRE2_ZERO_TERMINATED
,
877 pcre_options
, &errcode
, &erroffset
, NULL
);
879 pcre_compile (mc_search_cond
->str
->str
, pcre_options
, &error
, &erroffset
, NULL
);
881 if (mc_search_cond
->regex_handle
== NULL
)
884 pcre2_get_error_message (errcode
, (unsigned char *) error
, sizeof (error
));
886 mc_search_set_error (lc_mc_search
, MC_SEARCH_E_REGEX_COMPILE
, "%s", error
);
890 if (pcre2_jit_compile (mc_search_cond
->regex_handle
, PCRE2_JIT_COMPLETE
) && *error
!= '\0')
892 lc_mc_search
->regex_match_info
= pcre_study (mc_search_cond
->regex_handle
, 0, &error
);
893 if (lc_mc_search
->regex_match_info
== NULL
&& error
!= NULL
)
896 mc_search_set_error (lc_mc_search
, MC_SEARCH_E_REGEX_COMPILE
, "%s", error
);
897 MC_PTR_FREE (mc_search_cond
->regex_handle
);
900 #endif /* SEARCH_TYPE_GLIB */
903 lc_mc_search
->is_utf8
= str_isutf8 (charset
);
906 /* --------------------------------------------------------------------------------------------- */
909 mc_search__run_regex (mc_search_t
* lc_mc_search
, const void *user_data
,
910 gsize start_search
, gsize end_search
, gsize
* found_len
)
912 mc_search_cbret_t ret
= MC_SEARCH_CB_NOTFOUND
;
913 gsize current_pos
, virtual_pos
;
917 if (lc_mc_search
->regex_buffer
!= NULL
)
918 g_string_set_size (lc_mc_search
->regex_buffer
, 0);
920 lc_mc_search
->regex_buffer
= g_string_sized_new (64);
922 virtual_pos
= current_pos
= start_search
;
923 while (virtual_pos
<= end_search
)
925 g_string_set_size (lc_mc_search
->regex_buffer
, 0);
926 lc_mc_search
->start_buffer
= current_pos
;
928 if (lc_mc_search
->search_fn
!= NULL
)
932 int current_chr
= '\n'; /* stop search symbol */
934 ret
= lc_mc_search
->search_fn (user_data
, current_pos
, ¤t_chr
);
936 if (ret
== MC_SEARCH_CB_ABORT
)
939 if (ret
== MC_SEARCH_CB_INVALID
)
944 if (ret
== MC_SEARCH_CB_SKIP
)
949 g_string_append_c (lc_mc_search
->regex_buffer
, (char) current_chr
);
951 if ((char) current_chr
== '\n' || virtual_pos
> end_search
)
957 /* optimization for standard case (for search from file manager)
958 * where there is no MC_SEARCH_CB_INVALID or MC_SEARCH_CB_SKIP
959 * return codes, so we can copy line at regex buffer all at once
963 const char current_chr
= ((const char *) user_data
)[current_pos
];
965 if (current_chr
== '\0')
970 if (current_chr
== '\n' || current_pos
> end_search
)
974 /* use virtual_pos as index of start of current chunk */
975 g_string_append_len (lc_mc_search
->regex_buffer
, (const char *) user_data
+ virtual_pos
,
976 current_pos
- virtual_pos
);
977 virtual_pos
= current_pos
;
980 switch (mc_search__regex_found_cond (lc_mc_search
, lc_mc_search
->regex_buffer
))
983 #ifdef SEARCH_TYPE_GLIB
984 g_match_info_fetch_pos (lc_mc_search
->regex_match_info
, 0, &start_pos
, &end_pos
);
985 #else /* SEARCH_TYPE_GLIB */
986 start_pos
= lc_mc_search
->iovector
[0];
987 end_pos
= lc_mc_search
->iovector
[1];
988 #endif /* SEARCH_TYPE_GLIB */
989 if (found_len
!= NULL
)
990 *found_len
= end_pos
- start_pos
;
991 lc_mc_search
->normal_offset
= lc_mc_search
->start_buffer
+ start_pos
;
993 case COND__NOT_ALL_FOUND
:
996 g_string_free (lc_mc_search
->regex_buffer
, TRUE
);
997 lc_mc_search
->regex_buffer
= NULL
;
1001 if ((lc_mc_search
->update_fn
!= NULL
) &&
1002 ((lc_mc_search
->update_fn
) (user_data
, current_pos
) == MC_SEARCH_CB_ABORT
))
1003 ret
= MC_SEARCH_CB_ABORT
;
1005 if (ret
== MC_SEARCH_CB_ABORT
|| ret
== MC_SEARCH_CB_NOTFOUND
)
1009 g_string_free (lc_mc_search
->regex_buffer
, TRUE
);
1010 lc_mc_search
->regex_buffer
= NULL
;
1012 MC_PTR_FREE (lc_mc_search
->error_str
);
1013 lc_mc_search
->error
= ret
== MC_SEARCH_CB_ABORT
? MC_SEARCH_E_ABORT
: MC_SEARCH_E_NOTFOUND
;
1018 /* --------------------------------------------------------------------------------------------- */
1021 mc_search_regex_prepare_replace_str (mc_search_t
* lc_mc_search
, GString
* replace_str
)
1025 int num_replace_tokens
;
1028 replace_transform_type_t replace_flags
= REPLACE_T_NO_TRANSFORM
;
1030 num_replace_tokens
=
1031 mc_search_regex__get_max_num_of_replace_tokens (replace_str
->str
, replace_str
->len
);
1033 if (lc_mc_search
->num_results
< 0)
1034 return mc_g_string_dup (replace_str
);
1036 if (num_replace_tokens
> lc_mc_search
->num_results
- 1
1037 || num_replace_tokens
> MC_SEARCH__NUM_REPLACE_ARGS
)
1039 mc_search_set_error (lc_mc_search
, MC_SEARCH_E_REGEX_REPLACE
, "%s",
1040 _(STR_E_RPL_NOT_EQ_TO_FOUND
));
1044 ret
= g_string_sized_new (64);
1046 for (loop
= 0; loop
< replace_str
->len
- 1; loop
++)
1052 lc_index
= mc_search_regex__process_replace_str (replace_str
, loop
, &len
, &replace_flags
);
1054 if (lc_index
== REPLACE_PREPARE_T_NOTHING_SPECIAL
)
1058 mc_search_regex__process_append_str (ret
, replace_str
->str
+ prev
, loop
- prev
,
1060 mc_search_regex__process_append_str (ret
, replace_str
->str
+ loop
+ 1, len
- 1,
1063 loop
= prev
- 1; /* prepare to loop++ */
1069 if (lc_index
== REPLACE_PREPARE_T_REPLACE_FLAG
)
1072 mc_search_regex__process_append_str (ret
, replace_str
->str
+ prev
, loop
- prev
,
1075 loop
= prev
- 1; /* prepare to loop++ */
1079 /* escape sequence */
1080 if (lc_index
== REPLACE_PREPARE_T_ESCAPE_SEQ
)
1082 mc_search_regex__process_append_str (ret
, replace_str
->str
+ prev
, loop
- prev
,
1084 /* call process_escape_sequence without starting '\\' */
1085 mc_search_regex__process_escape_sequence (ret
, replace_str
->str
+ loop
+ 1, len
- 1,
1086 &replace_flags
, lc_mc_search
->is_utf8
);
1088 loop
= prev
- 1; /* prepare to loop++ */
1092 /* invalid capture buffer number */
1093 if (lc_index
> lc_mc_search
->num_results
)
1095 g_string_free (ret
, TRUE
);
1096 mc_search_set_error (lc_mc_search
, MC_SEARCH_E_REGEX_REPLACE
,
1097 _(STR_E_RPL_INVALID_TOKEN
), lc_index
);
1101 tmp_str
= mc_search_regex__get_token_by_num (lc_mc_search
, lc_index
);
1104 mc_search_regex__process_append_str (ret
, replace_str
->str
+ prev
, loop
- prev
,
1107 mc_search_regex__process_append_str (ret
, tmp_str
, -1, &replace_flags
);
1111 loop
= prev
- 1; /* prepare to loop++ */
1114 mc_search_regex__process_append_str (ret
, replace_str
->str
+ prev
, replace_str
->len
- prev
,