Ticket #2795: hex search: can't find 00 (zeroes) in patterns.
[midnight-commander.git] / lib / search / regex.c
blob0c3d6d8aa7ca36fe7ac3c1a963647087c07bc195
1 /*
2 Search text engine.
3 Regex search
5 Copyright (C) 2009, 2011
6 The Free Software Foundation, Inc.
8 Written by:
9 Slava Zanko <slavazanko@gmail.com>, 2009,2010,2011
10 Vitaliy Filippov <vitalif@yourcmc.ru>, 2011
12 This file is part of the Midnight Commander.
14 The Midnight Commander is free software: you can redistribute it
15 and/or modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation, either version 3 of the License,
17 or (at your option) any later version.
19 The Midnight Commander is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program. If not, see <http://www.gnu.org/licenses/>.
28 #include <config.h>
30 #include <stdlib.h>
32 #include "lib/global.h"
33 #include "lib/strutil.h"
34 #include "lib/search.h"
35 #include "lib/strescape.h"
37 #include "lib/charsets.h"
39 #include "internal.h"
41 /*** global variables ****************************************************************************/
43 /*** file scope macro definitions ****************************************************************/
/* Non-capture results of mc_search_regex__process_replace_str().  They are negative so
   that they can never be mistaken for a capture buffer index (which is >= 0).
   The replacement lists are parenthesized so that the unary minus cannot combine with
   neighbouring tokens at the point of expansion. */
#define REPLACE_PREPARE_T_NOTHING_SPECIAL (-1)
#define REPLACE_PREPARE_T_REPLACE_FLAG (-2)
#define REPLACE_PREPARE_T_ESCAPE_SEQ (-3)
49 /*** file scope type declarations ****************************************************************/
/* Case transformations that can be pending while a replacement string is expanded.
   Every non-zero value is a distinct bit, so the flags can be OR-ed together.
   The *_CHAR variants are cleared again after a single symbol has been converted;
   the others stay set until they are overridden or reset to REPLACE_T_NO_TRANSFORM. */
typedef enum
{
    REPLACE_T_NO_TRANSFORM = 0,
    REPLACE_T_UPP_TRANSFORM_CHAR = 1 << 0,
    REPLACE_T_LOW_TRANSFORM_CHAR = 1 << 1,
    REPLACE_T_UPP_TRANSFORM = 1 << 2,
    REPLACE_T_LOW_TRANSFORM = 1 << 3
} replace_transform_type_t;
61 /*** file scope variables ************************************************************************/
63 /*** file scope functions ************************************************************************/
65 static gboolean
66 mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str,
67 gsize * offset)
69 char *tmp_regex_str;
70 gsize spec_chr_len;
71 const char **spec_chr;
72 const char *special_chars[] = {
73 "\\s", "\\S",
74 "\\d", "\\D",
75 "\\b", "\\B",
76 "\\w", "\\W",
77 "\\t", "\\n",
78 "\\r", "\\f",
79 "\\a", "\\e",
80 "\\x", "\\X",
81 "\\c", "\\C",
82 "\\l", "\\L",
83 "\\u", "\\U",
84 "\\E", "\\Q",
85 NULL
87 spec_chr = special_chars;
89 tmp_regex_str = &(regex_str->str[*offset]);
91 while (*spec_chr)
93 spec_chr_len = strlen (*spec_chr);
94 if (!strncmp (tmp_regex_str, *spec_chr, spec_chr_len))
96 if (!strutils_is_char_escaped (regex_str->str, tmp_regex_str))
98 if (!strncmp ("\\x", *spec_chr, spec_chr_len))
100 if (*(tmp_regex_str + spec_chr_len) == '{')
102 while ((spec_chr_len < regex_str->len - *offset)
103 && *(tmp_regex_str + spec_chr_len) != '}')
104 spec_chr_len++;
105 if (*(tmp_regex_str + spec_chr_len) == '}')
106 spec_chr_len++;
108 else
109 spec_chr_len += 2;
111 g_string_append_len (copy_to, tmp_regex_str, spec_chr_len);
112 *offset += spec_chr_len;
113 return TRUE;
116 spec_chr++;
118 return FALSE;
122 /* --------------------------------------------------------------------------------------------- */
123 static void
124 mc_search__cond_struct_new_regex_hex_add (const char *charset, GString * str_to,
125 const char *one_char, gsize str_len)
127 GString *upp, *low;
128 gchar *tmp_str;
129 gsize loop;
131 upp = mc_search__toupper_case_str (charset, one_char, str_len);
132 low = mc_search__tolower_case_str (charset, one_char, str_len);
134 for (loop = 0; loop < upp->len; loop++)
137 if (loop < low->len)
139 if (upp->str[loop] == low->str[loop])
140 tmp_str = g_strdup_printf ("\\x%02X", (unsigned char) upp->str[loop]);
141 else
142 tmp_str =
143 g_strdup_printf ("[\\x%02X\\x%02X]", (unsigned char) upp->str[loop],
144 (unsigned char) low->str[loop]);
146 else
148 tmp_str = g_strdup_printf ("\\x%02X", (unsigned char) upp->str[loop]);
150 g_string_append (str_to, tmp_str);
151 g_free (tmp_str);
153 g_string_free (upp, TRUE);
154 g_string_free (low, TRUE);
157 /* --------------------------------------------------------------------------------------------- */
159 static void
160 mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * str_to,
161 GString * str_from)
163 GString *recoded_part;
164 gsize loop = 0;
166 recoded_part = g_string_sized_new (32);
168 while (loop < str_from->len)
170 gchar *one_char;
171 gsize one_char_len;
172 gboolean just_letters;
174 one_char =
175 mc_search__get_one_symbol (charset, &(str_from->str[loop]),
176 min (str_from->len - loop, 6), &just_letters);
177 one_char_len = strlen (one_char);
179 if (one_char_len == 0)
180 loop++;
181 else
183 loop += one_char_len;
185 if (just_letters)
186 mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char,
187 one_char_len);
188 else
189 g_string_append_len (recoded_part, one_char, one_char_len);
192 g_free (one_char);
195 g_string_append (str_to, recoded_part->str);
196 g_string_free (recoded_part, TRUE);
197 g_string_set_size (str_from, 0);
200 /* --------------------------------------------------------------------------------------------- */
202 static GString *
203 mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString * astr)
205 GString *accumulator, *spec_char, *ret_str;
206 gsize loop;
208 ret_str = g_string_sized_new (64);
209 accumulator = g_string_sized_new (64);
210 spec_char = g_string_sized_new (64);
211 loop = 0;
213 while (loop <= astr->len)
215 if (mc_search__regex_str_append_if_special (spec_char, astr, &loop))
217 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
218 g_string_append_len (ret_str, spec_char->str, spec_char->len);
219 g_string_set_size (spec_char, 0);
220 continue;
223 if (astr->str[loop] == '[' && !strutils_is_char_escaped (astr->str, &(astr->str[loop])))
225 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
227 while (loop < astr->len && !(astr->str[loop] == ']'
228 && !strutils_is_char_escaped (astr->str,
229 &(astr->str[loop]))))
231 g_string_append_c (ret_str, astr->str[loop]);
232 loop++;
235 g_string_append_c (ret_str, astr->str[loop]);
236 loop++;
237 continue;
240 TODO: handle [ and ]
242 g_string_append_c (accumulator, astr->str[loop]);
243 loop++;
245 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
247 g_string_free (accumulator, TRUE);
248 g_string_free (spec_char, TRUE);
250 return ret_str;
253 /* --------------------------------------------------------------------------------------------- */
255 static mc_search__found_cond_t
256 mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * regex,
257 GString * search_str)
259 #ifdef SEARCH_TYPE_GLIB
260 GError *error = NULL;
262 if (!g_regex_match_full (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY,
263 &lc_mc_search->regex_match_info, &error))
265 g_match_info_free (lc_mc_search->regex_match_info);
266 lc_mc_search->regex_match_info = NULL;
267 if (error)
269 lc_mc_search->error = MC_SEARCH_E_REGEX;
270 lc_mc_search->error_str =
271 str_conv_gerror_message (error, _("Regular expression error"));
272 g_error_free (error);
273 return COND__FOUND_ERROR;
275 return COND__NOT_FOUND;
277 lc_mc_search->num_results = g_match_info_get_match_count (lc_mc_search->regex_match_info);
278 #else /* SEARCH_TYPE_GLIB */
279 lc_mc_search->num_results = pcre_exec (regex, lc_mc_search->regex_match_info,
280 search_str->str, search_str->len, 0, 0,
281 lc_mc_search->iovector, MC_SEARCH__NUM_REPLACE_ARGS);
282 if (lc_mc_search->num_results < 0)
284 return COND__NOT_FOUND;
286 #endif /* SEARCH_TYPE_GLIB */
287 return COND__FOUND_OK;
291 /* --------------------------------------------------------------------------------------------- */
293 static mc_search__found_cond_t
294 mc_search__regex_found_cond (mc_search_t * lc_mc_search, GString * search_str)
296 gsize loop1;
297 mc_search_cond_t *mc_search_cond;
298 mc_search__found_cond_t ret;
300 for (loop1 = 0; loop1 < lc_mc_search->conditions->len; loop1++)
302 mc_search_cond = (mc_search_cond_t *) g_ptr_array_index (lc_mc_search->conditions, loop1);
304 if (!mc_search_cond->regex_handle)
305 continue;
307 ret =
308 mc_search__regex_found_cond_one (lc_mc_search, mc_search_cond->regex_handle,
309 search_str);
311 if (ret != COND__NOT_FOUND)
312 return ret;
314 return COND__NOT_ALL_FOUND;
317 /* --------------------------------------------------------------------------------------------- */
319 static int
320 mc_search_regex__get_max_num_of_replace_tokens (const gchar * str, gsize len)
322 int max_token = 0;
323 gsize loop;
324 for (loop = 0; loop < len - 1; loop++)
326 if (str[loop] == '\\' && g_ascii_isdigit (str[loop + 1]))
328 if (strutils_is_char_escaped (str, &str[loop]))
329 continue;
330 if (max_token < str[loop + 1] - '0')
331 max_token = str[loop + 1] - '0';
332 continue;
334 if (str[loop] == '$' && str[loop + 1] == '{')
336 gsize tmp_len;
337 char *tmp_str;
338 int tmp_token;
339 if (strutils_is_char_escaped (str, &str[loop]))
340 continue;
342 for (tmp_len = 0;
343 loop + tmp_len + 2 < len && (str[loop + 2 + tmp_len] & (char) 0xf0) == 0x30;
344 tmp_len++);
345 if (str[loop + 2 + tmp_len] == '}')
347 tmp_str = g_strndup (&str[loop + 2], tmp_len);
348 tmp_token = atoi (tmp_str);
349 if (max_token < tmp_token)
350 max_token = tmp_token;
351 g_free (tmp_str);
355 return max_token;
358 /* --------------------------------------------------------------------------------------------- */
360 static char *
361 mc_search_regex__get_token_by_num (const mc_search_t * lc_mc_search, gsize lc_index)
363 int fnd_start = 0, fnd_end = 0;
365 #ifdef SEARCH_TYPE_GLIB
366 g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &fnd_start, &fnd_end);
367 #else /* SEARCH_TYPE_GLIB */
368 fnd_start = lc_mc_search->iovector[lc_index * 2 + 0];
369 fnd_end = lc_mc_search->iovector[lc_index * 2 + 1];
370 #endif /* SEARCH_TYPE_GLIB */
372 if (fnd_end - fnd_start == 0)
373 return NULL;
375 return g_strndup (lc_mc_search->regex_buffer->str + fnd_start, fnd_end - fnd_start);
379 /* --------------------------------------------------------------------------------------------- */
381 static gboolean
382 mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsize current_pos,
383 gsize * skip_len, int *ret)
385 char *curr_str = &(replace_str->str[current_pos]);
386 char c = *(curr_str + 1);
388 if (replace_str->len > current_pos + 2)
390 if (c == '{')
392 for (*skip_len = 2; /* \{ */
393 current_pos + *skip_len < replace_str->len
394 && *(curr_str + *skip_len) >= '0'
395 && *(curr_str + *skip_len) <= '7'; (*skip_len)++);
396 if (current_pos + *skip_len < replace_str->len && *(curr_str + *skip_len) == '}')
398 (*skip_len)++;
399 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
400 return FALSE;
402 else
404 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
405 return TRUE;
409 if (c == 'x')
411 *skip_len = 2; /* \x */
412 c = *(curr_str + 2);
413 if (c == '{')
415 for (*skip_len = 3; /* \x{ */
416 current_pos + *skip_len < replace_str->len
417 && g_ascii_isxdigit ((guchar) * (curr_str + *skip_len)); (*skip_len)++);
418 if (current_pos + *skip_len < replace_str->len && *(curr_str + *skip_len) == '}')
420 (*skip_len)++;
421 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
422 return FALSE;
424 else
426 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
427 return TRUE;
430 else if (!g_ascii_isxdigit ((guchar) c))
432 *skip_len = 2; /* \x without number behind */
433 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
434 return FALSE;
436 else
438 c = *(curr_str + 3);
439 if (!g_ascii_isxdigit ((guchar) c))
440 *skip_len = 3; /* \xH */
441 else
442 *skip_len = 4; /* \xHH */
443 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
444 return FALSE;
449 if (strchr ("ntvbrfa", c) != NULL)
451 *skip_len = 2;
452 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
453 return FALSE;
455 return TRUE;
458 /* --------------------------------------------------------------------------------------------- */
460 static int
461 mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos,
462 gsize * skip_len, replace_transform_type_t * replace_flags)
464 int ret = -1;
465 char *tmp_str;
466 const char *curr_str = &(replace_str->str[current_pos]);
468 if (current_pos > replace_str->len)
469 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
471 *skip_len = 0;
473 if ((*curr_str == '$') && (*(curr_str + 1) == '{') && ((*(curr_str + 2) & (char) 0xf0) == 0x30)
474 && (replace_str->len > current_pos + 2))
476 if (strutils_is_char_escaped (replace_str->str, curr_str))
478 *skip_len = 1;
479 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
482 for (*skip_len = 0;
483 current_pos + *skip_len + 2 < replace_str->len
484 && (*(curr_str + 2 + *skip_len) & (char) 0xf0) == 0x30; (*skip_len)++);
486 if (*(curr_str + 2 + *skip_len) != '}')
487 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
489 tmp_str = g_strndup (curr_str + 2, *skip_len);
490 if (tmp_str == NULL)
491 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
493 ret = atoi (tmp_str);
494 g_free (tmp_str);
496 *skip_len += 3; /* ${} */
497 return ret; /* capture buffer index >= 0 */
500 if ((*curr_str == '\\') && (replace_str->len > current_pos + 1))
502 if (strutils_is_char_escaped (replace_str->str, curr_str))
504 *skip_len = 1;
505 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
508 if (g_ascii_isdigit (*(curr_str + 1)))
510 ret = g_ascii_digit_value (*(curr_str + 1)); /* capture buffer index >= 0 */
511 *skip_len = 2; /* \\ and one digit */
512 return ret;
515 if (!mc_search_regex__replace_handle_esc_seq (replace_str, current_pos, skip_len, &ret))
516 return ret;
518 ret = REPLACE_PREPARE_T_REPLACE_FLAG;
519 *skip_len += 2;
520 switch (*(curr_str + 1))
522 case 'U':
523 *replace_flags |= REPLACE_T_UPP_TRANSFORM;
524 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM;
525 break;
526 case 'u':
527 *replace_flags |= REPLACE_T_UPP_TRANSFORM_CHAR;
528 break;
529 case 'L':
530 *replace_flags |= REPLACE_T_LOW_TRANSFORM;
531 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM;
532 break;
533 case 'l':
534 *replace_flags |= REPLACE_T_LOW_TRANSFORM_CHAR;
535 break;
536 case 'E':
537 *replace_flags = REPLACE_T_NO_TRANSFORM;
538 break;
539 default:
540 ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
541 break;
544 return ret;
547 /* --------------------------------------------------------------------------------------------- */
549 static void
550 mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize len,
551 replace_transform_type_t * replace_flags)
553 gsize loop = 0;
554 gsize char_len;
555 char *tmp_str;
556 GString *tmp_string;
558 if (len == (gsize) - 1)
559 len = strlen (from);
561 if (*replace_flags == REPLACE_T_NO_TRANSFORM)
563 g_string_append_len (dest_str, from, len);
564 return;
566 while (loop < len)
568 tmp_str = mc_search__get_one_symbol (NULL, from + loop, len - loop, NULL);
569 char_len = strlen (tmp_str);
570 if (*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR)
572 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR;
573 tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
574 g_string_append (dest_str, tmp_string->str);
575 g_string_free (tmp_string, TRUE);
578 else if (*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR)
580 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR;
581 tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
582 g_string_append (dest_str, tmp_string->str);
583 g_string_free (tmp_string, TRUE);
586 else if (*replace_flags & REPLACE_T_UPP_TRANSFORM)
588 tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
589 g_string_append (dest_str, tmp_string->str);
590 g_string_free (tmp_string, TRUE);
593 else if (*replace_flags & REPLACE_T_LOW_TRANSFORM)
595 tmp_string = mc_search__tolower_case_str (NULL, tmp_str, char_len);
596 g_string_append (dest_str, tmp_string->str);
597 g_string_free (tmp_string, TRUE);
600 else
602 g_string_append (dest_str, tmp_str);
604 g_free (tmp_str);
605 loop += char_len;
610 /* --------------------------------------------------------------------------------------------- */
612 static void
613 mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len,
614 replace_transform_type_t * replace_flags,
615 gboolean is_utf8)
617 gsize i = 0;
618 unsigned int c = 0;
619 char b;
621 if (len == (gsize) (-1))
622 len = strlen (from);
623 if (len == 0)
624 return;
625 if (from[i] == '{')
626 i++;
627 if (i >= len)
628 return;
630 if (from[i] == 'x')
632 i++;
633 if (i < len && from[i] == '{')
634 i++;
635 for (; i < len; i++)
637 if (from[i] >= '0' && from[i] <= '9')
638 c = c * 16 + from[i] - '0';
639 else if (from[i] >= 'a' && from[i] <= 'f')
640 c = c * 16 + 10 + from[i] - 'a';
641 else if (from[i] >= 'A' && from[i] <= 'F')
642 c = c * 16 + 10 + from[i] - 'A';
643 else
644 break;
647 else if (from[i] >= '0' && from[i] <= '7')
648 for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)
649 c = c * 8 + from[i] - '0';
650 else
652 switch (from[i])
654 case 'n':
655 c = '\n';
656 break;
657 case 't':
658 c = '\t';
659 break;
660 case 'v':
661 c = '\v';
662 break;
663 case 'b':
664 c = '\b';
665 break;
666 case 'r':
667 c = '\r';
668 break;
669 case 'f':
670 c = '\f';
671 break;
672 case 'a':
673 c = '\a';
674 break;
675 default:
676 mc_search_regex__process_append_str (dest_str, from, len, replace_flags);
677 return;
681 if (c < 0x80 || !is_utf8)
682 g_string_append_c (dest_str, (char) c);
683 else if (c < 0x800)
685 b = 0xC0 | (c >> 6);
686 g_string_append_c (dest_str, b);
687 b = 0x80 | (c & 0x3F);
688 g_string_append_c (dest_str, b);
690 else if (c < 0x10000)
692 b = 0xE0 | (c >> 12);
693 g_string_append_c (dest_str, b);
694 b = 0x80 | ((c >> 6) & 0x3F);
695 g_string_append_c (dest_str, b);
696 b = 0x80 | (c & 0x3F);
697 g_string_append_c (dest_str, b);
699 else if (c < 0x10FFFF)
701 b = 0xF0 | (c >> 16);
702 g_string_append_c (dest_str, b);
703 b = 0x80 | ((c >> 12) & 0x3F);
704 g_string_append_c (dest_str, b);
705 b = 0x80 | ((c >> 6) & 0x3F);
706 g_string_append_c (dest_str, b);
707 b = 0x80 | (c & 0x3F);
708 g_string_append_c (dest_str, b);
712 /* --------------------------------------------------------------------------------------------- */
713 /*** public functions ****************************************************************************/
714 /* --------------------------------------------------------------------------------------------- */
716 void
717 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search,
718 mc_search_cond_t * mc_search_cond)
720 #ifdef SEARCH_TYPE_GLIB
721 GError *error = NULL;
723 if (!lc_mc_search->is_case_sensitive)
725 GString *tmp;
727 tmp = mc_search_cond->str;
728 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
729 g_string_free (tmp, TRUE);
731 mc_search_cond->regex_handle =
732 g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_DOTALL,
733 0, &error);
735 if (error != NULL)
737 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
738 lc_mc_search->error_str = str_conv_gerror_message (error, _("Regular expression error"));
739 g_error_free (error);
740 return;
742 #else /* SEARCH_TYPE_GLIB */
743 const char *error;
744 int erroffset;
745 int pcre_options = PCRE_EXTRA | PCRE_MULTILINE;
747 if (str_isutf8 (charset) && mc_global.utf8_display)
749 pcre_options |= PCRE_UTF8;
750 if (!lc_mc_search->is_case_sensitive)
751 pcre_options |= PCRE_CASELESS;
753 else
755 if (!lc_mc_search->is_case_sensitive)
757 GString *tmp;
759 tmp = mc_search_cond->str;
760 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
761 g_string_free (tmp, TRUE);
765 mc_search_cond->regex_handle =
766 pcre_compile (mc_search_cond->str->str, pcre_options, &error, &erroffset, NULL);
767 if (mc_search_cond->regex_handle == NULL)
769 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
770 lc_mc_search->error_str = g_strdup (error);
771 return;
773 lc_mc_search->regex_match_info = pcre_study (mc_search_cond->regex_handle, 0, &error);
774 if (lc_mc_search->regex_match_info == NULL)
776 if (error)
778 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
779 lc_mc_search->error_str = g_strdup (error);
780 g_free (mc_search_cond->regex_handle);
781 mc_search_cond->regex_handle = NULL;
782 return;
785 #endif /* SEARCH_TYPE_GLIB */
786 lc_mc_search->is_utf8 = str_isutf8 (charset);
789 /* --------------------------------------------------------------------------------------------- */
791 gboolean
792 mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
793 gsize start_search, gsize end_search, gsize * found_len)
795 gsize current_pos, virtual_pos;
796 int current_chr = 0;
797 gint start_pos;
798 gint end_pos;
800 if (lc_mc_search->regex_buffer != NULL)
801 g_string_free (lc_mc_search->regex_buffer, TRUE);
803 lc_mc_search->regex_buffer = g_string_sized_new (64);
805 virtual_pos = current_pos = start_search;
806 while (virtual_pos <= end_search)
808 g_string_set_size (lc_mc_search->regex_buffer, 0);
809 lc_mc_search->start_buffer = current_pos;
811 while (1)
813 current_chr = mc_search__get_char (lc_mc_search, user_data, current_pos);
814 if (current_chr == MC_SEARCH_CB_ABORT)
815 break;
817 if (current_chr == MC_SEARCH_CB_INVALID)
818 continue;
820 current_pos++;
822 if (current_chr == MC_SEARCH_CB_SKIP)
823 continue;
825 virtual_pos++;
827 g_string_append_c (lc_mc_search->regex_buffer, (char) current_chr);
829 if ((char) current_chr == '\n' || virtual_pos > end_search)
830 break;
832 switch (mc_search__regex_found_cond (lc_mc_search, lc_mc_search->regex_buffer))
834 case COND__FOUND_OK:
835 #ifdef SEARCH_TYPE_GLIB
836 if (lc_mc_search->whole_words)
838 g_match_info_fetch_pos (lc_mc_search->regex_match_info, 2, &start_pos, &end_pos);
840 else
842 g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
844 #else /* SEARCH_TYPE_GLIB */
845 if (lc_mc_search->whole_words)
847 start_pos = lc_mc_search->iovector[4];
848 end_pos = lc_mc_search->iovector[5];
850 else
852 start_pos = lc_mc_search->iovector[0];
853 end_pos = lc_mc_search->iovector[1];
855 #endif /* SEARCH_TYPE_GLIB */
856 if (found_len)
857 *found_len = end_pos - start_pos;
858 lc_mc_search->normal_offset = lc_mc_search->start_buffer + start_pos;
859 return TRUE;
860 case COND__NOT_ALL_FOUND:
861 break;
862 default:
863 g_string_free (lc_mc_search->regex_buffer, TRUE);
864 lc_mc_search->regex_buffer = NULL;
865 return FALSE;
867 if ((lc_mc_search->update_fn != NULL) &&
868 ((lc_mc_search->update_fn) (user_data, current_pos) == MC_SEARCH_CB_ABORT))
869 current_chr = MC_SEARCH_CB_ABORT;
871 if (current_chr == MC_SEARCH_CB_ABORT)
872 break;
874 g_string_free (lc_mc_search->regex_buffer, TRUE);
875 lc_mc_search->regex_buffer = NULL;
876 lc_mc_search->error = MC_SEARCH_E_NOTFOUND;
878 if (current_chr != MC_SEARCH_CB_ABORT)
879 lc_mc_search->error_str = g_strdup (_(STR_E_NOTFOUND));
880 else
881 lc_mc_search->error_str = NULL;
883 return FALSE;
886 /* --------------------------------------------------------------------------------------------- */
888 GString *
889 mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
891 GString *ret;
892 gchar *tmp_str;
894 int num_replace_tokens, lc_index;
895 gsize loop;
896 gsize len = 0;
897 gchar *prev_str;
898 replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM;
900 num_replace_tokens =
901 mc_search_regex__get_max_num_of_replace_tokens (replace_str->str, replace_str->len);
903 if (lc_mc_search->num_results < 0)
904 return g_string_new_len (replace_str->str, replace_str->len);
906 if (num_replace_tokens > lc_mc_search->num_results - 1
907 || num_replace_tokens > MC_SEARCH__NUM_REPLACE_ARGS)
909 lc_mc_search->error = MC_SEARCH_E_REGEX_REPLACE;
910 lc_mc_search->error_str = g_strdup (_(STR_E_RPL_NOT_EQ_TO_FOUND));
911 return NULL;
914 ret = g_string_sized_new (64);
915 prev_str = replace_str->str;
917 for (loop = 0; loop < replace_str->len - 1; loop++)
919 lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);
921 if (lc_index == REPLACE_PREPARE_T_NOTHING_SPECIAL)
923 if (len != 0)
925 mc_search_regex__process_append_str (ret, prev_str,
926 replace_str->str - prev_str + loop,
927 &replace_flags);
928 mc_search_regex__process_append_str (ret, replace_str->str + loop + 1, len - 1,
929 &replace_flags);
930 prev_str = replace_str->str + loop + len;
931 loop += len - 1;
933 continue;
936 if (lc_index == REPLACE_PREPARE_T_REPLACE_FLAG)
938 if (loop)
939 mc_search_regex__process_append_str (ret, prev_str,
940 replace_str->str - prev_str + loop,
941 &replace_flags);
942 prev_str = replace_str->str + loop + len;
943 loop += len - 1;
944 continue;
947 /* escape sequence */
948 if (lc_index == REPLACE_PREPARE_T_ESCAPE_SEQ)
950 mc_search_regex__process_append_str (ret, prev_str,
951 replace_str->str + loop - prev_str,
952 &replace_flags);
953 /* call process_escape_sequence without starting '\\' */
954 mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1,
955 &replace_flags, lc_mc_search->is_utf8);
956 prev_str = replace_str->str + loop + len;
957 loop += len - 1;
958 continue;
961 /* invalid capture buffer number */
962 if (lc_index > lc_mc_search->num_results)
964 g_string_free (ret, TRUE);
965 lc_mc_search->error = MC_SEARCH_E_REGEX_REPLACE;
966 lc_mc_search->error_str = g_strdup_printf (_(STR_E_RPL_INVALID_TOKEN), lc_index);
967 return NULL;
970 tmp_str = mc_search_regex__get_token_by_num (lc_mc_search, lc_index);
971 if (tmp_str == NULL)
972 continue;
974 if (loop)
975 mc_search_regex__process_append_str (ret, prev_str, replace_str->str - prev_str + loop,
976 &replace_flags);
977 prev_str = replace_str->str + loop + len;
979 mc_search_regex__process_append_str (ret, tmp_str, -1, &replace_flags);
980 g_free (tmp_str);
981 loop += len - 1;
983 mc_search_regex__process_append_str (ret, prev_str,
984 replace_str->str - prev_str + replace_str->len,
985 &replace_flags);
987 return ret;