syntax: add missing SQL keywords (closes MidnightCommander/mc#129)
[midnight-commander.git] / lib / search / hex.c
blob383c92adb7473572fc58a72137d321a19880213b
/*
   Search text engine.
   HEX-style pattern matching

   Copyright (C) 2009-2017
   Free Software Foundation, Inc.

   Written by:
   Slava Zanko <slavazanko@gmail.com>, 2009.

   This file is part of the Midnight Commander.

   The Midnight Commander is free software: you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation, either version 3 of the License,
   or (at your option) any later version.

   The Midnight Commander is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
27 #include <config.h>
29 #include <stdio.h>
31 #include "lib/global.h"
32 #include "lib/strutil.h"
33 #include "lib/search.h"
34 #include "lib/strescape.h"
36 #include "internal.h"
38 /*** global variables ****************************************************************************/
40 /*** file scope macro definitions ****************************************************************/
/* Result of parsing a hex search pattern. */
typedef enum
{
    MC_SEARCH_HEX_E_OK,                 /* the pattern was parsed without errors */
    MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE,   /* a hex number is greater than 0xFF */
    MC_SEARCH_HEX_E_INVALID_CHARACTER,  /* not whitespace, a hex number or a quote */
    MC_SEARCH_HEX_E_UNMATCHED_QUOTES    /* a quoted string has no closing quote */
} mc_search_hex_parse_error_t;
50 /*** file scope type declarations ****************************************************************/
52 /*** file scope variables ************************************************************************/
54 /*** file scope functions ************************************************************************/
56 static GString *
57 mc_search__hex_translate_to_regex (const GString * astr, mc_search_hex_parse_error_t * error_ptr,
58 int *error_pos_ptr)
60 GString *buff;
61 const char *str;
62 gsize str_len;
63 gsize loop = 0;
64 mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
66 buff = g_string_sized_new (64);
67 str = astr->str;
68 str_len = astr->len;
70 while (loop < str_len && error == MC_SEARCH_HEX_E_OK)
72 unsigned int val;
73 int ptr;
75 if (g_ascii_isspace (str[loop]))
77 /* Eat-up whitespace between tokens. */
78 while (g_ascii_isspace (str[loop]))
79 loop++;
81 /* cppcheck-suppress invalidscanf */
82 else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1)
84 if (val > 255)
85 error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE;
86 else
88 g_string_append_printf (buff, "\\x%02X", val);
89 loop += ptr;
92 else if (str[loop] == '"')
94 gsize loop2;
96 loop2 = loop + 1;
98 while (loop2 < str_len)
100 if (str[loop2] == '"')
101 break;
102 if (str[loop2] == '\\' && loop2 + 1 < str_len)
103 loop2++;
104 g_string_append_c (buff, str[loop2]);
105 loop2++;
108 if (str[loop2] == '\0')
109 error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES;
110 else
111 loop = loop2 + 1;
113 else
114 error = MC_SEARCH_HEX_E_INVALID_CHARACTER;
117 if (error != MC_SEARCH_HEX_E_OK)
119 g_string_free (buff, TRUE);
120 if (error_ptr != NULL)
121 *error_ptr = error;
122 if (error_pos_ptr != NULL)
123 *error_pos_ptr = loop;
124 return NULL;
127 return buff;
130 /*** public functions ****************************************************************************/
132 void
133 mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
134 mc_search_cond_t * mc_search_cond)
136 GString *tmp;
137 mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
138 int error_pos = 0;
141 * We may be searching in binary data, which is often invalid UTF-8.
143 * We have to create a non UTF-8 regex (that is, G_REGEX_RAW) or else, as
144 * the data is invalid UTF-8, both GLib's PCRE and our
145 * mc_search__g_regex_match_full_safe() are going to fail us. The former by
146 * not finding all bytes, the latter by overwriting the supposedly invalid
147 * UTF-8 with NULs.
149 * To do this, we specify "ASCII" as the charset.
151 * In fact, we can specify any charset other than "UTF-8": any such charset
152 * will trigger G_REGEX_RAW (see [1]). The output of [2] will be the same
153 * for all charsets because it skips the \xXX symbols
154 * mc_search__hex_translate_to_regex() outputs.
156 * But "ASCII" is the best choice because a hex pattern may contain a
157 * quoted string: this way we know [2] will ignore any characters outside
158 * ASCII letters range (these ignored chars will be copied verbatim to the
159 * output and will match as-is; in other words, in a case-sensitive manner;
160 * If the user is interested in case-insensitive searches of international
161 * text, he shouldn't be using hex search in the first place.)
163 * Switching out of UTF-8 has another advantage:
165 * When doing case-insensitive searches, GLib treats \xXX symbols as normal
166 * letters and therefore matches both "a" and "A" for the hex pattern
167 * "0x61". When we switch out of UTF-8, we're switching to using [2], which
168 * doesn't have this issue.
170 * [1] mc_search__cond_struct_new_init_regex
171 * [2] mc_search__cond_struct_new_regex_ci_str
173 if (str_isutf8 (charset))
174 charset = "ASCII";
176 tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos);
177 if (tmp != NULL)
179 g_string_free (mc_search_cond->str, TRUE);
180 mc_search_cond->str = tmp;
181 mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
183 else
185 const char *desc;
187 switch (error)
189 case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE:
190 desc =
192 ("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)");
193 break;
194 case MC_SEARCH_HEX_E_INVALID_CHARACTER:
195 desc = _("Invalid character");
196 break;
197 case MC_SEARCH_HEX_E_UNMATCHED_QUOTES:
198 desc = _("Unmatched quotes character");
199 break;
200 default:
201 desc = "";
204 lc_mc_search->error = MC_SEARCH_E_INPUT;
205 lc_mc_search->error_str =
206 g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc);
210 /* --------------------------------------------------------------------------------------------- */
212 gboolean
213 mc_search__run_hex (mc_search_t * lc_mc_search, const void *user_data,
214 gsize start_search, gsize end_search, gsize * found_len)
216 return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
219 /* --------------------------------------------------------------------------------------------- */
221 GString *
222 mc_search_hex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
224 (void) lc_mc_search;
225 return g_string_new_len (replace_str->str, replace_str->len);
228 /* --------------------------------------------------------------------------------------------- */