482504: Bad CapsLock grab on certain keyboard configurations
[nedit.git] / source / regexConvert.c
blob5ebe17595cba022f182b8b95daf65898308254b2
1 static const char CVSID[] = "$Id: regexConvert.c,v 1.5 2001/08/25 15:58:54 amai Exp $";
2 /*------------------------------------------------------------------------*
3 * `CompileRE', `ExecRE', and `ConvertSubstituteRE' -- regular expression parsing
5 * This is a HIGHLY ALTERED VERSION of Henry Spencer's `regcomp'
6 * code adapted for NEdit.
8 * .-------------------------------------------------------------------.
9 * | ORIGINAL COPYRIGHT NOTICE: |
10 * | |
11 * | Copyright (c) 1986 by University of Toronto. |
12 * | Written by Henry Spencer. Not derived from licensed software. |
13 * | |
14 * | Permission is granted to anyone to use this software for any |
15 * | purpose on any computer system, and to redistribute it freely, |
16 * | subject to the following restrictions: |
17 * | |
18 * | 1. The author is not responsible for the consequences of use of |
19 * | this software, no matter how awful, even if they arise |
20 * | from defects in it. |
21 * | |
22 * | 2. The origin of this software must not be misrepresented, either |
23 * | by explicit claim or by omission. |
24 * | |
25 * | 3. Altered versions must be plainly marked as such, and must not |
26 * | be misrepresented as being the original software. |
27 * `-------------------------------------------------------------------'
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <X11/Intrinsic.h>
37 #include "regexConvert.h"
40 /* Utility definitions. */
42 #define NSUBEXP 50
44 #define CONVERT_FAIL(m) {*Error_Ptr = (m); return 0;}
45 #define IS_QUANTIFIER(c) ((c) == '*' || (c) == '+' || (c) == '?')
46 #define U_CHAR_AT(p) ((unsigned int) *(unsigned char *)(p))
48 /* Flags to be passed up and down via function parameters during compile. */
50 #define WORST 0 /* Worst case. No assumptions can be made.*/
51 #define HAS_WIDTH 1 /* Known never to match null string. */
52 #define SIMPLE 2 /* Simple enough to be STAR/PLUS operand. */
54 #define NO_PAREN 0 /* Only set by initial call to "chunk". */
55 #define PAREN 1 /* Used for normal capturing parentheses. */
57 #define REG_ZERO 0UL
58 #define REG_ONE 1UL
60 /* Global work variables for `ConvertRE'. */
62 static unsigned char *Reg_Parse; /* Input scan ptr (scans user's regex) */
63 static int Total_Paren; /* Parentheses, (), counter. */
64 static unsigned long Convert_Size; /* Address of this used as flag. */
65 static unsigned char *Code_Emit_Ptr; /* When Code_Emit_Ptr is set to
66 &Compute_Size no code is emitted.
67 Instead, the size of code that WOULD
68 have been generated is accumulated in
69 Convert_Size. Otherwise,
70 Code_Emit_Ptr points to where compiled
71 regex code is to be written. */
72 static unsigned char Compute_Size;
73 static char **Error_Ptr; /* Place to store error messages so
74 they can be returned by `ConvertRE' */
75 static char Error_Text [128];/* Sting to build error messages in. */
77 static unsigned char Meta_Char [] = ".*+?[(|)^<>$";
79 static unsigned char *Convert_Str;
81 /* Forward declarations for functions used by `ConvertRE'. */
83 static int alternative (int *flag_param);
84 static int chunk (int paren, int *flag_param);
85 static void emit_convert_byte (unsigned char c);
86 static unsigned char literal_escape (unsigned char c, int);
87 static int atom (int *flag_param);
88 static void reg_error (char *str);
89 static int piece (int *flag_param);
91 /*----------------------------------------------------------------------*
92 * ConvertRE
94 * Compiles a regular expression into the internal format used by
95 * `ExecRE'.
97 * Beware that the optimization and preparation code in here knows about
98 * some of the structure of the compiled regexp.
99 *----------------------------------------------------------------------*/
101 char * ConvertRE (const char *exp, char **errorText, char *cap_parens) {
103 int flags_local, pass;
105 /* Set up `errorText' to receive failure reports. */
107 Error_Ptr = errorText;
108 *Error_Ptr = "";
110 if (exp == NULL) CONVERT_FAIL ("NULL argument to `ConvertRE\'");
112 Code_Emit_Ptr = &Compute_Size;
113 Convert_Size = 0UL;
115 /* We can't allocate space until we know how big the compiled form will be,
116 but we can't compile it (and thus know how big it is) until we've got a
117 place to put the code. So we cheat: we compile it twice, once with code
118 generation turned off and size counting turned on, and once "for real".
119 This also means that we don't allocate space until we are sure that the
120 thing really will compile successfully, and we never have to move the
121 code and thus invalidate pointers into it. (Note that it has to be in
122 one piece because free() must be able to free it all.) */
124 for (pass = 1; pass <= 2; pass++) {
125 /*-------------------------------------------*
126 * FIRST PASS: Determine size and legality. *
127 * SECOND PASS: Emit converted code. *
128 *-------------------------------------------*/
130 Reg_Parse = (unsigned char *) exp;
131 Total_Paren = 1;
133 if (chunk (NO_PAREN, &flags_local) == 0) return (NULL); /* Something
134 went wrong */
135 emit_convert_byte ('\0');
137 if (pass == 1) {
138 /* Allocate memory. */
140 Convert_Str =
141 (unsigned char *) XtMalloc (sizeof (unsigned char) * Convert_Size);
143 if (Convert_Str == NULL) {
144 CONVERT_FAIL ("out of memory in `ConvertRE\'");
147 Code_Emit_Ptr = Convert_Str;
151 return (char *) Convert_Str;
154 /*----------------------------------------------------------------------*
155 * chunk *
157 * Process main body of regex or process a parenthesized "thing". *
159 * Caller must absorb opening parenthesis.
160 *----------------------------------------------------------------------*/
162 static int chunk (int paren, int *flag_param) {
164 register int this_branch;
165 int flags_local;
167 *flag_param = HAS_WIDTH; /* Tentatively. */
169 /* Make an OPEN node, if parenthesized. */
171 if (paren == PAREN) {
172 if (Total_Paren >= NSUBEXP) {
173 sprintf (Error_Text, "number of ()'s > %d", (int) NSUBEXP);
174 CONVERT_FAIL (Error_Text);
177 Total_Paren++;
180 /* Pick up the branches, linking them together. */
182 do {
183 this_branch = alternative (&flags_local);
185 if (this_branch == 0) return 0;
187 /* If any alternative could be zero width, consider the whole
188 parenthisized thing to be zero width. */
190 if (!(flags_local & HAS_WIDTH)) *flag_param &= ~HAS_WIDTH;
192 /* Are there more alternatives to process? */
194 if (*Reg_Parse != '|') break;
196 emit_convert_byte ('|');
198 Reg_Parse++;
199 } while (1);
201 /* Check for proper termination. */
203 if (paren != NO_PAREN && *Reg_Parse != ')') {
204 CONVERT_FAIL ("missing right parenthesis \')\'");
206 } else if (paren != NO_PAREN) {
207 emit_convert_byte (')');
208 Reg_Parse++;
210 } else if (paren == NO_PAREN && *Reg_Parse != '\0') {
211 if (*Reg_Parse == ')') {
212 CONVERT_FAIL ("missing left parenthesis \'(\'");
213 } else {
214 CONVERT_FAIL ("junk on end"); /* "Can't happen" - NOTREACHED */
218 return 1;
221 /*----------------------------------------------------------------------*
222 * alternative - Processes one alternative of an '|' operator.
223 *----------------------------------------------------------------------*/
225 static int alternative (int *flag_param) {
227 int ret_val;
228 int flags_local;
230 *flag_param = WORST; /* Tentatively. */
232 /* Loop until we hit the start of the next alternative, the end of this set
233 of alternatives (end of parentheses), or the end of the regex. */
235 while (*Reg_Parse != '|' && *Reg_Parse != ')' && *Reg_Parse != '\0') {
236 ret_val = piece (&flags_local);
238 if (ret_val == 0) return 0; /* Something went wrong. */
240 *flag_param |= flags_local & HAS_WIDTH;
243 return 1;
246 /*----------------------------------------------------------------------*
247 * piece - something followed by possible '*', '+', or '?'.
248 *----------------------------------------------------------------------*/
250 static int piece (int *flag_param) {
252 register int ret_val;
253 register unsigned char op_code;
254 unsigned long min_val = REG_ZERO;
255 int flags_local;
257 ret_val = atom (&flags_local);
259 if (ret_val == 0) return 0; /* Something went wrong. */
261 op_code = *Reg_Parse;
263 if (!IS_QUANTIFIER (op_code)) {
264 *flag_param = flags_local;
266 return (ret_val);
269 Reg_Parse++;
271 if (op_code == '+') min_val = REG_ONE;
273 /* It is dangerous to apply certain quantifiers to a possibly zero width
274 item. */
276 if (!(flags_local & HAS_WIDTH) && min_val > REG_ZERO) {
277 sprintf (Error_Text, "%c operand could be empty", op_code);
279 CONVERT_FAIL (Error_Text);
282 *flag_param = (min_val > REG_ZERO) ? (WORST | HAS_WIDTH) : WORST;
284 if ( !((op_code == '*') || (op_code == '+') || (op_code == '?')) ) {
285 /* We get here if the IS_QUANTIFIER macro is not coordinated properly
286 with this function. */
288 CONVERT_FAIL ("internal error #2, `piece\'");
291 if (IS_QUANTIFIER (*Reg_Parse)) {
292 sprintf (Error_Text, "nested quantifiers, %c%c", op_code, *Reg_Parse);
294 CONVERT_FAIL (Error_Text);
297 emit_convert_byte (op_code);
299 return (ret_val);
302 /*----------------------------------------------------------------------*
303 * atom - Process one regex item at the lowest level
304 *----------------------------------------------------------------------*/
306 static int atom (int *flag_param) {
307 int ret_val = 1;
308 unsigned char test;
309 int flags_local;
311 *flag_param = WORST; /* Tentatively. */
313 switch (*Reg_Parse++) {
314 case '^':
315 emit_convert_byte ('^');
316 break;
318 case '$':
319 emit_convert_byte ('$');
320 break;
322 case '<':
323 emit_convert_byte ('<');
324 break;
326 case '>':
327 emit_convert_byte ('>');
328 break;
330 case '.':
331 emit_convert_byte ('.');
333 *flag_param |= (HAS_WIDTH | SIMPLE); break;
335 case '(':
336 emit_convert_byte ('(');
338 ret_val = chunk (PAREN, &flags_local);
340 if (ret_val == 0) return 0; /* Something went wrong. */
342 /* Add HAS_WIDTH flag if it was set by call to chunk. */
344 *flag_param |= flags_local & HAS_WIDTH;
346 break;
348 case '\0':
349 case '|':
350 case ')':
351 CONVERT_FAIL ("internal error #3, `atom\'"); /* Supposed to be */
352 /* caught earlier. */
353 case '?':
354 case '+':
355 case '*':
356 sprintf (Error_Text, "%c follows nothing", *(Reg_Parse - 1));
357 CONVERT_FAIL (Error_Text);
359 case '{':
360 emit_convert_byte ('\\'); /* Quote braces. */
361 emit_convert_byte ('{');
363 break;
365 case '[':
367 register unsigned int last_value;
368 unsigned char last_emit = 0;
369 unsigned char buffer [500];
370 int head = 0;
371 int negated = 0;
372 int do_brackets = 1;
373 int a_z_flag = 0;
374 int A_Z_flag = 0;
375 int zero_nine = 0;
376 int u_score_flag = 0;
378 buffer [0] = '\0';
380 /* Handle characters that can only occur at the start of a class. */
382 if (*Reg_Parse == '^') { /* Complement of range. */
383 negated = 1;
385 Reg_Parse++;
388 if (*Reg_Parse == ']' || *Reg_Parse == '-') {
389 /* If '-' or ']' is the first character in a class,
390 it is a literal character in the class. */
392 last_emit = *Reg_Parse;
394 if (head >= 498) {
395 CONVERT_FAIL ("too much data in [] to convert.");
398 buffer [head++] = '\\'; /* Escape `]' and '-' for clarity. */
399 buffer [head++] = *Reg_Parse;
401 Reg_Parse++;
404 /* Handle the rest of the class characters. */
406 while (*Reg_Parse != '\0' && *Reg_Parse != ']') {
407 if (*Reg_Parse == '-') { /* Process a range, e.g [a-z]. */
408 Reg_Parse++;
410 if (*Reg_Parse == ']' || *Reg_Parse == '\0') {
411 /* If '-' is the last character in a class it is a literal
412 character. If `Reg_Parse' points to the end of the
413 regex string, an error will be generated later. */
415 last_emit = '-';
417 if (head >= 498) {
418 CONVERT_FAIL ("too much data in [] to convert.");
421 buffer [head++] = '\\'; /* Escape '-' for clarity. */
422 buffer [head++] = '-';
424 } else {
425 if (*Reg_Parse == '\\') {
426 /* Handle escaped characters within a class range. */
428 Reg_Parse++;
430 if ((test = literal_escape (*Reg_Parse, 0))) {
432 buffer [head++] = '-';
434 if (*Reg_Parse != '\"') {
435 emit_convert_byte ('\\');
438 buffer [head++] = *Reg_Parse;
439 last_value = (unsigned int) test;
440 } else {
441 sprintf (
442 Error_Text,
443 "\\%c is an invalid escape sequence(3)",
444 *Reg_Parse);
446 CONVERT_FAIL (Error_Text);
448 } else {
449 last_value = U_CHAR_AT (Reg_Parse);
451 if (last_emit == '0' && last_value == '9') {
452 zero_nine = 1;
453 head--;
454 } else if (last_emit == 'a' && last_value == 'z') {
455 a_z_flag = 1;
456 head--;
457 } else if (last_emit == 'A' && last_value == 'Z') {
458 A_Z_flag = 1;
459 head--;
460 } else {
461 buffer [head++] = '-';
463 if ((test = literal_escape (*Reg_Parse, 1))) {
464 /* Ordinary character matches an escape sequence;
465 convert it to the escape sequence. */
467 if (head >= 495) {
468 CONVERT_FAIL (
469 "too much data in [] to convert.");
472 buffer [head++] = '\\';
474 if (test == '0') { /* Make octal escape. */
475 test = *Reg_Parse;
476 buffer [head++] = '0';
477 buffer [head++] = ('0' + (test / 64));
478 test -= (test / 64) * 64;
479 buffer [head++] = ('0' + (test / 8));
480 test -= (test / 8) * 8;
481 buffer [head++] = ('0' + test);
482 } else {
483 buffer [head++] = test;
485 } else {
486 buffer [head++] = last_value;
491 if (last_emit > last_value) {
492 CONVERT_FAIL ("invalid [] range");
495 last_emit = (unsigned char) last_value;
497 Reg_Parse++;
499 } /* End class character range code. */
500 } else if (*Reg_Parse == '\\') {
501 Reg_Parse++;
503 if ((test = literal_escape (*Reg_Parse, 0)) != '\0') {
504 last_emit = test;
506 if (head >= 498) {
507 CONVERT_FAIL ("too much data in [] to convert.");
510 if (*Reg_Parse != '\"') {
511 buffer [head++] = '\\';
514 buffer [head++] = *Reg_Parse;
516 } else {
517 sprintf (Error_Text,
518 "\\%c is an invalid escape sequence(1)",
519 *Reg_Parse);
521 CONVERT_FAIL (Error_Text);
524 Reg_Parse++;
526 /* End of class escaped sequence code */
527 } else {
528 last_emit = *Reg_Parse;
530 if (*Reg_Parse == '_') {
531 u_score_flag = 1; /* Emit later if we can't do `\w'. */
533 } else if ((test = literal_escape (*Reg_Parse, 1))) {
534 /* Ordinary character matches an escape sequence;
535 convert it to the escape sequence. */
537 if (head >= 495) {
538 CONVERT_FAIL ("too much data in [] to convert.");
541 buffer [head++] = '\\';
543 if (test == '0') { /* Make octal escape. */
544 test = *Reg_Parse;
545 buffer [head++] = '0';
546 buffer [head++] = ('0' + (test / 64));
547 test -= (test / 64) * 64;
548 buffer [head++] = ('0' + (test / 8));
549 test -= (test / 8) * 8;
550 buffer [head++] = ('0' + test);
551 } else {
552 if (head >= 499) {
553 CONVERT_FAIL ("too much data in [] to convert.");
556 buffer [head++] = test;
558 } else {
559 if (head >= 499) {
560 CONVERT_FAIL ("too much data in [] to convert.");
563 buffer [head++] = *Reg_Parse;
566 Reg_Parse++;
568 } /* End of while (*Reg_Parse != '\0' && *Reg_Parse != ']') */
570 if (*Reg_Parse != ']') CONVERT_FAIL ("missing right \']\'");
572 buffer [head] = '\0';
574 /* NOTE: it is impossible to specify an empty class. This is
575 because [] would be interpreted as "begin character class"
576 followed by a literal ']' character and no "end character class"
577 delimiter (']'). Because of this, it is always safe to assume
578 that a class HAS_WIDTH. */
580 Reg_Parse++; *flag_param |= HAS_WIDTH | SIMPLE;
582 if (head == 0) {
583 if (( a_z_flag && A_Z_flag && zero_nine && u_score_flag) ||
584 ( a_z_flag && A_Z_flag && !zero_nine && !u_score_flag) ||
585 (!a_z_flag && !A_Z_flag && zero_nine && !u_score_flag)) {
587 do_brackets = 0;
591 if (do_brackets) {
592 emit_convert_byte ('[');
593 if (negated) emit_convert_byte ('^');
596 /* Output any shortcut escapes if we can. */
598 while (a_z_flag || A_Z_flag || zero_nine || u_score_flag) {
599 if (a_z_flag && A_Z_flag && zero_nine && u_score_flag) {
600 emit_convert_byte ('\\');
602 if (negated && !do_brackets) {
603 emit_convert_byte ('W');
604 } else {
605 emit_convert_byte ('w');
608 a_z_flag = A_Z_flag = zero_nine = u_score_flag = 0;
609 } else if (a_z_flag && A_Z_flag) {
610 emit_convert_byte ('\\');
612 if (negated && !do_brackets) {
613 emit_convert_byte ('L');
614 } else {
615 emit_convert_byte ('l');
618 a_z_flag = A_Z_flag = 0;
619 } else if (zero_nine) {
620 emit_convert_byte ('\\');
622 if (negated && !do_brackets) {
623 emit_convert_byte ('D');
624 } else {
625 emit_convert_byte ('d');
628 zero_nine = 0;
629 } else if (a_z_flag) {
630 emit_convert_byte ('a');
631 emit_convert_byte ('-');
632 emit_convert_byte ('z');
634 a_z_flag = 0;
635 } else if (A_Z_flag) {
636 emit_convert_byte ('A');
637 emit_convert_byte ('-');
638 emit_convert_byte ('Z');
640 A_Z_flag = 0;
641 } else if (u_score_flag) {
642 emit_convert_byte ('_');
644 u_score_flag = 0;
648 /* Output our buffered class characters. */
650 for (head = 0; buffer [head] != '\0'; head++) {
651 emit_convert_byte (buffer [head]);
654 if (do_brackets) {
655 emit_convert_byte (']');
659 break; /* End of character class code. */
661 /* Fall through to Default case to handle literal escapes. */
663 default:
664 Reg_Parse--; /* If we fell through from the above code, we are now
665 pointing at the back slash (\) character. */
667 unsigned char *parse_save, *emit_save;
668 int emit_diff, len = 0;
670 /* Loop until we find a meta character or end of regex string. */
672 for (; *Reg_Parse != '\0' &&
673 !strchr ((char *) Meta_Char, (int) *Reg_Parse);
674 len++) {
676 /* Save where we are in case we have to back
677 this character out. */
679 parse_save = Reg_Parse;
680 emit_save = Code_Emit_Ptr;
682 if (*Reg_Parse == '\\') {
683 if ((test = literal_escape (*(Reg_Parse + 1), 0))) {
684 if (*(Reg_Parse + 1) != '\"') {
685 emit_convert_byte ('\\');
688 Reg_Parse++; /* Point to escaped character */
689 emit_convert_byte (*Reg_Parse);
691 } else {
692 sprintf (Error_Text,
693 "\\%c is an invalid escape sequence(2)",
694 *(Reg_Parse + 1));
696 CONVERT_FAIL (Error_Text);
699 Reg_Parse++;
700 } else {
701 /* Ordinary character */
703 if ((test = literal_escape (*Reg_Parse, 1))) {
704 /* Ordinary character matches an escape sequence;
705 convert it to the escape sequence. */
707 emit_convert_byte ('\\');
709 if (test == '0') {
710 test = *Reg_Parse;
711 emit_convert_byte ('0');
712 emit_convert_byte ('0' + (test / 64));
713 test -= (test / 64) * 64;
714 emit_convert_byte ('0' + (test / 8));
715 test -= (test / 8) * 8;
716 emit_convert_byte ('0' + test);
717 } else {
718 emit_convert_byte (test);
720 } else {
721 emit_convert_byte (*Reg_Parse);
724 Reg_Parse++;
727 /* If next regex token is a quantifier (?, +. *, or {m,n}) and
728 our EXACTLY node so far is more than one character, leave the
729 last character to be made into an EXACTLY node one character
730 wide for the multiplier to act on. For example 'abcd* would
731 have an EXACTLY node with an 'abc' operand followed by a STAR
732 node followed by another EXACTLY node with a 'd' operand. */
734 if (IS_QUANTIFIER (*Reg_Parse) && len > 0) {
735 Reg_Parse = parse_save; /* Point to previous regex token. */
736 emit_diff = (Code_Emit_Ptr - emit_save);
738 if (Code_Emit_Ptr == &Compute_Size) {
739 Convert_Size -= emit_diff;
740 } else { /* Write over previously emitted byte. */
741 Code_Emit_Ptr = emit_save;
744 break;
748 if (len <= 0) CONVERT_FAIL ("internal error #4, `atom\'");
750 *flag_param |= HAS_WIDTH;
752 if (len == 1) *flag_param |= SIMPLE;
754 } /* END switch (*Reg_Parse++) */
756 return (ret_val);
759 /*----------------------------------------------------------------------*
760 * emit_convert_byte
762 * Emit (if appropriate) a byte of converted code.
763 *----------------------------------------------------------------------*/
765 static void emit_convert_byte (unsigned char c) {
767 if (Code_Emit_Ptr == &Compute_Size) {
768 Convert_Size++;
769 } else {
770 *Code_Emit_Ptr++ = c;
774 /*--------------------------------------------------------------------*
775 * literal_escape
777 * Recognize escaped literal characters (prefixed with backslash),
778 * and translate them into the corresponding character.
780 * Returns the proper character value or NULL if not a valid literal
781 * escape.
782 *--------------------------------------------------------------------*/
784 static unsigned char literal_escape (unsigned char c, int action) {
786 static unsigned char control_escape [] = {
787 'a', 'b',
788 'e',
789 'f', 'n', 'r', 't', 'v', '\0'
792 static unsigned char control_actual [] = {
793 '\a', '\b',
794 #ifdef EBCDIC_CHARSET
795 0x27, /* Escape character in IBM's EBCDIC character set. */
796 #else
797 0x1B, /* Escape character in ASCII character set. */
798 #endif
799 '\f', '\n', '\r', '\t', '\v', '\0'
802 static unsigned char valid_escape [] = {
803 'a', 'b', 'f', 'n', 'r', 't', 'v', '(', ')', '[',
804 ']', '<', '>', '.', '\\', '|', '^', '$', '*', '+',
805 '?', '&', '\"', '\0'
808 static unsigned char value [] = {
809 '\a', '\b', '\f', '\n', '\r', '\t', '\v', '(', ')', '[',
810 ']', '<', '>', '.', '\\', '|', '^', '$', '*', '+',
811 '?', '&', '\"', '\0'
814 int i;
816 if (action == 0) {
817 for (i = 0; valid_escape [i] != '\0'; i++) {
818 if (c == valid_escape [i]) return value [i];
820 } else if (action == 1) {
821 for (i = 0; control_actual [i] != '\0'; i++) {
822 if (c == control_actual [i]) {
823 return control_escape [i];
828 if (action == 1) {
829 if (!isprint (c)) {
830 /* Signal to generate an numeric (octal) escape. */
831 return '0';
835 return 0;
838 /*----------------------------------------------------------------------*
839 * ConvertSubstituteRE - Perform substitutions after a `regexp' match.
840 *----------------------------------------------------------------------*/
842 void ConvertSubstituteRE (
843 const char *source,
844 char *dest,
845 int max) {
847 register unsigned char *src;
848 register unsigned char *dst;
849 register unsigned char c;
850 register unsigned char test;
852 if (source == NULL || dest == NULL) {
853 reg_error ("NULL parm to `ConvertSubstituteRE\'");
855 return;
858 src = (unsigned char *) source;
859 dst = (unsigned char *) dest;
861 while ((c = *src++) != '\0') {
863 if (c == '\\') {
864 /* Process any case altering tokens, i.e \u, \U, \l, \L. */
866 if (*src == 'u' || *src == 'U' || *src == 'l' || *src == 'L') {
867 *dst++ = '\\';
868 c = *src++;
869 *dst++ = c;
871 if (c == '\0') {
872 break;
873 } else {
874 c = *src++;
879 if (c == '&') {
880 *dst++ = '&';
882 } else if (c == '\\') {
883 if (*src == '0') {
884 /* Convert `\0' to `&' */
886 *dst++ = '&'; src++;
888 } else if ('1' <= *src && *src <= '9') {
889 *dst++ = '\\';
890 *dst++ = *src++;
892 } else if ((test = literal_escape (*src, 0)) != '\0') {
893 *dst++ = '\\';
894 *dst++ = *src++;
896 } else if (*src == '\0') {
897 /* If '\' is the last character of the replacement string, it is
898 interpreted as a literal backslash. */
900 *dst++ = '\\';
901 } else {
902 /* Old regex's allowed any escape sequence. Convert these to
903 unescaped characters that replace themselves; i.e. they don't
904 need to be escaped. */
906 *dst++ = *src++;
908 } else {
909 /* Ordinary character. */
911 if (((char *) dst - (char *) dest) >= (max - 1)) {
912 break;
913 } else {
914 if ((test = literal_escape (c, 1))) {
915 /* Ordinary character matches an escape sequence;
916 convert it to the escape sequence. */
918 *dst++ = '\\';
920 if (test == '0') { /* Make octal escape. */
921 test = c;
922 *dst++ = '0';
923 *dst++ = ('0' + (test / 64));
924 test -= (test / 64) * 64;
925 *dst++ = ('0' + (test / 8));
926 test -= (test / 8) * 8;
927 *dst++ = ('0' + test);
928 } else {
929 *dst++ = test;
932 } else {
933 *dst++ = c;
939 *dst = '\0';
942 /*----------------------------------------------------------------------*
943 * reg_error
944 *----------------------------------------------------------------------*/
946 static void reg_error (char *str) {
948 fprintf (
949 stderr,
950 "NEdit: Internal error processing regular expression (%s)\n",
951 str);