1 From: Tony Balinski <ajbj@free.fr>
2 Subject: Unlimited macro string literal length and single-quoted strings
6 http://sourceforge.net/tracker/?func=detail&atid=311005&aid=1598271&group_id=11005
7 [ 1598271 ] Unlimited macro string literal length, single-quoted strings
8 macroStringLiterals.diff 2006-11-21
10 String literals are scanned twice: first to calculate their space
11 requirements, then to read their contents into allocated memory.
13 Separate string literals that follow one another are combined into one,
14 avoiding run-time concatenation of the pieces. Also single-quoted string
15 literals are allowed, within which backslash ('\') has no special meaning
16 (so you can't include a single-quote in a single-quoted string).
18 Note that a double-quoted string can be continued over multiple lines by
19 ending each line but the last with a backslash, like in C.
23 Fixed adjacent string literal merging to allow concatenation with "".
27 doc/help.etx | 41 +++++---
28 source/parse.y | 291 ++++++++++++++++++++++++++++++++++-----------------------
29 2 files changed, 206 insertions(+), 126 deletions(-)
31 diff --quilt old/doc/help.etx new/doc/help.etx
34 @@ -1947,11 +1947,12 @@ Macro Language
36 Blank lines and comments are also allowed. Comments begin with a "#" and end
37 with a newline, and can appear either on a line by themselves, or at the end
39 + of a statement line.
41 Statements which are too long to fit on a single line may be split across
42 several lines, by placing a backslash "\" character at the end of each line
44 + to be continued. Note that a comment with a backslash at the end is treated
45 + as a continuation in this way too.
49 @@ -1973,12 +1974,14 @@ Macro Language
51 4>Character String Constants
53 - Character string constants are enclosed in double quotes. For example:
54 + Character string constants are enclosed in single or double quotes, but the
55 + start and end quotes must be the same character. For example:
58 - dialog("Hi there!", "OK")
59 + dialog('Hi there!', "OK")
61 - Strings may also include C-language style escape sequences:
62 + A double-quoted string may also include C-language style escape
65 \\ Backslash \t Tab \f Form feed
66 \" Double quote \b Backspace \a Alert
67 @@ -1996,11 +1999,11 @@ Macro Language
68 t_print("a = " a "\n")
70 Other characters can be expressed as backslash-escape sequences in macro
71 - strings. The format is the same as for regular expressions, described in the
72 - paragraphs headed "Octal and Hex Escape Sequences" of the section
73 - "Metacharacters_", except that an octal escape sequence can start with any
74 - octal digit, not just 0, so the single character string "\0033" is the same
75 - as "\33", "\x1B" and "\e" (for an ASCII version of NEdit).
76 + double-quoted strings. The format is the same as for regular expressions,
77 + described in the paragraphs headed "Octal and Hex Escape Sequences" of the
78 + section "Metacharacters_", except that an octal escape sequence can start with
79 + any octal digit, not just 0, so the single character string "\0033" is the
80 + same as "\33", "\x1B" and "\e" (for an ASCII version of NEdit).
82 Note that if you want to define a regular expression in a macro string,
83 you need to "double-up" the backslashes for the metacharacters with
84 @@ -2009,7 +2012,7 @@ Macro Language
85 (?N(\s|/\*(?n(?:(?!\*/).)*)\*/|//.*\n|\n)+)
87 which matches whitespace or C/C++/Java-style comments, should be written as
89 + a macro double-quoted string as
91 "(?N(\\s|/\\*(?n(?:(?!\\*/).)*)\\*/|//.*\n|\n)+)"
93 @@ -2018,6 +2021,22 @@ Macro Language
94 also interpret the sequence "\\n" as a newline, although the macro string here
95 would then contain a literal backslash followed by a lowercase `N'.)
97 + Alternatively, if you don't need special escapes or a single quote
98 + (apostrophe) in your string (true for this example), just turn the expression
99 + into a single-quoted string, as
101 + '(?N(\s|/\*(?n(?:(?!\*/).)*)\*/|//.*\n|\n)+)'
103 + Neighboring string literals (separated by whitespace or line continuations)
104 + are combined before use, as if by the concatenation operation. For example:
106 + "The backslash '" '\' "' is an " \
107 + 'escape only in "double-quoted" strings' "\n"
109 + is treated as a single string ending with a newline character, looking like
111 + The backslash '\' is an escape only in "double-quoted" strings
116 diff --quilt old/source/parse.y new/source/parse.y
117 --- old/source/parse.y
118 +++ new/source/parse.y
119 @@ -69,6 +69,7 @@ static int follow(char expect, int yes,
120 static int follow2(char expect1, int yes1, char expect2, int yes2, int no);
121 static int follow_non_whitespace(char expect, int yes, int no);
122 static Symbol *matchesActionRoutine(char **inPtr);
123 +static int scanString(void);
127 @@ -675,12 +676,6 @@ static int yylex(void)
130 static DataValue value = {NO_TAG, {0}};
131 - static char escape[] = "\\\"ntbrfave";
132 -#ifdef EBCDIC_CHARSET
133 - static char replace[] = "\\\"\n\t\b\r\f\a\v\x27"; /* EBCDIC escape */
135 - static char replace[] = "\\\"\n\t\b\r\f\a\v\x1B"; /* ASCII escape */
140 @@ -739,115 +734,10 @@ static int yylex(void)
144 - /* Process quoted strings with embedded escape sequences:
145 - For backslashes we recognise hexadecimal values with initial 'x' such
146 - as "\x1B"; octal value (upto 3 oct digits with a possible leading zero)
147 - such as "\33", "\033" or "\0033", and the C escapes: \", \', \n, \t, \b,
148 - \r, \f, \a, \v, and the added \e for the escape character, as for REs.
149 - Disallow hex/octal zero values (NUL): instead ignore the introductory
150 - backslash, eg "\x0xyz" becomes "x0xyz" and "\0000hello" becomes
153 - if (*InPtr == '\"') {
154 - char string[MAX_STRING_CONST_LEN], *p = string;
157 - while (*InPtr != '\0' && *InPtr != '\"' && *InPtr != '\n') {
158 - if (p >= string + MAX_STRING_CONST_LEN) {
162 - if (*InPtr == '\\') {
165 - if (*InPtr == '\n') {
169 - if (*InPtr == 'x') {
170 - /* a hex introducer */
172 - const char *hexDigits = "0123456789abcdef";
175 - if (*InPtr == '\0' ||
176 - (hexD = strchr(hexDigits, tolower(*InPtr))) == NULL) {
180 - hexValue = hexD - hexDigits;
182 - /* now do we have another digit? only accept one more */
183 - if (*InPtr != '\0' &&
184 - (hexD = strchr(hexDigits,tolower(*InPtr))) != NULL){
185 - hexValue = hexD - hexDigits + (hexValue << 4);
188 - if (hexValue != 0) {
189 - *p++ = (char)hexValue;
192 - InPtr = backslash + 1; /* just skip the backslash */
197 - /* the RE documentation requires \0 as the octal introducer;
198 - here you can start with any octal digit, but you are only
199 - allowed up to three (or four if the first is '0'). */
200 - if ('0' <= *InPtr && *InPtr <= '7') {
201 - if (*InPtr == '0') {
202 - InPtr++; /* octal introducer: don't count this digit */
204 - if ('0' <= *InPtr && *InPtr <= '7') {
205 - /* treat as octal - first digit */
206 - char octD = *InPtr++;
207 - int octValue = octD - '0';
208 - if ('0' <= *InPtr && *InPtr <= '7') {
211 - octValue = (octValue << 3) + octD - '0';
212 - /* now do we have another digit? can we add it?
213 - if value is going to be too big for char (greater
214 - than 0377), stop converting now before adding the
216 - if ('0' <= *InPtr && *InPtr <= '7' &&
218 - /* third digit is acceptable */
220 - octValue = (octValue << 3) + octD - '0';
223 - if (octValue != 0) {
224 - *p++ = (char)octValue;
227 - InPtr = backslash + 1; /* just skip the backslash */
230 - else { /* \0 followed by non-digits: go back to 0 */
231 - InPtr = backslash + 1; /* just skip the backslash */
235 - for (i=0; escape[i]!='\0'; i++) {
236 - if (escape[i] == *InPtr) {
242 - /* if we get here, we didn't recognise the character after
243 - the backslash: just copy it next time round the loop */
251 - yylval.sym = InstallStringConstSymbol(string);
253 + /* Process quoted strings */
255 + if (*InPtr == '\"' || *InPtr == '\'') {
256 + return scanString();
259 /* process remaining two character tokens or return single char as token */
260 @@ -951,6 +841,177 @@ static Symbol *matchesActionRoutine(char
264 +** Process quoted string literals. These can be in single or double quotes.
265 +** A sequence of string literals separated by whitespace (see skipWhitespace())
266 +** is read as a single string.
268 +** Double-quoted string literals allow embedded escape sequences:
269 +** For backslashes we recognise hexadecimal values with initial 'x' such
270 +** as "\x1B"; octal value (up to 3 octal digits with a possible leading zero)
271 +** such as "\33", "\033" or "\0033", and the C escapes: \", \', \n, \t, \b,
272 +** \r, \f, \a, \v, and the added \e for the escape character, as for REs.
273 +** We disallow hex/octal zero values (NUL): instead ignore the introductory
274 +** backslash, eg "\x0xyz" becomes "x0xyz" and "\0000hello" becomes "0000hello".
275 +** An escaped newline is elided, and the string content continues on the next
278 +static int scanString(void)
280 +# define SCANSTRING_WRITE_TO_STRING(p, len, val) \
281 + do { char mc = (val); if (p) { *p++ = mc; } else { ++len; } } while (0)
283 + /* scan the string twice: once to get its size, then again to build it */
284 + char *startPtr = InPtr;
285 + char *p = NULL, *string = NULL;
287 + char stopper, first_stopper = *startPtr;
289 + int handleBackslash;
291 + static char escape[] = "\\\"ntbrfave";
292 +#ifdef EBCDIC_CHARSET
293 + static char replace[] = "\\\"\n\t\b\r\f\a\v\x27"; /* EBCDIC escape */
295 + static char replace[] = "\\\"\n\t\b\r\f\a\v\x1B"; /* ASCII escape */
298 + if (first_stopper != '\"' && first_stopper != '\'')
299 + return yyerror("expected a string");
301 + for (scan = 0; scan < 2; ++scan)
304 + stopper = first_stopper;
305 + handleBackslash = (stopper == '\"');
308 + while (*InPtr != '\0' && *InPtr != '\n') {
309 + if (*InPtr == stopper) {
310 + char *endPtr = InPtr++;
312 + /* is this followed by another string literal? */
313 + if (*InPtr == '\"' || *InPtr == '\'') {
314 + stopper = *InPtr++; /* add it to the end of the first */
315 + handleBackslash = (stopper == '\"');
318 + InPtr = endPtr; /* no further string: restore position */
322 + else if (handleBackslash && *InPtr == '\\') {
325 + if (*InPtr == '\n') { /* allows newline to be skipped */
329 + if (*InPtr == 'x') {
330 + /* a hex introducer */
332 + const char *hexDigits = "0123456789abcdef";
335 + if (*InPtr == '\0')
337 + if ((hexD = strchr(hexDigits, tolower(*InPtr))) == NULL) {
338 + SCANSTRING_WRITE_TO_STRING(p, len, 'x');
341 + hexValue = hexD - hexDigits;
343 + if (*InPtr == '\0')
345 + /* now do we have another digit? only accept one more */
346 + if ((hexD = strchr(hexDigits,tolower(*InPtr))) != NULL){
347 + hexValue = hexD - hexDigits + (hexValue << 4);
350 + if (hexValue != 0) {
351 + SCANSTRING_WRITE_TO_STRING(p, len, (char)hexValue);
354 + InPtr = backslash + 1; /* just skip the backslash */
359 + /* the RE documentation requires \0 as the octal introducer;
360 + here you can start with any octal digit, but you are only
361 + allowed up to three (or four if the first is '0'). */
362 + if ('0' <= *InPtr && *InPtr <= '7') {
363 + if (*InPtr == '0') {
364 + InPtr++; /* octal introducer: don't count this digit */
366 + if ('0' <= *InPtr && *InPtr <= '7') {
367 + /* treat as octal - first digit */
368 + char octD = *InPtr++;
369 + int octValue = octD - '0';
370 + if ('0' <= *InPtr && *InPtr <= '7') {
373 + octValue = (octValue << 3) + octD - '0';
374 + /* now do we have another digit? can we add it?
375 + if value is going to be too big for char (greater
376 + than 0377), stop converting now before adding the
378 + if ('0' <= *InPtr && *InPtr <= '7' &&
380 + /* third digit is acceptable */
382 + octValue = (octValue << 3) + octD - '0';
385 + if (octValue != 0) {
386 + SCANSTRING_WRITE_TO_STRING(p, len, (char)octValue);
389 + InPtr = backslash + 1; /* just skip the backslash */
392 + else { /* \0 followed by non-digits: go back to 0 */
393 + InPtr = backslash + 1; /* just skip the backslash */
397 + /* check for a valid c-style escape character */
398 + for (i = 0; escape[i] != '\0'; i++) {
399 + if (escape[i] == *InPtr) {
400 + SCANSTRING_WRITE_TO_STRING(p, len, replace[i]);
405 + /* if we get here, we didn't recognise the character after
406 + the backslash: just copy it next time round the loop */
409 + SCANSTRING_WRITE_TO_STRING(p, len, *InPtr++);
412 + /* terminate the string content */
413 + SCANSTRING_WRITE_TO_STRING(p, len, '\0');
414 + if (*InPtr == stopper) {
416 + /* this was the size measurement and validation */
417 + p = string = AllocString(len);
420 + /* OK: string now contains our string text */
421 + InPtr++; /* skip past stopper */
422 + yylval.sym = InstallStringConstSymbol(string);
427 + /* failure: end quote doesn't match start quote */
431 + return yyerror("unterminated string");
435 ** Called by yacc to report errors (just stores for returning when
436 ** parsing is aborted. The error token action is to immediate abort
437 ** parsing, so this message is immediately reported to the caller