*** empty log message ***
[anjuta-git-plugin.git] / scintilla / LexPerl.cxx
blob67d60fc1ae8f5a290718a732147f04c950f0d706
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for subset of Perl.
4 **/
5 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> 2003-2004
6 // Copyright 1998-2004 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
15 #include "Platform.h"
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
// Sub-states of a number being lexed; ColourisePerlDoc keeps the current one
// in numState and actualNumStyle() maps it to the style finally applied.
23 #define PERLNUM_DECIMAL 1
24 #define PERLNUM_NON_DEC 2
25 #define PERLNUM_FLOAT 3
26 #define PERLNUM_VECTOR 4
27 #define PERLNUM_V_VECTOR 5
// Size of the buffer HereDocCls allocates for a here-doc delimiter.
29 #define HERE_DELIM_MAX 256
// Tells whether ch is one of the two line-terminator bytes (CR or LF).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
// Tells whether ch is one of the letters accepted after '-' as a file test
// operator (-r, -w, -x, ...).
// A NUL byte is rejected explicitly: searching for the string terminator would
// otherwise always succeed and report NUL as a valid operator letter.
static bool isSingleCharOp(char ch) {
	if (ch == '\0')
		return false;
	return NULL != strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch);
}
// Tells whether ch is a punctuation character that is coloured as an operator.
// '%', '*', '<' and '/' are deliberately absent: the caller has already
// tested for them before this function is reached.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
55 static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
56 char s[100];
57 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
58 s[i] = styler[start + i];
59 s[i + 1] = '\0';
61 char chAttr = SCE_PL_IDENTIFIER;
62 if (keywords.InList(s))
63 chAttr = SCE_PL_WORD;
64 styler.ColourTo(end, chAttr);
65 return chAttr;
// Tells whether ch terminates a variable name, i.e. it is not alphanumeric
// and is none of '#', '$', '_' or '\''.
static inline bool isEndVar(char ch) {
	// <ctype.h> functions require a value representable as unsigned char;
	// passing a negative plain char (bytes >= 0x80) is undefined behaviour.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
// Tells whether ch is a word character (alphanumeric or '_') and therefore
// cannot act as the delimiter following a quote-like operator such as q, m or s.
static inline bool isNonQuote(char ch) {
	// Cast first: <ctype.h> functions are undefined for negative char values.
	return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
}
78 static inline char actualNumStyle(int numberStyle) {
79 switch (numberStyle) {
80 case PERLNUM_VECTOR:
81 case PERLNUM_V_VECTOR:
82 return SCE_PL_STRING;
83 case PERLNUM_DECIMAL:
84 case PERLNUM_NON_DEC:
85 case PERLNUM_FLOAT:
86 default:
87 return SCE_PL_NUMBER;
91 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
92 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
93 return false;
95 while (*val) {
96 if (*val != styler[pos++]) {
97 return false;
99 val++;
101 return true;
// Returns the closing delimiter that pairs with an opening bracket character;
// any other character is its own closing delimiter.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
// Assigns lexical styles to the characters from startPos up to (not including)
// startPos + length, writing them through styler.
//   startPos     - first position to style; may be moved backwards below
//   length       - number of characters to style, counted from the original startPos
//   initStyle    - lexical state in effect at startPos
//   keywordlists - only keywordlists[0] is read; it is passed to classifyWordPerl
//   styler       - provides the document text and receives the styles
116 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
117 WordList *keywordlists[], Accessor &styler) {
119 // Lexer for perl often has to backtrack to start of current style to determine
120 // which characters are being used as quotes, how deeply nested is the
121 // start position and what the termination string is for here documents
123 WordList &keywords = *keywordlists[0];
125 class HereDocCls {
126 public:
127 int State; // 0: '<<' encountered
128 // 1: collect the delimiter
129 // 2: here doc text (lines after the delimiter)
130 char Quote; // the char after '<<'
131 bool Quoted; // true if Quote in ('\'','"','`')
132 int DelimiterLength; // strlen(Delimiter)
133 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
134 HereDocCls() {
135 State = 0;
136 DelimiterLength = 0;
137 Delimiter = new char[HERE_DELIM_MAX];
138 Delimiter[0] = '\0';
140 ~HereDocCls() {
141 delete []Delimiter;
144 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
// Bookkeeping for the delimiters of the quote-like construct being lexed:
//   Rep   - delimited sections still to be closed (New(2) is used for s, y and tr)
//   Count - nesting depth; raised on Up, lowered on Down
//   Up    - opening delimiter, '\0' until Open() has been called
//   Down  - closing delimiter, derived from Up via opposite()
146 class QuoteCls {
147 public:
148 int Rep;
149 int Count;
150 char Up;
151 char Down;
152 QuoteCls() {
153 this->New(1);
155 void New(int r) {
156 Rep = r;
157 Count = 0;
158 Up = '\0';
159 Down = '\0';
161 void Open(char u) {
162 Count++;
163 Up = u;
164 Down = opposite(Up);
167 QuoteCls Quote;
169 int state = initStyle;
170 char numState = PERLNUM_DECIMAL;
171 int dotCount = 0;
// End of the range to style; computed before startPos is moved backwards below.
172 unsigned int lengthDoc = startPos + length;
173 //int sookedpos = 0; // these have no apparent use, see POD state
174 //char sooked[100];
175 //sooked[sookedpos] = '\0';
177 // If in a long distance lexical state, seek to the beginning to find quote characters
178 // Perl strings can be multi-line with embedded newlines, so backtrack.
179 // Perl numbers have additional state during lexing, so backtrack too.
180 if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
181 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
182 startPos--;
184 startPos = styler.LineStart(styler.GetLine(startPos));
// NOTE(review): startPos - 1 wraps around if the line found above starts at position 0 - confirm this cannot happen.
185 state = styler.StyleAt(startPos - 1);
187 if ( state == SCE_PL_STRING_Q
188 || state == SCE_PL_STRING_QQ
189 || state == SCE_PL_STRING_QX
190 || state == SCE_PL_STRING_QR
191 || state == SCE_PL_STRING_QW
192 || state == SCE_PL_REGEX
193 || state == SCE_PL_REGSUBST
194 || state == SCE_PL_STRING
195 || state == SCE_PL_BACKTICKS
196 || state == SCE_PL_CHARACTER
197 || state == SCE_PL_NUMBER
198 || state == SCE_PL_IDENTIFIER
200 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
201 startPos--;
203 state = SCE_PL_DEFAULT;
206 styler.StartAt(startPos);
207 char chPrev = styler.SafeGetCharAt(startPos - 1);
// At the very start of the document behave as if a line break came before it,
// so that tests of the form isEOLChar(chPrev) succeed on the first line.
208 if (startPos == 0)
209 chPrev = '\n';
210 char chNext = styler[startPos];
211 styler.StartSegment(startPos);
// Main loop: ch is the character at i, chNext and chNext2 the one and two
// positions after it. Branches that consume extra characters advance i and
// refresh ch/chNext themselves.
213 for (unsigned int i = startPos; i < lengthDoc; i++) {
214 char ch = chNext;
215 // if the current character is not consumed due to the completion of an
216 // earlier style, lexing can be restarted via a simple goto
217 restartLexer:
218 chNext = styler.SafeGetCharAt(i + 1);
219 char chNext2 = styler.SafeGetCharAt(i + 2);
// A lead byte and the byte following it are skipped together without being examined.
221 if (styler.IsLeadByte(ch)) {
222 chNext = styler.SafeGetCharAt(i + 2);
223 chPrev = ' ';
224 i += 1;
225 continue;
227 if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
228 styler.ColourTo(i, state);
229 chPrev = ch;
230 continue;
233 if (HereDoc.State == 1 && isEOLChar(ch)) {
234 // Begin of here-doc (the line after the here-doc delimiter):
235 // Lexically, the here-doc starts from the next line after the >>, but the
236 // first line of here-doc seem to follow the style of the last EOL sequence
237 HereDoc.State = 2;
238 if (HereDoc.Quoted) {
239 if (state == SCE_PL_HERE_DELIM) {
240 // Missing quote at end of string! We are stricter than perl.
241 // Colour here-doc anyway while marking this bit as an error.
242 state = SCE_PL_ERROR;
244 styler.ColourTo(i - 1, state);
245 switch (HereDoc.Quote) {
246 case '\'':
247 state = SCE_PL_HERE_Q ;
248 break;
249 case '"':
250 state = SCE_PL_HERE_QQ;
251 break;
252 case '`':
253 state = SCE_PL_HERE_QX;
254 break;
256 } else {
257 styler.ColourTo(i - 1, state);
258 switch (HereDoc.Quote) {
259 case '\\':
260 state = SCE_PL_HERE_Q ;
261 break;
262 default :
263 state = SCE_PL_HERE_QQ;
268 if (state == SCE_PL_DEFAULT) {
269 if (isdigit(ch) || (isdigit(chNext) &&
270 (ch == '.' || ch == 'v'))) {
271 state = SCE_PL_NUMBER;
272 numState = PERLNUM_DECIMAL;
273 dotCount = 0;
274 if (ch == '0') { // hex,bin,octal
275 if (chNext == 'x' || chNext == 'b' || isdigit(chNext)) {
276 numState = PERLNUM_NON_DEC;
278 } else if (ch == 'v') { // vector
279 numState = PERLNUM_V_VECTOR;
281 } else if (iswordstart(ch)) {
282 if (chPrev == '>' && styler.SafeGetCharAt(i - 2) == '-') {
283 state = SCE_PL_IDENTIFIER; // part of "->" expr
284 if ((!iswordchar(chNext) && chNext != '\'')
285 || (chNext == '.' && chNext2 == '.')) {
286 // We need that if length of word == 1!
287 styler.ColourTo(i, SCE_PL_IDENTIFIER);
288 state = SCE_PL_DEFAULT;
290 } else if (ch == 's' && !isNonQuote(chNext)) {
291 state = SCE_PL_REGSUBST;
292 Quote.New(2);
293 } else if (ch == 'm' && !isNonQuote(chNext)) {
294 state = SCE_PL_REGEX;
295 Quote.New(1);
296 } else if (ch == 'q' && !isNonQuote(chNext)) {
297 state = SCE_PL_STRING_Q;
298 Quote.New(1);
299 } else if (ch == 'y' && !isNonQuote(chNext)) {
300 state = SCE_PL_REGSUBST;
301 Quote.New(2);
302 } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
303 state = SCE_PL_REGSUBST;
304 Quote.New(2);
305 i++;
306 chNext = chNext2;
307 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
308 if (chNext == 'q') state = SCE_PL_STRING_QQ;
309 else if (chNext == 'x') state = SCE_PL_STRING_QX;
310 else if (chNext == 'r') state = SCE_PL_STRING_QR;
311 else if (chNext == 'w') state = SCE_PL_STRING_QW;
312 i++;
313 chNext = chNext2;
314 Quote.New(1);
315 } else if (ch == 'x' && (chNext == '=' || // repetition
316 (chNext != '_' && !isalnum(chNext)) ||
317 (isdigit(chPrev) && isdigit(chNext)))) {
318 styler.ColourTo(i, SCE_PL_OPERATOR);
319 } else {
320 state = SCE_PL_WORD;
321 if ((!iswordchar(chNext) && chNext != '\'')
322 || (chNext == '.' && chNext2 == '.')) {
323 // We need that if length of word == 1!
324 // This test is copied from the SCE_PL_WORD handler.
325 classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);
326 state = SCE_PL_DEFAULT;
329 } else if (ch == '#') {
330 state = SCE_PL_COMMENTLINE;
331 } else if (ch == '\"') {
332 state = SCE_PL_STRING;
333 Quote.New(1);
334 Quote.Open(ch);
335 } else if (ch == '\'') {
336 if (chPrev == '&') {
337 // Archaic call
338 styler.ColourTo(i, state);
339 } else {
340 state = SCE_PL_CHARACTER;
341 Quote.New(1);
342 Quote.Open(ch);
344 } else if (ch == '`') {
345 state = SCE_PL_BACKTICKS;
346 Quote.New(1);
347 Quote.Open(ch);
348 } else if (ch == '$') {
349 if ((chNext == '{') || isspacechar(chNext)) {
350 styler.ColourTo(i, SCE_PL_SCALAR);
351 } else {
352 state = SCE_PL_SCALAR;
353 if (chNext == '`' && chNext2 == '`') {
354 i += 2;
355 ch = styler.SafeGetCharAt(i);
356 chNext = styler.SafeGetCharAt(i + 1);
357 } else {
358 i++;
359 ch = chNext;
360 chNext = chNext2;
363 } else if (ch == '@') {
364 if (isalpha(chNext) || chNext == '#' || chNext == '$'
365 || chNext == '_' || chNext == '+') {
366 state = SCE_PL_ARRAY;
367 } else if (chNext != '{' && chNext != '[') {
368 styler.ColourTo(i, SCE_PL_ARRAY);
369 i++;
370 ch = ' ';
371 } else {
372 styler.ColourTo(i, SCE_PL_ARRAY);
374 } else if (ch == '%') {
375 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
376 state = SCE_PL_HASH;
377 } else if (chNext == '{') {
378 styler.ColourTo(i, SCE_PL_HASH);
379 } else {
380 styler.ColourTo(i, SCE_PL_OPERATOR);
382 } else if (ch == '*') {
383 if (isalpha(chNext) || chNext == '_' || chNext == '{') {
384 state = SCE_PL_SYMBOLTABLE;
385 } else {
386 if (chNext == '*') { // exponentiation
387 i++;
388 ch = chNext;
389 chNext = chNext2;
391 styler.ColourTo(i, SCE_PL_OPERATOR);
393 } else if (ch == '/') {
394 // Explicit backward peeking to set a consistent preferRE for
395 // any slash found, so no longer need to track preferRE state.
396 // Find first previous significant lexed element and interpret.
397 bool preferRE = false;
398 unsigned int bk = (i > 0)? i - 1: 0;
399 char bkch;
400 styler.Flush();
401 while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
402 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
403 bk--;
405 if (bk == 0) {
406 // position 0 won't really be checked; rarely happens
407 // hard to fix due to an unsigned index i
408 preferRE = true;
409 } else {
410 int bkstyle = styler.StyleAt(bk);
411 bkch = styler.SafeGetCharAt(bk);
412 switch(bkstyle) {
413 case SCE_PL_OPERATOR:
414 preferRE = true;
415 if (bkch == ')' || bkch == ']') {
416 preferRE = false;
417 } else if (bkch == '}') {
418 // backtrack further, count balanced brace pairs
419 // if a brace pair found, see if it's a variable
420 int braceCount = 1;
421 while (--bk > 0) {
422 bkstyle = styler.StyleAt(bk);
423 if (bkstyle == SCE_PL_OPERATOR) {
424 bkch = styler.SafeGetCharAt(bk);
425 if (bkch == ';') { // early out
426 break;
427 } else if (bkch == '}') {
428 braceCount++;
429 } else if (bkch == '{') {
430 if (--braceCount == 0)
431 break;
435 if (bk == 0) {
436 // at beginning, true
437 } else if (braceCount == 0) {
438 // balanced { found, bk>0, skip more whitespace
439 if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
440 while (bk > 0) {
441 bkstyle = styler.StyleAt(--bk);
442 if (bkstyle != SCE_PL_DEFAULT)
443 break;
446 bkstyle = styler.StyleAt(bk);
447 if (bkstyle == SCE_PL_SCALAR
448 || bkstyle == SCE_PL_ARRAY
449 || bkstyle == SCE_PL_HASH
450 || bkstyle == SCE_PL_SYMBOLTABLE
451 || bkstyle == SCE_PL_OPERATOR) {
452 preferRE = false;
456 break;
457 case SCE_PL_IDENTIFIER:
458 preferRE = true;
459 if (bkch == '>') { // inputsymbol
460 preferRE = false;
461 break;
463 // backtrack to find "->" or "::" before identifier
464 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
465 bk--;
467 while (bk > 0) {
468 bkstyle = styler.StyleAt(bk);
469 if (bkstyle == SCE_PL_DEFAULT ||
470 bkstyle == SCE_PL_COMMENTLINE) {
471 } else if (bkstyle == SCE_PL_OPERATOR) {
472 // gcc 3.2.3 bloats if more compact form used
473 bkch = styler.SafeGetCharAt(bk);
474 if (bkch == '>') { // "->"
475 if (styler.SafeGetCharAt(bk - 1) == '-') {
476 preferRE = false;
477 break;
479 } else if (bkch == ':') { // "::"
480 if (styler.SafeGetCharAt(bk - 1) == ':') {
481 preferRE = false;
482 break;
485 } else {// bare identifier, usually a function call but Perl
486 // optimizes them as pseudo-constants, then the next
487 // '/' will be a divide; favour divide over regex
488 // if there is a whitespace after the '/'
489 if (isspacechar(chNext)) {
490 preferRE = false;
492 break;
494 bk--;
496 break;
497 // other styles uses the default, preferRE=false
498 case SCE_PL_WORD:
499 case SCE_PL_POD:
500 case SCE_PL_HERE_Q:
501 case SCE_PL_HERE_QQ:
502 case SCE_PL_HERE_QX:
503 preferRE = true;
504 break;
507 if (preferRE) {
508 state = SCE_PL_REGEX;
509 Quote.New(1);
510 Quote.Open(ch);
511 } else {
512 styler.ColourTo(i, SCE_PL_OPERATOR);
514 } else if (ch == '<' && chNext == '<') {
515 state = SCE_PL_HERE_DELIM;
516 HereDoc.State = 0;
517 } else if (ch == '<') {
518 // looks forward for matching > on same line
519 unsigned int fw = i + 1;
520 while (fw < lengthDoc) {
521 char fwch = styler.SafeGetCharAt(fw);
522 if (isEOLChar(fwch) || isspacechar(fwch))
523 break;
524 else if (fwch == '>') {
525 if ((fw - i) == 2 && // '<=>' case
526 styler.SafeGetCharAt(fw-1) == '=') {
527 styler.ColourTo(fw, SCE_PL_OPERATOR);
528 } else {
529 styler.ColourTo(fw, SCE_PL_IDENTIFIER);
531 i = fw;
532 ch = fwch;
533 chNext = styler.SafeGetCharAt(i+1);
535 fw++;
537 styler.ColourTo(i, SCE_PL_OPERATOR);
538 } else if (ch == '=' // POD
539 && isalpha(chNext)
540 && (isEOLChar(chPrev))) {
541 state = SCE_PL_POD;
542 //sookedpos = 0;
543 //sooked[sookedpos] = '\0';
544 } else if (ch == '-' // file test operators
545 && isSingleCharOp(chNext)
546 && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
547 styler.ColourTo(i + 1, SCE_PL_WORD);
548 state = SCE_PL_DEFAULT;
549 i++;
550 ch = chNext;
551 chNext = chNext2;
552 } else if (isPerlOperator(ch)) {
553 if (ch == '.' && chNext == '.') { // .. and ...
554 i++;
555 if (chNext2 == '.') { i++; }
556 state = SCE_PL_DEFAULT;
557 ch = styler.SafeGetCharAt(i);
558 chNext = styler.SafeGetCharAt(i + 1);
560 styler.ColourTo(i, SCE_PL_OPERATOR);
561 } else {
562 // keep colouring defaults to make restart easier
563 styler.ColourTo(i, SCE_PL_DEFAULT);
565 } else if (state == SCE_PL_NUMBER) {
566 if (ch == '.') {
567 if (chNext == '.') {
568 // double dot is always an operator
569 goto numAtEnd;
570 } else if (numState == PERLNUM_NON_DEC || numState == PERLNUM_FLOAT) {
571 // non-decimal number or float exponent, consume next dot
572 styler.ColourTo(i - 1, SCE_PL_NUMBER);
573 styler.ColourTo(i, SCE_PL_OPERATOR);
574 state = SCE_PL_DEFAULT;
575 } else { // decimal or vectors allows dots
576 dotCount++;
577 if (numState == PERLNUM_DECIMAL) {
578 if (dotCount > 1) {
579 if (isdigit(chNext)) { // really a vector
580 numState = PERLNUM_VECTOR;
581 } else // number then dot
582 goto numAtEnd;
584 } else { // vectors
585 if (!isdigit(chNext)) // vector then dot
586 goto numAtEnd;
589 } else if (ch == '_' && numState == PERLNUM_DECIMAL) {
590 if (!isdigit(chNext)) {
591 goto numAtEnd;
593 } else if (isalnum(ch)) {
594 if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
595 if (isalpha(ch)) {
596 if (dotCount == 0) { // change to word
597 state = SCE_PL_WORD;
598 } else { // vector then word
599 goto numAtEnd;
602 } else if (numState == PERLNUM_DECIMAL) {
603 if (ch == 'E' || ch == 'e') { // exponent
604 numState = PERLNUM_FLOAT;
605 if (chNext == '+' || chNext == '-') {
606 i++;
607 ch = chNext;
608 chNext = chNext2;
610 } else if (!isdigit(ch)) { // number then word
611 goto numAtEnd;
613 } else if (numState == PERLNUM_FLOAT) {
614 if (!isdigit(ch)) { // float then word
615 goto numAtEnd;
617 } else {// (numState == PERLNUM_NON_DEC)
618 // allow alphanum for bin,hex,oct for now
620 } else {
621 // complete current number or vector
622 numAtEnd:
623 styler.ColourTo(i - 1, actualNumStyle(numState));
624 state = SCE_PL_DEFAULT;
625 goto restartLexer;
627 } else if (state == SCE_PL_WORD) {
628 if ((!iswordchar(chNext) && chNext != '\'')
629 || chNext == '.') {
630 // ".." is always an operator if preceded by a SCE_PL_WORD.
631 // "." never used in Perl variable names
632 // Archaic Perl has quotes inside names
633 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
634 || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
635 styler.ColourTo(i, SCE_PL_DATASECTION);
636 state = SCE_PL_DATASECTION;
637 } else {
638 classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);
639 state = SCE_PL_DEFAULT;
640 ch = ' ';
643 } else if (state == SCE_PL_IDENTIFIER) {
644 if ((!iswordchar(chNext) && chNext != '\'')
645 || chNext == '.') {
646 styler.ColourTo(i, SCE_PL_IDENTIFIER);
647 state = SCE_PL_DEFAULT;
648 ch = ' ';
650 } else {
651 if (state == SCE_PL_COMMENTLINE) {
652 if (isEOLChar(ch)) {
653 styler.ColourTo(i - 1, state);
654 state = SCE_PL_DEFAULT;
655 goto restartLexer;
656 } else if (isEOLChar(chNext)) {
657 styler.ColourTo(i, state);
658 state = SCE_PL_DEFAULT;
660 } else if (state == SCE_PL_HERE_DELIM) {
662 // From perldata.pod:
663 // ------------------
664 // A line-oriented form of quoting is based on the shell ``here-doc''
665 // syntax.
666 // Following a << you specify a string to terminate the quoted material,
667 // and all lines following the current line down to the terminating
668 // string are the value of the item.
669 // The terminating string may be either an identifier (a word),
670 // or some quoted text.
671 // If quoted, the type of quotes you use determines the treatment of
672 // the text, just as in regular quoting.
673 // An unquoted identifier works like double quotes.
674 // There must be no space between the << and the identifier.
675 // (If you put a space it will be treated as a null identifier,
676 // which is valid, and matches the first empty line.)
677 // (This is deprecated, -w warns of this syntax)
678 // The terminating string must appear by itself (unquoted and with no
679 // surrounding whitespace) on the terminating line.
681 // From Bash info:
682 // ---------------
683 // Specifier format is: <<[-]WORD
684 // Optional '-' is for removal of leading tabs from here-doc.
685 // Whitespace acceptable after <<[-] operator.
687 if (HereDoc.State == 0) { // '<<' encountered
688 HereDoc.State = 1;
689 HereDoc.Quote = chNext;
690 HereDoc.Quoted = false;
691 HereDoc.DelimiterLength = 0;
692 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
693 if (chNext == '\'' || chNext == '"' || chNext == '`') { // a quoted here-doc delimiter
694 i++;
695 ch = chNext;
696 chNext = chNext2;
697 HereDoc.Quoted = true;
698 } else if (isalpha(chNext) || chNext == '_') {
699 // an unquoted here-doc delimiter, no special handling
700 } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
701 || chNext == '=' || chNext == '$' || chNext == '@') {
702 // left shift << or <<= operator cases
703 styler.ColourTo(i, SCE_PL_OPERATOR);
704 state = SCE_PL_DEFAULT;
705 HereDoc.State = 0;
706 } else {
707 // symbols terminates; deprecated zero-length delimiter
710 } else if (HereDoc.State == 1) { // collect the delimiter
711 if (HereDoc.Quoted) { // a quoted here-doc delimiter
712 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
713 styler.ColourTo(i, state);
714 state = SCE_PL_DEFAULT;
715 } else {
716 if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
717 i++;
718 ch = chNext;
719 chNext = chNext2;
721 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
722 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
724 } else { // an unquoted here-doc delimiter
725 if (isalnum(ch) || ch == '_') {
726 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
727 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
728 } else {
729 styler.ColourTo(i - 1, state);
730 state = SCE_PL_DEFAULT;
731 goto restartLexer;
// A delimiter that would fill the buffer is treated as an error; the error state ends the main loop.
734 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
735 styler.ColourTo(i - 1, state);
736 state = SCE_PL_ERROR;
737 goto restartLexer;
740 } else if (HereDoc.State == 2) {
741 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
742 if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
743 i += HereDoc.DelimiterLength;
744 chPrev = styler.SafeGetCharAt(i - 1);
745 ch = styler.SafeGetCharAt(i);
746 if (isEOLChar(ch)) {
747 styler.ColourTo(i - 1, state);
748 state = SCE_PL_DEFAULT;
749 HereDoc.State = 0;
750 goto restartLexer;
752 chNext = styler.SafeGetCharAt(i + 1);
754 } else if (state == SCE_PL_POD) {
755 if (ch == '=' && isEOLChar(chPrev)) {
756 if (isMatch(styler, lengthDoc, i, "=cut")) {
757 styler.ColourTo(i - 1 + 4, state);
758 i += 4;
759 state = SCE_PL_DEFAULT;
760 ch = styler.SafeGetCharAt(i);
761 //chNext = styler.SafeGetCharAt(i + 1);
762 goto restartLexer;
765 } else if (state == SCE_PL_SCALAR // variable names
766 || state == SCE_PL_ARRAY
767 || state == SCE_PL_HASH
768 || state == SCE_PL_SYMBOLTABLE) {
769 if (ch == ':' && chNext == ':') { // skip ::
770 i++;
771 ch = chNext;
772 chNext = chNext2;
774 else if (isEndVar(ch)) {
775 if ((state == SCE_PL_SCALAR || state == SCE_PL_ARRAY)
776 && i == (styler.GetStartSegment() + 1)) {
777 // Special variable: $(, $_ etc.
778 styler.ColourTo(i, state);
779 state = SCE_PL_DEFAULT;
780 } else {
781 styler.ColourTo(i - 1, state);
782 state = SCE_PL_DEFAULT;
783 goto restartLexer;
786 } else if (state == SCE_PL_REGEX
787 || state == SCE_PL_STRING_QR
789 if (!Quote.Up && !isspacechar(ch)) {
790 Quote.Open(ch);
791 } else if (ch == '\\' && Quote.Up != '\\') {
792 // SG: Is it save to skip *every* escaped char?
793 i++;
794 ch = chNext;
795 chNext = styler.SafeGetCharAt(i + 1);
796 } else {
797 if (ch == Quote.Down /*&& chPrev != '\\'*/) {
798 Quote.Count--;
799 if (Quote.Count == 0) {
800 Quote.Rep--;
801 if (Quote.Up == Quote.Down) {
802 Quote.Count++;
805 if (!isalpha(chNext)) {
806 if (Quote.Rep <= 0) {
807 styler.ColourTo(i, state);
808 state = SCE_PL_DEFAULT;
809 ch = ' ';
812 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
813 Quote.Count++;
814 } else if (!isalpha(chNext)) {
815 if (Quote.Rep <= 0) {
816 styler.ColourTo(i, state);
817 state = SCE_PL_DEFAULT;
818 ch = ' ';
822 } else if (state == SCE_PL_REGSUBST) {
823 if (!Quote.Up && !isspacechar(ch)) {
824 Quote.Open(ch);
825 } else if (ch == '\\' && Quote.Up != '\\') {
826 // SG: Is it save to skip *every* escaped char?
827 i++;
828 ch = chNext;
829 chNext = styler.SafeGetCharAt(i + 1);
830 } else {
831 if (Quote.Count == 0 && Quote.Rep == 1) {
832 /* We matched something like s(...) or tr{...}
833 * and are looking for the next matcher characters,
834 * which could be either bracketed ({...}) or non-bracketed
835 * (/.../).
837 * Number-signs are problematic. If they occur after
838 * the close of the first part, treat them like
839 * a Quote.Up char, even if they actually start comments.
841 * If we find an alnum, we end the regsubst, and punt.
843 * Eric Promislow ericp@activestate.com Aug 9,2000
845 if (isspacechar(ch)) {
846 // Keep going
848 else if (isalnum(ch)) {
849 styler.ColourTo(i, state);
850 state = SCE_PL_DEFAULT;
851 ch = ' ';
852 } else {
853 Quote.Open(ch);
855 } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
856 Quote.Count--;
857 if (Quote.Count == 0) {
858 Quote.Rep--;
860 if (!isalpha(chNext)) {
861 if (Quote.Rep <= 0) {
862 styler.ColourTo(i, state);
863 state = SCE_PL_DEFAULT;
864 ch = ' ';
867 if (Quote.Up == Quote.Down) {
868 Quote.Count++;
870 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
871 Quote.Count++;
872 } else if (!isalpha(chNext)) {
873 if (Quote.Rep <= 0) {
874 styler.ColourTo(i, state);
875 state = SCE_PL_DEFAULT;
876 ch = ' ';
880 } else if (state == SCE_PL_STRING_Q
881 || state == SCE_PL_STRING_QQ
882 || state == SCE_PL_STRING_QX
883 || state == SCE_PL_STRING_QW
884 || state == SCE_PL_STRING
885 || state == SCE_PL_CHARACTER
886 || state == SCE_PL_BACKTICKS
888 if (!Quote.Down && !isspacechar(ch)) {
889 Quote.Open(ch);
890 } else if (ch == '\\' && Quote.Up != '\\') {
891 i++;
892 ch = chNext;
893 chNext = styler.SafeGetCharAt(i + 1);
894 } else if (ch == Quote.Down) {
895 Quote.Count--;
896 if (Quote.Count == 0) {
897 Quote.Rep--;
898 if (Quote.Rep <= 0) {
899 styler.ColourTo(i, state);
900 state = SCE_PL_DEFAULT;
901 ch = ' ';
903 if (Quote.Up == Quote.Down) {
904 Quote.Count++;
907 } else if (ch == Quote.Up) {
908 Quote.Count++;
// Once the error state has been entered, lexing stops; the final ColourTo
// after the loop applies that state up to the end of the range.
912 if (state == SCE_PL_ERROR) {
913 break;
915 chPrev = ch;
// Colour whatever is left of the range with the state the loop ended in.
917 styler.ColourTo(lengthDoc - 1, state);
920 static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
921 Accessor &styler) {
922 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
923 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
924 unsigned int endPos = startPos + length;
925 int visibleChars = 0;
926 int lineCurrent = styler.GetLine(startPos);
927 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
928 int levelCurrent = levelPrev;
929 char chNext = styler[startPos];
930 int styleNext = styler.StyleAt(startPos);
931 for (unsigned int i = startPos; i < endPos; i++) {
932 char ch = chNext;
933 chNext = styler.SafeGetCharAt(i + 1);
934 int style = styleNext;
935 styleNext = styler.StyleAt(i + 1);
936 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
937 if (foldComment && (style == SCE_PL_COMMENTLINE)) {
938 if ((ch == '/') && (chNext == '/')) {
939 char chNext2 = styler.SafeGetCharAt(i + 2);
940 if (chNext2 == '{') {
941 levelCurrent++;
942 } else if (chNext2 == '}') {
943 levelCurrent--;
947 if (style == SCE_C_OPERATOR) {
948 if (ch == '{') {
949 levelCurrent++;
950 } else if (ch == '}') {
951 levelCurrent--;
954 if (atEOL) {
955 int lev = levelPrev;
956 if (visibleChars == 0 && foldCompact)
957 lev |= SC_FOLDLEVELWHITEFLAG;
958 if ((levelCurrent > levelPrev) && (visibleChars > 0))
959 lev |= SC_FOLDLEVELHEADERFLAG;
960 if (lev != styler.LevelAt(lineCurrent)) {
961 styler.SetLevel(lineCurrent, lev);
963 lineCurrent++;
964 levelPrev = levelCurrent;
965 visibleChars = 0;
967 if (!isspacechar(ch))
968 visibleChars++;
970 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
971 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
972 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
// Descriptions of the keyword lists this lexer takes; ColourisePerlDoc reads
// only the first list (keywordlists[0]).
975 static const char * const perlWordListDesc[] = {
976 "Keywords",
// Lexer module object constructed from SCLEX_PERL, the colouriser, the name
// "perl", the folder and the word list descriptions above.
// NOTE(review): presumably constructing it is what makes the lexer available - confirm in LexerModule.
980 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);