updated Scintilla to 2.29
[TortoiseGit.git] / ext / scintilla / lexers / LexHTML.cxx
blobd20d688b12c67bcad4ee2898511acfe40d917fa5
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
// Distance between the style numbers used for a script language and the
// parallel "ASP" style numbers for the same language. statePrintForState()
// adds these offsets and stateForPrintState() subtracts them again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
// Language of the script (or script-like section) currently being lexed.
// The values are stored in 4-bit fields of the per-line state, so they must
// stay within 0..15.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Kind of section the lexer is inside. The value is stored in a 2-bit field
// of the per-line state, so it must stay within 0..3.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// True when ch may appear inside a word: an ASCII letter or digit, '.' or '_'.
// Anything outside 0..0x7F is rejected.
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for EOF and unsigned char values, so negative
	// inputs (for example a sign-extended high byte) are filtered out first.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// True when ch may begin a word: an ASCII letter or digit, or '_'.
// Anything outside 0..0x7F is rejected.
static inline bool IsAWordStart(const int ch) {
	// isalnum() is only defined for EOF and unsigned char values, so negative
	// inputs (for example a sign-extended high byte) are filtered out first.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// True when ch is one of the punctuation characters treated as an operator.
// Letters and digits are never operators. Note that '.' IS part of the set.
inline bool IsOperator(int ch) {
	// No alphanumeric character appears in the list below, so a single switch
	// covers the original "not alphanumeric" pre-check without calling the
	// non-standard isascii() or feeding isalnum() an out-of-range value.
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
59 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
60 unsigned int i = 0;
61 for (; (i < end - start + 1) && (i < len-1); i++) {
62 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
64 s[i] = '\0';
67 static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
69 unsigned int i = 0;
70 for (; i < sLen-1; i++) {
71 char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
72 if ((i == 0) && !IsAWordStart(ch))
73 break;
74 if ((i > 0) && !IsAWordChar(ch))
75 break;
76 s[i] = ch;
78 s[i] = '\0';
80 return s;
83 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
84 char s[100];
85 GetTextSegment(styler, start, end, s, sizeof(s));
86 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
87 if (strstr(s, "src")) // External script
88 return eScriptNone;
89 if (strstr(s, "vbs"))
90 return eScriptVBS;
91 if (strstr(s, "pyth"))
92 return eScriptPython;
93 if (strstr(s, "javas"))
94 return eScriptJS;
95 if (strstr(s, "jscr"))
96 return eScriptJS;
97 if (strstr(s, "php"))
98 return eScriptPHP;
99 if (strstr(s, "xml")) {
100 const char *xml = strstr(s, "xml");
101 for (const char *t=s; t<xml; t++) {
102 if (!IsASpace(*t)) {
103 return prevValue;
106 return eScriptXML;
109 return prevValue;
112 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
113 int iResult = 0;
114 char s[100];
115 GetTextSegment(styler, start, end, s, sizeof(s));
116 if (0 == strncmp(s, "php", 3)) {
117 iResult = 3;
120 return iResult;
123 static script_type ScriptOfState(int state) {
124 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
125 return eScriptPython;
126 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
127 return eScriptVBS;
128 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
129 return eScriptJS;
130 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
131 return eScriptPHP;
132 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
133 return eScriptSGML;
134 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
135 return eScriptSGMLblock;
136 } else {
137 return eScriptNone;
141 static int statePrintForState(int state, script_mode inScriptType) {
142 int StateToPrint = state;
144 if (state >= SCE_HJ_START) {
145 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
146 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
147 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
148 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
149 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
150 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
154 return StateToPrint;
157 static int stateForPrintState(int StateToPrint) {
158 int state;
160 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
161 state = StateToPrint - SCE_HA_PYTHON;
162 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
163 state = StateToPrint - SCE_HA_VBS;
164 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
165 state = StateToPrint - SCE_HA_JS;
166 } else {
167 state = StateToPrint;
170 return state;
173 static inline bool IsNumber(unsigned int start, Accessor &styler) {
174 return IsADigit(styler[start]) || (styler[start] == '.') ||
175 (styler[start] == '-') || (styler[start] == '#');
178 static inline bool isStringState(int state) {
179 bool bResult;
181 switch (state) {
182 case SCE_HJ_DOUBLESTRING:
183 case SCE_HJ_SINGLESTRING:
184 case SCE_HJA_DOUBLESTRING:
185 case SCE_HJA_SINGLESTRING:
186 case SCE_HB_STRING:
187 case SCE_HBA_STRING:
188 case SCE_HP_STRING:
189 case SCE_HP_CHARACTER:
190 case SCE_HP_TRIPLE:
191 case SCE_HP_TRIPLEDOUBLE:
192 case SCE_HPA_STRING:
193 case SCE_HPA_CHARACTER:
194 case SCE_HPA_TRIPLE:
195 case SCE_HPA_TRIPLEDOUBLE:
196 case SCE_HPHP_HSTRING:
197 case SCE_HPHP_SIMPLESTRING:
198 case SCE_HPHP_HSTRING_VARIABLE:
199 case SCE_HPHP_COMPLEX_VARIABLE:
200 bResult = true;
201 break;
202 default :
203 bResult = false;
204 break;
206 return bResult;
209 static inline bool stateAllowsTermination(int state) {
210 bool allowTermination = !isStringState(state);
211 if (allowTermination) {
212 switch (state) {
213 case SCE_HB_COMMENTLINE:
214 case SCE_HPHP_COMMENT:
215 case SCE_HP_COMMENTLINE:
216 case SCE_HPA_COMMENTLINE:
217 allowTermination = false;
220 return allowTermination;
223 // not really well done, since it's only comments that should lex the %> and <%
224 static inline bool isCommentASPState(int state) {
225 bool bResult;
227 switch (state) {
228 case SCE_HJ_COMMENT:
229 case SCE_HJ_COMMENTLINE:
230 case SCE_HJ_COMMENTDOC:
231 case SCE_HB_COMMENTLINE:
232 case SCE_HP_COMMENTLINE:
233 case SCE_HPHP_COMMENT:
234 case SCE_HPHP_COMMENTLINE:
235 bResult = true;
236 break;
237 default :
238 bResult = false;
239 break;
241 return bResult;
244 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
245 bool wordIsNumber = IsNumber(start, styler);
246 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
247 if (wordIsNumber) {
248 chAttr = SCE_H_NUMBER;
249 } else {
250 char s[100];
251 GetTextSegment(styler, start, end, s, sizeof(s));
252 if (keywords.InList(s))
253 chAttr = SCE_H_ATTRIBUTE;
255 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
256 // No keywords -> all are known
257 chAttr = SCE_H_ATTRIBUTE;
258 styler.ColourTo(end, chAttr);
261 static int classifyTagHTML(unsigned int start, unsigned int end,
262 WordList &keywords, Accessor &styler, bool &tagDontFold,
263 bool caseSensitive, bool isXml, bool allowScripts) {
264 char s[30 + 2];
265 // Copy after the '<'
266 unsigned int i = 0;
267 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
268 char ch = styler[cPos];
269 if ((ch != '<') && (ch != '/')) {
270 s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
274 //The following is only a quick hack, to see if this whole thing would work
275 //we first need the tagname with a trailing space...
276 s[i] = ' ';
277 s[i+1] = '\0';
279 // if the current language is XML, I can fold any tag
280 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
281 //...to find it in the list of no-container-tags
282 tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
284 //now we can remove the trailing space
285 s[i] = '\0';
287 // No keywords -> all are known
288 char chAttr = SCE_H_TAGUNKNOWN;
289 if (s[0] == '!') {
290 chAttr = SCE_H_SGML_DEFAULT;
291 } else if (!keywords || keywords.InList(s)) {
292 chAttr = SCE_H_TAG;
294 styler.ColourTo(end, chAttr);
295 if (chAttr == SCE_H_TAG) {
296 if (allowScripts && 0 == strcmp(s, "script")) {
297 // check to see if this is a self-closing tag by sniffing ahead
298 bool isSelfClose = false;
299 for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
300 char ch = styler.SafeGetCharAt(cPos, '\0');
301 if (ch == '\0' || ch == '>')
302 break;
303 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
304 isSelfClose = true;
305 break;
309 // do not enter a script state if the tag self-closed
310 if (!isSelfClose)
311 chAttr = SCE_H_SCRIPT;
312 } else if (!isXml && 0 == strcmp(s, "comment")) {
313 chAttr = SCE_H_COMMENT;
316 return chAttr;
319 static void classifyWordHTJS(unsigned int start, unsigned int end,
320 WordList &keywords, Accessor &styler, script_mode inScriptType) {
321 char s[30 + 1];
322 unsigned int i = 0;
323 for (; i < end - start + 1 && i < 30; i++) {
324 s[i] = styler[start + i];
326 s[i] = '\0';
328 char chAttr = SCE_HJ_WORD;
329 bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
330 if (wordIsNumber) {
331 chAttr = SCE_HJ_NUMBER;
332 } else if (keywords.InList(s)) {
333 chAttr = SCE_HJ_KEYWORD;
335 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
338 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
339 char chAttr = SCE_HB_IDENTIFIER;
340 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
341 if (wordIsNumber)
342 chAttr = SCE_HB_NUMBER;
343 else {
344 char s[100];
345 GetTextSegment(styler, start, end, s, sizeof(s));
346 if (keywords.InList(s)) {
347 chAttr = SCE_HB_WORD;
348 if (strcmp(s, "rem") == 0)
349 chAttr = SCE_HB_COMMENTLINE;
352 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
353 if (chAttr == SCE_HB_COMMENTLINE)
354 return SCE_HB_COMMENTLINE;
355 else
356 return SCE_HB_DEFAULT;
359 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
360 bool wordIsNumber = IsADigit(styler[start]);
361 char s[30 + 1];
362 unsigned int i = 0;
363 for (; i < end - start + 1 && i < 30; i++) {
364 s[i] = styler[start + i];
366 s[i] = '\0';
367 char chAttr = SCE_HP_IDENTIFIER;
368 if (0 == strcmp(prevWord, "class"))
369 chAttr = SCE_HP_CLASSNAME;
370 else if (0 == strcmp(prevWord, "def"))
371 chAttr = SCE_HP_DEFNAME;
372 else if (wordIsNumber)
373 chAttr = SCE_HP_NUMBER;
374 else if (keywords.InList(s))
375 chAttr = SCE_HP_WORD;
376 else if (isMako && 0 == strcmp(s, "block"))
377 chAttr = SCE_HP_WORD;
378 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
379 strcpy(prevWord, s);
382 // Update the word colour to default or keyword
383 // Called when in a PHP word
384 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
385 char chAttr = SCE_HPHP_DEFAULT;
386 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
387 if (wordIsNumber)
388 chAttr = SCE_HPHP_NUMBER;
389 else {
390 char s[100];
391 GetTextSegment(styler, start, end, s, sizeof(s));
392 if (keywords.InList(s))
393 chAttr = SCE_HPHP_WORD;
395 styler.ColourTo(end, chAttr);
398 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
399 char s[30 + 1];
400 unsigned int i = 0;
401 for (; i < end - start + 1 && i < 30; i++) {
402 s[i] = styler[start + i];
404 s[i] = '\0';
405 return keywords.InList(s);
408 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
409 char s[30 + 1];
410 unsigned int i = 0;
411 for (; i < end - start + 1 && i < 30; i++) {
412 s[i] = styler[start + i];
414 s[i] = '\0';
415 return (0 == strcmp(s, "[CDATA["));
418 // Return the first state to reach when entering a scripting language
419 static int StateForScript(script_type scriptLanguage) {
420 int Result;
421 switch (scriptLanguage) {
422 case eScriptVBS:
423 Result = SCE_HB_START;
424 break;
425 case eScriptPython:
426 Result = SCE_HP_START;
427 break;
428 case eScriptPHP:
429 Result = SCE_HPHP_DEFAULT;
430 break;
431 case eScriptXML:
432 Result = SCE_H_TAGUNKNOWN;
433 break;
434 case eScriptSGML:
435 Result = SCE_H_SGML_DEFAULT;
436 break;
437 case eScriptComment:
438 Result = SCE_H_COMMENT;
439 break;
440 default :
441 Result = SCE_HJ_START;
442 break;
444 return Result;
// True when ch can be part of an HTML word: any value outside the ASCII range
// 0..0x7F, an ASCII letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(int ch) {
	// Explicit range test instead of the non-standard isascii().
	if (ch < 0 || ch > 0x7f)
		return true;
	return isalnum(ch) || ch == '.' || ch == '-' || ch == '_' ||
	       ch == ':' || ch == '!' || ch == '#';
}
// True when ch can be part of an SGML word: any value outside the ASCII range
// 0..0x7F, an ASCII letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(int ch) {
	// Explicit range test instead of the non-standard isascii().
	if (ch < 0 || ch > 0x7f)
		return true;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == ':' ||
	       ch == '!' || ch == '#' || ch == '[';
}
// True when ch can start a PHP identifier: an ASCII letter, '_', or any value
// of 0x7F and above. Negative values are rejected.
static inline bool IsPhpWordStart(int ch) {
	if (ch >= 0x7f)
		return true;
	// Explicit range test instead of the non-standard isascii().
	return (ch >= 0) && (isalpha(ch) || (ch == '_'));
}
461 static inline bool IsPhpWordChar(int ch) {
462 return IsADigit(ch) || IsPhpWordStart(ch);
465 static bool InTagState(int state) {
466 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
467 state == SCE_H_SCRIPT ||
468 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
469 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
470 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
473 static bool IsCommentState(const int state) {
474 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
477 static bool IsScriptCommentState(const int state) {
478 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
479 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// True when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	return (ch == '\r') || (ch == '\n');
}
// True when ch is '(', '=' or ','.
static bool isOKBeforeRE(int ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
490 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
491 if (strlen(blockType) == 0) {
492 return ((ch == '%') && (chNext == '>'));
493 } else if ((0 == strcmp(blockType, "inherit")) ||
494 (0 == strcmp(blockType, "namespace")) ||
495 (0 == strcmp(blockType, "include")) ||
496 (0 == strcmp(blockType, "page"))) {
497 return ((ch == '/') && (chNext == '>'));
498 } else if (0 == strcmp(blockType, "%")) {
499 if (ch == '/' && isLineEnd(chNext))
500 return 1;
501 else
502 return isLineEnd(ch);
503 } else if (0 == strcmp(blockType, "{")) {
504 return ch == '}';
505 } else {
506 return (ch == '>');
// Decide whether the characters ch / chNext end the Django block that was
// opened with the given blockType:
//   "%"  ends at "%}"
//   "{"  ends at "}}"
// Any other blockType (including the empty string) never ends.
static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
	if (0 == strcmp(blockType, "%")) {
		return (ch == '%') && (chNext == '}');
	}
	if (0 == strcmp(blockType, "{")) {
		return (ch == '}') && (chNext == '}');
	}
	return false;
}
522 static bool isPHPStringState(int state) {
523 return
524 (state == SCE_HPHP_HSTRING) ||
525 (state == SCE_HPHP_SIMPLESTRING) ||
526 (state == SCE_HPHP_HSTRING_VARIABLE) ||
527 (state == SCE_HPHP_COMPLEX_VARIABLE);
530 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
531 int j;
532 const int beginning = i - 1;
533 bool isValidSimpleString = false;
535 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
536 i++;
538 char ch = styler.SafeGetCharAt(i);
539 const char chNext = styler.SafeGetCharAt(i + 1);
540 if (!IsPhpWordStart(ch)) {
541 if (ch == '\'' && IsPhpWordStart(chNext)) {
542 i++;
543 ch = chNext;
544 isSimpleString = true;
545 } else {
546 phpStringDelimiter[0] = '\0';
547 return beginning;
550 phpStringDelimiter[0] = ch;
551 i++;
553 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
554 if (!IsPhpWordChar(styler[j])) {
555 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
556 isValidSimpleString = true;
557 j++;
558 break;
559 } else {
560 phpStringDelimiter[0] = '\0';
561 return beginning;
564 if (j - i < phpStringDelimiterSize - 2)
565 phpStringDelimiter[j-i+1] = styler[j];
566 else
567 i++;
569 if (isSimpleString && !isValidSimpleString) {
570 phpStringDelimiter[0] = '\0';
571 return beginning;
573 phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
574 return j - 1;
577 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
578 Accessor &styler, bool isXml) {
579 WordList &keywords = *keywordlists[0];
580 WordList &keywords2 = *keywordlists[1];
581 WordList &keywords3 = *keywordlists[2];
582 WordList &keywords4 = *keywordlists[3];
583 WordList &keywords5 = *keywordlists[4];
584 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
586 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
587 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
588 char prevWord[200];
589 prevWord[0] = '\0';
590 char phpStringDelimiter[200]; // PHP is not limited in length, we are
591 phpStringDelimiter[0] = '\0';
592 int StateToPrint = initStyle;
593 int state = stateForPrintState(StateToPrint);
594 char makoBlockType[200];
595 makoBlockType[0] = '\0';
596 int makoComment = 0;
597 char djangoBlockType[2];
598 djangoBlockType[0] = '\0';
600 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
601 if (InTagState(state)) {
602 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
603 startPos--;
604 length++;
606 state = SCE_H_DEFAULT;
608 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
609 if (isPHPStringState(state)) {
610 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
611 startPos--;
612 length++;
613 state = styler.StyleAt(startPos);
615 if (startPos == 0)
616 state = SCE_H_DEFAULT;
618 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
620 int lineCurrent = styler.GetLine(startPos);
621 int lineState;
622 if (lineCurrent > 0) {
623 lineState = styler.GetLineState(lineCurrent-1);
624 } else {
625 // Default client and ASP scripting language is JavaScript
626 lineState = eScriptJS << 8;
628 // property asp.default.language
629 // Script in ASP code is initially assumed to be in JavaScript.
630 // To change this to VBScript set asp.default.language to 2. Python is 3.
631 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
633 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
634 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
635 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
636 bool tagDontFold = false; //some HTML tags should not be folded
637 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
638 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
639 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
641 script_type scriptLanguage = ScriptOfState(state);
642 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
643 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
644 scriptLanguage = eScriptComment;
646 script_type beforeLanguage = ScriptOfState(beforePreProc);
648 // property fold.html
649 // Folding is turned on or off for HTML and XML files with this option.
650 // The fold option must also be on for folding to occur.
651 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
653 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
655 // property fold.html.preprocessor
656 // Folding is turned on or off for scripts embedded in HTML files with this option.
657 // The default is on.
658 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
660 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
662 // property fold.hypertext.comment
663 // Allow folding for comments in scripts embedded in HTML.
664 // The default is off.
665 const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
667 // property fold.hypertext.heredoc
668 // Allow folding for heredocs in scripts embedded in HTML.
669 // The default is off.
670 const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
672 // property html.tags.case.sensitive
673 // For XML and HTML, setting this property to 1 will make tags match in a case
674 // sensitive way which is the expected behaviour for XML and XHTML.
675 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
677 // property lexer.xml.allow.scripts
678 // Set to 0 to disable scripts in XML.
679 const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
681 // property lexer.html.mako
682 // Set to 1 to enable the mako template language.
683 const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
685 // property lexer.html.django
686 // Set to 1 to enable the django template language.
687 const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
689 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
690 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
691 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
693 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
694 int levelCurrent = levelPrev;
695 int visibleChars = 0;
696 int lineStartVisibleChars = 0;
698 int chPrev = ' ';
699 int ch = ' ';
700 int chPrevNonWhite = ' ';
701 // look back to set chPrevNonWhite properly for better regex colouring
702 if (scriptLanguage == eScriptJS && startPos > 0) {
703 int back = startPos;
704 int style = 0;
705 while (--back) {
706 style = styler.StyleAt(back);
707 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
708 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
709 break;
711 if (style == SCE_HJ_SYMBOLS) {
712 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
716 styler.StartSegment(startPos);
717 const int lengthDoc = startPos + length;
718 for (int i = startPos; i < lengthDoc; i++) {
719 const int chPrev2 = chPrev;
720 chPrev = ch;
721 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
722 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
723 chPrevNonWhite = ch;
724 ch = static_cast<unsigned char>(styler[i]);
725 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
726 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
728 // Handle DBCS codepages
729 if (styler.IsLeadByte(static_cast<char>(ch))) {
730 chPrev = ' ';
731 i += 1;
732 continue;
735 if ((!IsASpace(ch) || !foldCompact) && fold)
736 visibleChars++;
737 if (!IsASpace(ch))
738 lineStartVisibleChars++;
740 // decide what is the current state to print (depending of the script tag)
741 StateToPrint = statePrintForState(state, inScriptType);
743 // handle script folding
744 if (fold) {
745 switch (scriptLanguage) {
746 case eScriptJS:
747 case eScriptPHP:
748 //not currently supported case eScriptVBS:
750 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
751 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
752 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
753 if (ch == '#') {
754 int j = i + 1;
755 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
756 j++;
758 if (styler.Match(j, "region") || styler.Match(j, "if")) {
759 levelCurrent++;
760 } else if (styler.Match(j, "end")) {
761 levelCurrent--;
763 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
764 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
766 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
767 levelCurrent--;
769 break;
770 case eScriptPython:
771 if (state != SCE_HP_COMMENTLINE && !isMako) {
772 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
773 levelCurrent++;
774 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
775 // check if the number of tabs is lower than the level
776 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
777 for (int j = 0; Findlevel > 0; j++) {
778 char chTmp = styler.SafeGetCharAt(i + j + 1);
779 if (chTmp == '\t') {
780 Findlevel -= 8;
781 } else if (chTmp == ' ') {
782 Findlevel--;
783 } else {
784 break;
788 if (Findlevel > 0) {
789 levelCurrent -= Findlevel / 8;
790 if (Findlevel % 8)
791 levelCurrent--;
795 break;
796 default:
797 break;
801 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
802 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
803 // Avoid triggering two times on Dos/Win
804 // New line -> record any line state onto /next/ line
805 if (fold) {
806 int lev = levelPrev;
807 if (visibleChars == 0)
808 lev |= SC_FOLDLEVELWHITEFLAG;
809 if ((levelCurrent > levelPrev) && (visibleChars > 0))
810 lev |= SC_FOLDLEVELHEADERFLAG;
812 styler.SetLevel(lineCurrent, lev);
813 visibleChars = 0;
814 levelPrev = levelCurrent;
816 styler.SetLineState(lineCurrent,
817 ((inScriptType & 0x03) << 0) |
818 ((tagOpened & 0x01) << 2) |
819 ((tagClosing & 0x01) << 3) |
820 ((aspScript & 0x0F) << 4) |
821 ((clientScript & 0x0F) << 8) |
822 ((beforePreProc & 0xFF) << 12));
823 lineCurrent++;
824 lineStartVisibleChars = 0;
827 // handle start of Mako comment line
828 if (isMako && ch == '#' && chNext == '#') {
829 makoComment = 1;
832 // handle end of Mako comment line
833 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
834 makoComment = 0;
835 styler.ColourTo(i, SCE_HP_COMMENTLINE);
836 state = SCE_HP_DEFAULT;
839 // Allow falling through to mako handling code if newline is going to end a block
840 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
841 (!isMako || (0 != strcmp(makoBlockType, "%")))) {
844 // generic end of script processing
845 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
846 // Check if it's the end of the script tag (or any other HTML tag)
847 switch (state) {
848 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
849 case SCE_H_DOUBLESTRING:
850 case SCE_H_SINGLESTRING:
851 case SCE_HJ_COMMENT:
852 case SCE_HJ_COMMENTDOC:
853 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
854 // the end of script marker from some JS interpreters.
855 case SCE_HB_COMMENTLINE:
856 case SCE_HBA_COMMENTLINE:
857 case SCE_HJ_DOUBLESTRING:
858 case SCE_HJ_SINGLESTRING:
859 case SCE_HJ_REGEX:
860 case SCE_HB_STRING:
861 case SCE_HBA_STRING:
862 case SCE_HP_STRING:
863 case SCE_HP_TRIPLE:
864 case SCE_HP_TRIPLEDOUBLE:
865 case SCE_HPHP_HSTRING:
866 case SCE_HPHP_SIMPLESTRING:
867 case SCE_HPHP_COMMENT:
868 case SCE_HPHP_COMMENTLINE:
869 break;
870 default :
871 // check if the closing tag is a script tag
872 if (const char *tag =
873 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
874 state == SCE_H_COMMENT ? "comment" : 0) {
875 int j = i + 2;
876 int chr;
877 do {
878 chr = static_cast<int>(*tag++);
879 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
880 if (chr != 0) break;
882 // closing tag of the script (it's a closing HTML tag anyway)
883 styler.ColourTo(i - 1, StateToPrint);
884 state = SCE_H_TAGUNKNOWN;
885 inScriptType = eHtml;
886 scriptLanguage = eScriptNone;
887 clientScript = eScriptJS;
888 i += 2;
889 visibleChars += 2;
890 tagClosing = true;
891 continue;
895 /////////////////////////////////////
896 // handle the start of PHP pre-processor = Non-HTML
897 else if ((state != SCE_H_ASPAT) &&
898 !isPHPStringState(state) &&
899 (state != SCE_HPHP_COMMENT) &&
900 (state != SCE_HPHP_COMMENTLINE) &&
901 (ch == '<') &&
902 (chNext == '?') &&
903 !IsScriptCommentState(state)) {
904 beforeLanguage = scriptLanguage;
905 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
906 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
907 styler.ColourTo(i - 1, StateToPrint);
908 beforePreProc = state;
909 i++;
910 visibleChars++;
911 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
912 if (scriptLanguage == eScriptXML)
913 styler.ColourTo(i, SCE_H_XMLSTART);
914 else
915 styler.ColourTo(i, SCE_H_QUESTION);
916 state = StateForScript(scriptLanguage);
917 if (inScriptType == eNonHtmlScript)
918 inScriptType = eNonHtmlScriptPreProc;
919 else
920 inScriptType = eNonHtmlPreProc;
921 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
922 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
923 levelCurrent++;
925 // should be better
926 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
927 continue;
930 // handle the start Mako template Python code
931 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
932 (lineStartVisibleChars == 1 && ch == '%') ||
933 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
934 (ch == '$' && chNext == '{') ||
935 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
936 if (ch == '%' || ch == '/')
937 strcpy(makoBlockType, "%");
938 else if (ch == '$')
939 strcpy(makoBlockType, "{");
940 else if (chNext == '/')
941 GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
942 else
943 GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
944 styler.ColourTo(i - 1, StateToPrint);
945 beforePreProc = state;
946 if (inScriptType == eNonHtmlScript)
947 inScriptType = eNonHtmlScriptPreProc;
948 else
949 inScriptType = eNonHtmlPreProc;
951 if (chNext == '/') {
952 i += 2;
953 visibleChars += 2;
954 } else if (ch != '%') {
955 i++;
956 visibleChars++;
958 state = SCE_HP_START;
959 scriptLanguage = eScriptPython;
960 styler.ColourTo(i, SCE_H_ASP);
962 if (ch != '%' && ch != '$' && ch != '/') {
963 i += static_cast<int>(strlen(makoBlockType));
964 visibleChars += static_cast<int>(strlen(makoBlockType));
965 if (keywords4.InList(makoBlockType))
966 styler.ColourTo(i, SCE_HP_WORD);
967 else
968 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
971 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
972 continue;
975 // handle the start/end of Django comment
976 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
977 styler.ColourTo(i - 1, StateToPrint);
978 beforePreProc = state;
979 beforeLanguage = scriptLanguage;
980 if (inScriptType == eNonHtmlScript)
981 inScriptType = eNonHtmlScriptPreProc;
982 else
983 inScriptType = eNonHtmlPreProc;
984 i += 1;
985 visibleChars += 1;
986 scriptLanguage = eScriptComment;
987 state = SCE_H_COMMENT;
988 styler.ColourTo(i, SCE_H_ASP);
989 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
990 continue;
991 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
992 styler.ColourTo(i - 1, StateToPrint);
993 i += 1;
994 visibleChars += 1;
995 styler.ColourTo(i, SCE_H_ASP);
996 state = beforePreProc;
997 if (inScriptType == eNonHtmlScriptPreProc)
998 inScriptType = eNonHtmlScript;
999 else
1000 inScriptType = eHtml;
1001 scriptLanguage = beforeLanguage;
1002 continue;
1005 // handle the start of Django template code
1006 else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1007 if (chNext == '%')
1008 strcpy(djangoBlockType, "%");
1009 else
1010 strcpy(djangoBlockType, "{");
1011 styler.ColourTo(i - 1, StateToPrint);
1012 beforePreProc = state;
1013 if (inScriptType == eNonHtmlScript)
1014 inScriptType = eNonHtmlScriptPreProc;
1015 else
1016 inScriptType = eNonHtmlPreProc;
1018 i += 1;
1019 visibleChars += 1;
1020 state = SCE_HP_START;
1021 beforeLanguage = scriptLanguage;
1022 scriptLanguage = eScriptPython;
1023 styler.ColourTo(i, SCE_H_ASP);
1025 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1026 continue;
1029 // handle the start of ASP pre-processor = Non-HTML
1030 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1031 styler.ColourTo(i - 1, StateToPrint);
1032 beforePreProc = state;
1033 if (inScriptType == eNonHtmlScript)
1034 inScriptType = eNonHtmlScriptPreProc;
1035 else
1036 inScriptType = eNonHtmlPreProc;
1038 if (chNext2 == '@') {
1039 i += 2; // place as if it was the second next char treated
1040 visibleChars += 2;
1041 state = SCE_H_ASPAT;
1042 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1043 styler.ColourTo(i + 3, SCE_H_ASP);
1044 state = SCE_H_XCCOMMENT;
1045 scriptLanguage = eScriptVBS;
1046 continue;
1047 } else {
1048 if (chNext2 == '=') {
1049 i += 2; // place as if it was the second next char treated
1050 visibleChars += 2;
1051 } else {
1052 i++; // place as if it was the next char treated
1053 visibleChars++;
1056 state = StateForScript(aspScript);
1058 scriptLanguage = eScriptVBS;
1059 styler.ColourTo(i, SCE_H_ASP);
1060 // fold whole script
1061 if (foldHTMLPreprocessor)
1062 levelCurrent++;
1063 // should be better
1064 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1065 continue;
1068 /////////////////////////////////////
1069 // handle the start of SGML language (DTD)
1070 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1071 (chPrev == '<') &&
1072 (ch == '!') &&
1073 (StateToPrint != SCE_H_CDATA) &&
1074 (!IsCommentState(StateToPrint)) &&
1075 (!IsScriptCommentState(StateToPrint))) {
1076 beforePreProc = state;
1077 styler.ColourTo(i - 2, StateToPrint);
1078 if ((chNext == '-') && (chNext2 == '-')) {
1079 state = SCE_H_COMMENT; // wait for a pending command
1080 styler.ColourTo(i + 2, SCE_H_COMMENT);
1081 i += 2; // follow styling after the --
1082 } else if (isWordCdata(i + 1, i + 7, styler)) {
1083 state = SCE_H_CDATA;
1084 } else {
1085 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1086 scriptLanguage = eScriptSGML;
1087 state = SCE_H_SGML_COMMAND; // wait for a pending command
1089 // fold whole tag (-- when closing the tag)
1090 if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1091 levelCurrent++;
1092 continue;
1095 // handle the end of Mako Python code
1096 else if (isMako &&
1097 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1098 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1099 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1100 if (state == SCE_H_ASPAT) {
1101 aspScript = segIsScriptingIndicator(styler,
1102 styler.GetStartSegment(), i - 1, aspScript);
1104 if (state == SCE_HP_WORD) {
1105 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1106 } else {
1107 styler.ColourTo(i - 1, StateToPrint);
1109 if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1110 i++;
1111 visibleChars++;
1113 else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
1114 i++;
1115 visibleChars++;
1117 if (0 != strcmp(makoBlockType, "%") || ch == '/') {
1118 styler.ColourTo(i, SCE_H_ASP);
1120 state = beforePreProc;
1121 if (inScriptType == eNonHtmlScriptPreProc)
1122 inScriptType = eNonHtmlScript;
1123 else
1124 inScriptType = eHtml;
1125 scriptLanguage = eScriptNone;
1126 continue;
1129 // handle the end of Django template code
1130 else if (isDjango &&
1131 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1132 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1133 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1134 if (state == SCE_H_ASPAT) {
1135 aspScript = segIsScriptingIndicator(styler,
1136 styler.GetStartSegment(), i - 1, aspScript);
1138 if (state == SCE_HP_WORD) {
1139 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1140 } else {
1141 styler.ColourTo(i - 1, StateToPrint);
1143 i += 1;
1144 visibleChars += 1;
1145 styler.ColourTo(i, SCE_H_ASP);
1146 state = beforePreProc;
1147 if (inScriptType == eNonHtmlScriptPreProc)
1148 inScriptType = eNonHtmlScript;
1149 else
1150 inScriptType = eHtml;
1151 scriptLanguage = beforeLanguage;
1152 continue;
1155 // handle the end of a pre-processor = Non-HTML
1156 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1157 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1158 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1159 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1160 if (state == SCE_H_ASPAT) {
1161 aspScript = segIsScriptingIndicator(styler,
1162 styler.GetStartSegment(), i - 1, aspScript);
1164 // Bounce out of any ASP mode
1165 switch (state) {
1166 case SCE_HJ_WORD:
1167 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1168 break;
1169 case SCE_HB_WORD:
1170 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1171 break;
1172 case SCE_HP_WORD:
1173 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1174 break;
1175 case SCE_HPHP_WORD:
1176 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1177 break;
1178 case SCE_H_XCCOMMENT:
1179 styler.ColourTo(i - 1, state);
1180 break;
1181 default :
1182 styler.ColourTo(i - 1, StateToPrint);
1183 break;
1185 if (scriptLanguage != eScriptSGML) {
1186 i++;
1187 visibleChars++;
1189 if (ch == '%')
1190 styler.ColourTo(i, SCE_H_ASP);
1191 else if (scriptLanguage == eScriptXML)
1192 styler.ColourTo(i, SCE_H_XMLEND);
1193 else if (scriptLanguage == eScriptSGML)
1194 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1195 else
1196 styler.ColourTo(i, SCE_H_QUESTION);
1197 state = beforePreProc;
1198 if (inScriptType == eNonHtmlScriptPreProc)
1199 inScriptType = eNonHtmlScript;
1200 else
1201 inScriptType = eHtml;
1202 // Unfold all scripting languages, except for XML tag
1203 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1204 levelCurrent--;
1206 scriptLanguage = beforeLanguage;
1207 continue;
1209 /////////////////////////////////////
1211 switch (state) {
1212 case SCE_H_DEFAULT:
1213 if (ch == '<') {
1214 // in HTML, fold on tag open and unfold on tag close
1215 tagOpened = true;
1216 tagClosing = (chNext == '/');
1217 styler.ColourTo(i - 1, StateToPrint);
1218 if (chNext != '!')
1219 state = SCE_H_TAGUNKNOWN;
1220 } else if (ch == '&') {
1221 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1222 state = SCE_H_ENTITY;
1224 break;
1225 case SCE_H_SGML_DEFAULT:
1226 case SCE_H_SGML_BLOCK_DEFAULT:
1227 // if (scriptLanguage == eScriptSGMLblock)
1228 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1230 if (ch == '\"') {
1231 styler.ColourTo(i - 1, StateToPrint);
1232 state = SCE_H_SGML_DOUBLESTRING;
1233 } else if (ch == '\'') {
1234 styler.ColourTo(i - 1, StateToPrint);
1235 state = SCE_H_SGML_SIMPLESTRING;
1236 } else if ((ch == '-') && (chPrev == '-')) {
1237 if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1238 styler.ColourTo(i - 2, StateToPrint);
1240 state = SCE_H_SGML_COMMENT;
1241 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1242 styler.ColourTo(i - 2, StateToPrint);
1243 state = SCE_H_SGML_ENTITY;
1244 } else if (ch == '#') {
1245 styler.ColourTo(i - 1, StateToPrint);
1246 state = SCE_H_SGML_SPECIAL;
1247 } else if (ch == '[') {
1248 styler.ColourTo(i - 1, StateToPrint);
1249 scriptLanguage = eScriptSGMLblock;
1250 state = SCE_H_SGML_BLOCK_DEFAULT;
1251 } else if (ch == ']') {
1252 if (scriptLanguage == eScriptSGMLblock) {
1253 styler.ColourTo(i, StateToPrint);
1254 scriptLanguage = eScriptSGML;
1255 } else {
1256 styler.ColourTo(i - 1, StateToPrint);
1257 styler.ColourTo(i, SCE_H_SGML_ERROR);
1259 state = SCE_H_SGML_DEFAULT;
1260 } else if (scriptLanguage == eScriptSGMLblock) {
1261 if ((ch == '!') && (chPrev == '<')) {
1262 styler.ColourTo(i - 2, StateToPrint);
1263 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1264 state = SCE_H_SGML_COMMAND;
1265 } else if (ch == '>') {
1266 styler.ColourTo(i - 1, StateToPrint);
1267 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1270 break;
1271 case SCE_H_SGML_COMMAND:
1272 if ((ch == '-') && (chPrev == '-')) {
1273 styler.ColourTo(i - 2, StateToPrint);
1274 state = SCE_H_SGML_COMMENT;
1275 } else if (!issgmlwordchar(ch)) {
1276 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1277 styler.ColourTo(i - 1, StateToPrint);
1278 state = SCE_H_SGML_1ST_PARAM;
1279 } else {
1280 state = SCE_H_SGML_ERROR;
1283 break;
1284 case SCE_H_SGML_1ST_PARAM:
1285 // wait for the beginning of the word
1286 if ((ch == '-') && (chPrev == '-')) {
1287 if (scriptLanguage == eScriptSGMLblock) {
1288 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1289 } else {
1290 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1292 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1293 } else if (issgmlwordchar(ch)) {
1294 if (scriptLanguage == eScriptSGMLblock) {
1295 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1296 } else {
1297 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1299 // find the length of the word
1300 int size = 1;
1301 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1302 size++;
1303 styler.ColourTo(i + size - 1, StateToPrint);
1304 i += size - 1;
1305 visibleChars += size - 1;
1306 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1307 if (scriptLanguage == eScriptSGMLblock) {
1308 state = SCE_H_SGML_BLOCK_DEFAULT;
1309 } else {
1310 state = SCE_H_SGML_DEFAULT;
1312 continue;
1314 break;
1315 case SCE_H_SGML_ERROR:
1316 if ((ch == '-') && (chPrev == '-')) {
1317 styler.ColourTo(i - 2, StateToPrint);
1318 state = SCE_H_SGML_COMMENT;
1320 case SCE_H_SGML_DOUBLESTRING:
1321 if (ch == '\"') {
1322 styler.ColourTo(i, StateToPrint);
1323 state = SCE_H_SGML_DEFAULT;
1325 break;
1326 case SCE_H_SGML_SIMPLESTRING:
1327 if (ch == '\'') {
1328 styler.ColourTo(i, StateToPrint);
1329 state = SCE_H_SGML_DEFAULT;
1331 break;
1332 case SCE_H_SGML_COMMENT:
1333 if ((ch == '-') && (chPrev == '-')) {
1334 styler.ColourTo(i, StateToPrint);
1335 state = SCE_H_SGML_DEFAULT;
1337 break;
1338 case SCE_H_CDATA:
1339 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1340 styler.ColourTo(i, StateToPrint);
1341 state = SCE_H_DEFAULT;
1342 levelCurrent--;
1344 break;
1345 case SCE_H_COMMENT:
1346 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1347 styler.ColourTo(i, StateToPrint);
1348 state = SCE_H_DEFAULT;
1349 levelCurrent--;
1351 break;
1352 case SCE_H_SGML_1ST_PARAM_COMMENT:
1353 if ((ch == '-') && (chPrev == '-')) {
1354 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1355 state = SCE_H_SGML_1ST_PARAM;
1357 break;
1358 case SCE_H_SGML_SPECIAL:
1359 if (!(isascii(ch) && isupper(ch))) {
1360 styler.ColourTo(i - 1, StateToPrint);
1361 if (isalnum(ch)) {
1362 state = SCE_H_SGML_ERROR;
1363 } else {
1364 state = SCE_H_SGML_DEFAULT;
1367 break;
1368 case SCE_H_SGML_ENTITY:
1369 if (ch == ';') {
1370 styler.ColourTo(i, StateToPrint);
1371 state = SCE_H_SGML_DEFAULT;
1372 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1373 styler.ColourTo(i, SCE_H_SGML_ERROR);
1374 state = SCE_H_SGML_DEFAULT;
1376 break;
1377 case SCE_H_ENTITY:
1378 if (ch == ';') {
1379 styler.ColourTo(i, StateToPrint);
1380 state = SCE_H_DEFAULT;
1382 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1383 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1384 if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1385 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1386 else
1387 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1388 state = SCE_H_DEFAULT;
1390 break;
1391 case SCE_H_TAGUNKNOWN:
1392 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1393 int eClass = classifyTagHTML(styler.GetStartSegment(),
1394 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1395 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1396 if (!tagClosing) {
1397 inScriptType = eNonHtmlScript;
1398 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1399 } else {
1400 scriptLanguage = eScriptNone;
1402 eClass = SCE_H_TAG;
1404 if (ch == '>') {
1405 styler.ColourTo(i, eClass);
1406 if (inScriptType == eNonHtmlScript) {
1407 state = StateForScript(scriptLanguage);
1408 } else {
1409 state = SCE_H_DEFAULT;
1411 tagOpened = false;
1412 if (!tagDontFold) {
1413 if (tagClosing) {
1414 levelCurrent--;
1415 } else {
1416 levelCurrent++;
1419 tagClosing = false;
1420 } else if (ch == '/' && chNext == '>') {
1421 if (eClass == SCE_H_TAGUNKNOWN) {
1422 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1423 } else {
1424 styler.ColourTo(i - 1, StateToPrint);
1425 styler.ColourTo(i + 1, SCE_H_TAGEND);
1427 i++;
1428 ch = chNext;
1429 state = SCE_H_DEFAULT;
1430 tagOpened = false;
1431 } else {
1432 if (eClass != SCE_H_TAGUNKNOWN) {
1433 if (eClass == SCE_H_SGML_DEFAULT) {
1434 state = SCE_H_SGML_DEFAULT;
1435 } else {
1436 state = SCE_H_OTHER;
1441 break;
1442 case SCE_H_ATTRIBUTE:
1443 if (!setAttributeContinue.Contains(ch)) {
1444 if (inScriptType == eNonHtmlScript) {
1445 int scriptLanguagePrev = scriptLanguage;
1446 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1447 scriptLanguage = clientScript;
1448 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1449 inScriptType = eHtml;
1451 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1452 if (ch == '>') {
1453 styler.ColourTo(i, SCE_H_TAG);
1454 if (inScriptType == eNonHtmlScript) {
1455 state = StateForScript(scriptLanguage);
1456 } else {
1457 state = SCE_H_DEFAULT;
1459 tagOpened = false;
1460 if (!tagDontFold) {
1461 if (tagClosing) {
1462 levelCurrent--;
1463 } else {
1464 levelCurrent++;
1467 tagClosing = false;
1468 } else if (ch == '=') {
1469 styler.ColourTo(i, SCE_H_OTHER);
1470 state = SCE_H_VALUE;
1471 } else {
1472 state = SCE_H_OTHER;
1475 break;
1476 case SCE_H_OTHER:
1477 if (ch == '>') {
1478 styler.ColourTo(i - 1, StateToPrint);
1479 styler.ColourTo(i, SCE_H_TAG);
1480 if (inScriptType == eNonHtmlScript) {
1481 state = StateForScript(scriptLanguage);
1482 } else {
1483 state = SCE_H_DEFAULT;
1485 tagOpened = false;
1486 if (!tagDontFold) {
1487 if (tagClosing) {
1488 levelCurrent--;
1489 } else {
1490 levelCurrent++;
1493 tagClosing = false;
1494 } else if (ch == '\"') {
1495 styler.ColourTo(i - 1, StateToPrint);
1496 state = SCE_H_DOUBLESTRING;
1497 } else if (ch == '\'') {
1498 styler.ColourTo(i - 1, StateToPrint);
1499 state = SCE_H_SINGLESTRING;
1500 } else if (ch == '=') {
1501 styler.ColourTo(i, StateToPrint);
1502 state = SCE_H_VALUE;
1503 } else if (ch == '/' && chNext == '>') {
1504 styler.ColourTo(i - 1, StateToPrint);
1505 styler.ColourTo(i + 1, SCE_H_TAGEND);
1506 i++;
1507 ch = chNext;
1508 state = SCE_H_DEFAULT;
1509 tagOpened = false;
1510 } else if (ch == '?' && chNext == '>') {
1511 styler.ColourTo(i - 1, StateToPrint);
1512 styler.ColourTo(i + 1, SCE_H_XMLEND);
1513 i++;
1514 ch = chNext;
1515 state = SCE_H_DEFAULT;
1516 } else if (setHTMLWord.Contains(ch)) {
1517 styler.ColourTo(i - 1, StateToPrint);
1518 state = SCE_H_ATTRIBUTE;
1520 break;
1521 case SCE_H_DOUBLESTRING:
1522 if (ch == '\"') {
1523 if (inScriptType == eNonHtmlScript) {
1524 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1526 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1527 state = SCE_H_OTHER;
1529 break;
1530 case SCE_H_SINGLESTRING:
1531 if (ch == '\'') {
1532 if (inScriptType == eNonHtmlScript) {
1533 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1535 styler.ColourTo(i, SCE_H_SINGLESTRING);
1536 state = SCE_H_OTHER;
1538 break;
1539 case SCE_H_VALUE:
1540 if (!setHTMLWord.Contains(ch)) {
1541 if (ch == '\"' && chPrev == '=') {
1542 // Should really test for being first character
1543 state = SCE_H_DOUBLESTRING;
1544 } else if (ch == '\'' && chPrev == '=') {
1545 state = SCE_H_SINGLESTRING;
1546 } else {
1547 if (IsNumber(styler.GetStartSegment(), styler)) {
1548 styler.ColourTo(i - 1, SCE_H_NUMBER);
1549 } else {
1550 styler.ColourTo(i - 1, StateToPrint);
1552 if (ch == '>') {
1553 styler.ColourTo(i, SCE_H_TAG);
1554 if (inScriptType == eNonHtmlScript) {
1555 state = StateForScript(scriptLanguage);
1556 } else {
1557 state = SCE_H_DEFAULT;
1559 tagOpened = false;
1560 if (!tagDontFold) {
1561 if (tagClosing) {
1562 levelCurrent--;
1563 } else {
1564 levelCurrent++;
1567 tagClosing = false;
1568 } else {
1569 state = SCE_H_OTHER;
1573 break;
1574 case SCE_HJ_DEFAULT:
1575 case SCE_HJ_START:
1576 case SCE_HJ_SYMBOLS:
1577 if (IsAWordStart(ch)) {
1578 styler.ColourTo(i - 1, StateToPrint);
1579 state = SCE_HJ_WORD;
1580 } else if (ch == '/' && chNext == '*') {
1581 styler.ColourTo(i - 1, StateToPrint);
1582 if (chNext2 == '*')
1583 state = SCE_HJ_COMMENTDOC;
1584 else
1585 state = SCE_HJ_COMMENT;
1586 } else if (ch == '/' && chNext == '/') {
1587 styler.ColourTo(i - 1, StateToPrint);
1588 state = SCE_HJ_COMMENTLINE;
1589 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1590 styler.ColourTo(i - 1, StateToPrint);
1591 state = SCE_HJ_REGEX;
1592 } else if (ch == '\"') {
1593 styler.ColourTo(i - 1, StateToPrint);
1594 state = SCE_HJ_DOUBLESTRING;
1595 } else if (ch == '\'') {
1596 styler.ColourTo(i - 1, StateToPrint);
1597 state = SCE_HJ_SINGLESTRING;
1598 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1599 styler.SafeGetCharAt(i + 3) == '-') {
1600 styler.ColourTo(i - 1, StateToPrint);
1601 state = SCE_HJ_COMMENTLINE;
1602 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1603 styler.ColourTo(i - 1, StateToPrint);
1604 state = SCE_HJ_COMMENTLINE;
1605 i += 2;
1606 } else if (IsOperator(ch)) {
1607 styler.ColourTo(i - 1, StateToPrint);
1608 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1609 state = SCE_HJ_DEFAULT;
1610 } else if ((ch == ' ') || (ch == '\t')) {
1611 if (state == SCE_HJ_START) {
1612 styler.ColourTo(i - 1, StateToPrint);
1613 state = SCE_HJ_DEFAULT;
1616 break;
1617 case SCE_HJ_WORD:
1618 if (!IsAWordChar(ch)) {
1619 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1620 //styler.ColourTo(i - 1, eHTJSKeyword);
1621 state = SCE_HJ_DEFAULT;
1622 if (ch == '/' && chNext == '*') {
1623 if (chNext2 == '*')
1624 state = SCE_HJ_COMMENTDOC;
1625 else
1626 state = SCE_HJ_COMMENT;
1627 } else if (ch == '/' && chNext == '/') {
1628 state = SCE_HJ_COMMENTLINE;
1629 } else if (ch == '\"') {
1630 state = SCE_HJ_DOUBLESTRING;
1631 } else if (ch == '\'') {
1632 state = SCE_HJ_SINGLESTRING;
1633 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1634 styler.ColourTo(i - 1, StateToPrint);
1635 state = SCE_HJ_COMMENTLINE;
1636 i += 2;
1637 } else if (IsOperator(ch)) {
1638 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1639 state = SCE_HJ_DEFAULT;
1642 break;
1643 case SCE_HJ_COMMENT:
1644 case SCE_HJ_COMMENTDOC:
1645 if (ch == '/' && chPrev == '*') {
1646 styler.ColourTo(i, StateToPrint);
1647 state = SCE_HJ_DEFAULT;
1648 ch = ' ';
1650 break;
1651 case SCE_HJ_COMMENTLINE:
1652 if (ch == '\r' || ch == '\n') {
1653 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1654 state = SCE_HJ_DEFAULT;
1655 ch = ' ';
1657 break;
1658 case SCE_HJ_DOUBLESTRING:
1659 if (ch == '\\') {
1660 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1661 i++;
1663 } else if (ch == '\"') {
1664 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1665 state = SCE_HJ_DEFAULT;
1666 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1667 styler.ColourTo(i - 1, StateToPrint);
1668 state = SCE_HJ_COMMENTLINE;
1669 i += 2;
1670 } else if (isLineEnd(ch)) {
1671 styler.ColourTo(i - 1, StateToPrint);
1672 state = SCE_HJ_STRINGEOL;
1674 break;
1675 case SCE_HJ_SINGLESTRING:
1676 if (ch == '\\') {
1677 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1678 i++;
1680 } else if (ch == '\'') {
1681 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1682 state = SCE_HJ_DEFAULT;
1683 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1684 styler.ColourTo(i - 1, StateToPrint);
1685 state = SCE_HJ_COMMENTLINE;
1686 i += 2;
1687 } else if (isLineEnd(ch)) {
1688 styler.ColourTo(i - 1, StateToPrint);
1689 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
1690 state = SCE_HJ_STRINGEOL;
1693 break;
1694 case SCE_HJ_STRINGEOL:
1695 if (!isLineEnd(ch)) {
1696 styler.ColourTo(i - 1, StateToPrint);
1697 state = SCE_HJ_DEFAULT;
1698 } else if (!isLineEnd(chNext)) {
1699 styler.ColourTo(i, StateToPrint);
1700 state = SCE_HJ_DEFAULT;
1702 break;
1703 case SCE_HJ_REGEX:
1704 if (ch == '\r' || ch == '\n' || ch == '/') {
1705 if (ch == '/') {
1706 while (isascii(chNext) && islower(chNext)) { // gobble regex flags
1707 i++;
1708 ch = chNext;
1709 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1712 styler.ColourTo(i, StateToPrint);
1713 state = SCE_HJ_DEFAULT;
1714 } else if (ch == '\\') {
1715 // Gobble up the quoted character
1716 if (chNext == '\\' || chNext == '/') {
1717 i++;
1718 ch = chNext;
1719 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1722 break;
1723 case SCE_HB_DEFAULT:
1724 case SCE_HB_START:
1725 if (IsAWordStart(ch)) {
1726 styler.ColourTo(i - 1, StateToPrint);
1727 state = SCE_HB_WORD;
1728 } else if (ch == '\'') {
1729 styler.ColourTo(i - 1, StateToPrint);
1730 state = SCE_HB_COMMENTLINE;
1731 } else if (ch == '\"') {
1732 styler.ColourTo(i - 1, StateToPrint);
1733 state = SCE_HB_STRING;
1734 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1735 styler.SafeGetCharAt(i + 3) == '-') {
1736 styler.ColourTo(i - 1, StateToPrint);
1737 state = SCE_HB_COMMENTLINE;
1738 } else if (IsOperator(ch)) {
1739 styler.ColourTo(i - 1, StateToPrint);
1740 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1741 state = SCE_HB_DEFAULT;
1742 } else if ((ch == ' ') || (ch == '\t')) {
1743 if (state == SCE_HB_START) {
1744 styler.ColourTo(i - 1, StateToPrint);
1745 state = SCE_HB_DEFAULT;
1748 break;
1749 case SCE_HB_WORD:
1750 if (!IsAWordChar(ch)) {
1751 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1752 if (state == SCE_HB_DEFAULT) {
1753 if (ch == '\"') {
1754 state = SCE_HB_STRING;
1755 } else if (ch == '\'') {
1756 state = SCE_HB_COMMENTLINE;
1757 } else if (IsOperator(ch)) {
1758 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1759 state = SCE_HB_DEFAULT;
1763 break;
1764 case SCE_HB_STRING:
1765 if (ch == '\"') {
1766 styler.ColourTo(i, StateToPrint);
1767 state = SCE_HB_DEFAULT;
1768 } else if (ch == '\r' || ch == '\n') {
1769 styler.ColourTo(i - 1, StateToPrint);
1770 state = SCE_HB_STRINGEOL;
1772 break;
1773 case SCE_HB_COMMENTLINE:
1774 if (ch == '\r' || ch == '\n') {
1775 styler.ColourTo(i - 1, StateToPrint);
1776 state = SCE_HB_DEFAULT;
1778 break;
1779 case SCE_HB_STRINGEOL:
1780 if (!isLineEnd(ch)) {
1781 styler.ColourTo(i - 1, StateToPrint);
1782 state = SCE_HB_DEFAULT;
1783 } else if (!isLineEnd(chNext)) {
1784 styler.ColourTo(i, StateToPrint);
1785 state = SCE_HB_DEFAULT;
1787 break;
1788 case SCE_HP_DEFAULT:
1789 case SCE_HP_START:
1790 if (IsAWordStart(ch)) {
1791 styler.ColourTo(i - 1, StateToPrint);
1792 state = SCE_HP_WORD;
1793 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1794 styler.SafeGetCharAt(i + 3) == '-') {
1795 styler.ColourTo(i - 1, StateToPrint);
1796 state = SCE_HP_COMMENTLINE;
1797 } else if (ch == '#') {
1798 styler.ColourTo(i - 1, StateToPrint);
1799 state = SCE_HP_COMMENTLINE;
1800 } else if (ch == '\"') {
1801 styler.ColourTo(i - 1, StateToPrint);
1802 if (chNext == '\"' && chNext2 == '\"') {
1803 i += 2;
1804 state = SCE_HP_TRIPLEDOUBLE;
1805 ch = ' ';
1806 chPrev = ' ';
1807 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1808 } else {
1809 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1810 state = SCE_HP_STRING;
1812 } else if (ch == '\'') {
1813 styler.ColourTo(i - 1, StateToPrint);
1814 if (chNext == '\'' && chNext2 == '\'') {
1815 i += 2;
1816 state = SCE_HP_TRIPLE;
1817 ch = ' ';
1818 chPrev = ' ';
1819 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1820 } else {
1821 state = SCE_HP_CHARACTER;
1823 } else if (IsOperator(ch)) {
1824 styler.ColourTo(i - 1, StateToPrint);
1825 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1826 } else if ((ch == ' ') || (ch == '\t')) {
1827 if (state == SCE_HP_START) {
1828 styler.ColourTo(i - 1, StateToPrint);
1829 state = SCE_HP_DEFAULT;
1832 break;
1833 case SCE_HP_WORD:
1834 if (!IsAWordChar(ch)) {
1835 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1836 state = SCE_HP_DEFAULT;
1837 if (ch == '#') {
1838 state = SCE_HP_COMMENTLINE;
1839 } else if (ch == '\"') {
1840 if (chNext == '\"' && chNext2 == '\"') {
1841 i += 2;
1842 state = SCE_HP_TRIPLEDOUBLE;
1843 ch = ' ';
1844 chPrev = ' ';
1845 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1846 } else {
1847 state = SCE_HP_STRING;
1849 } else if (ch == '\'') {
1850 if (chNext == '\'' && chNext2 == '\'') {
1851 i += 2;
1852 state = SCE_HP_TRIPLE;
1853 ch = ' ';
1854 chPrev = ' ';
1855 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1856 } else {
1857 state = SCE_HP_CHARACTER;
1859 } else if (IsOperator(ch)) {
1860 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1863 break;
1864 case SCE_HP_COMMENTLINE:
1865 if (ch == '\r' || ch == '\n') {
1866 styler.ColourTo(i - 1, StateToPrint);
1867 state = SCE_HP_DEFAULT;
1869 break;
1870 case SCE_HP_STRING:
1871 if (ch == '\\') {
1872 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1873 i++;
1874 ch = chNext;
1875 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1877 } else if (ch == '\"') {
1878 styler.ColourTo(i, StateToPrint);
1879 state = SCE_HP_DEFAULT;
1881 break;
1882 case SCE_HP_CHARACTER:
1883 if (ch == '\\') {
1884 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1885 i++;
1886 ch = chNext;
1887 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1889 } else if (ch == '\'') {
1890 styler.ColourTo(i, StateToPrint);
1891 state = SCE_HP_DEFAULT;
1893 break;
1894 case SCE_HP_TRIPLE:
1895 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1896 styler.ColourTo(i, StateToPrint);
1897 state = SCE_HP_DEFAULT;
1899 break;
1900 case SCE_HP_TRIPLEDOUBLE:
1901 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1902 styler.ColourTo(i, StateToPrint);
1903 state = SCE_HP_DEFAULT;
1905 break;
1906 ///////////// start - PHP state handling
1907 case SCE_HPHP_WORD:
1908 if (!IsAWordChar(ch)) {
1909 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1910 if (ch == '/' && chNext == '*') {
1911 i++;
1912 state = SCE_HPHP_COMMENT;
1913 } else if (ch == '/' && chNext == '/') {
1914 i++;
1915 state = SCE_HPHP_COMMENTLINE;
1916 } else if (ch == '#') {
1917 state = SCE_HPHP_COMMENTLINE;
1918 } else if (ch == '\"') {
1919 state = SCE_HPHP_HSTRING;
1920 strcpy(phpStringDelimiter, "\"");
1921 } else if (styler.Match(i, "<<<")) {
1922 bool isSimpleString = false;
1923 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1924 if (strlen(phpStringDelimiter)) {
1925 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1926 if (foldHeredoc) levelCurrent++;
1928 } else if (ch == '\'') {
1929 state = SCE_HPHP_SIMPLESTRING;
1930 strcpy(phpStringDelimiter, "\'");
1931 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1932 state = SCE_HPHP_VARIABLE;
1933 } else if (IsOperator(ch)) {
1934 state = SCE_HPHP_OPERATOR;
1935 } else {
1936 state = SCE_HPHP_DEFAULT;
1939 break;
1940 case SCE_HPHP_NUMBER:
1941 // recognize bases 8,10 or 16 integers OR floating-point numbers
1942 if (!IsADigit(ch)
1943 && strchr(".xXabcdefABCDEF", ch) == NULL
1944 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1945 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1946 if (IsOperator(ch))
1947 state = SCE_HPHP_OPERATOR;
1948 else
1949 state = SCE_HPHP_DEFAULT;
1951 break;
1952 case SCE_HPHP_VARIABLE:
1953 if (!IsPhpWordChar(chNext)) {
1954 styler.ColourTo(i, SCE_HPHP_VARIABLE);
1955 state = SCE_HPHP_DEFAULT;
1957 break;
1958 case SCE_HPHP_COMMENT:
1959 if (ch == '/' && chPrev == '*') {
1960 styler.ColourTo(i, StateToPrint);
1961 state = SCE_HPHP_DEFAULT;
1963 break;
1964 case SCE_HPHP_COMMENTLINE:
1965 if (ch == '\r' || ch == '\n') {
1966 styler.ColourTo(i - 1, StateToPrint);
1967 state = SCE_HPHP_DEFAULT;
1969 break;
1970 case SCE_HPHP_HSTRING:
1971 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1972 // skip the next char
1973 i++;
1974 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1975 && IsPhpWordStart(chNext2)) {
1976 styler.ColourTo(i - 1, StateToPrint);
1977 state = SCE_HPHP_COMPLEX_VARIABLE;
1978 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1979 styler.ColourTo(i - 1, StateToPrint);
1980 state = SCE_HPHP_HSTRING_VARIABLE;
1981 } else if (styler.Match(i, phpStringDelimiter)) {
1982 if (phpStringDelimiter[0] == '\"') {
1983 styler.ColourTo(i, StateToPrint);
1984 state = SCE_HPHP_DEFAULT;
1985 } else if (isLineEnd(chPrev)) {
1986 const int psdLength = static_cast<int>(strlen(phpStringDelimiter));
1987 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1988 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1989 if (isLineEnd(chAfterPsd) ||
1990 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1991 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1992 styler.ColourTo(i, StateToPrint);
1993 state = SCE_HPHP_DEFAULT;
1994 if (foldHeredoc) levelCurrent--;
1998 break;
1999 case SCE_HPHP_SIMPLESTRING:
2000 if (phpStringDelimiter[0] == '\'') {
2001 if (ch == '\\') {
2002 // skip the next char
2003 i++;
2004 } else if (ch == '\'') {
2005 styler.ColourTo(i, StateToPrint);
2006 state = SCE_HPHP_DEFAULT;
2008 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
2009 const int psdLength = static_cast<int>(strlen(phpStringDelimiter));
2010 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2011 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2012 if (isLineEnd(chAfterPsd) ||
2013 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2014 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2015 styler.ColourTo(i, StateToPrint);
2016 state = SCE_HPHP_DEFAULT;
2017 if (foldHeredoc) levelCurrent--;
2020 break;
2021 case SCE_HPHP_HSTRING_VARIABLE:
2022 if (!IsPhpWordChar(chNext)) {
2023 styler.ColourTo(i, StateToPrint);
2024 state = SCE_HPHP_HSTRING;
2026 break;
2027 case SCE_HPHP_COMPLEX_VARIABLE:
2028 if (ch == '}') {
2029 styler.ColourTo(i, StateToPrint);
2030 state = SCE_HPHP_HSTRING;
2032 break;
2033 case SCE_HPHP_OPERATOR:
2034 case SCE_HPHP_DEFAULT:
2035 styler.ColourTo(i - 1, StateToPrint);
2036 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2037 state = SCE_HPHP_NUMBER;
2038 } else if (IsAWordStart(ch)) {
2039 state = SCE_HPHP_WORD;
2040 } else if (ch == '/' && chNext == '*') {
2041 i++;
2042 state = SCE_HPHP_COMMENT;
2043 } else if (ch == '/' && chNext == '/') {
2044 i++;
2045 state = SCE_HPHP_COMMENTLINE;
2046 } else if (ch == '#') {
2047 state = SCE_HPHP_COMMENTLINE;
2048 } else if (ch == '\"') {
2049 state = SCE_HPHP_HSTRING;
2050 strcpy(phpStringDelimiter, "\"");
2051 } else if (styler.Match(i, "<<<")) {
2052 bool isSimpleString = false;
2053 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
2054 if (strlen(phpStringDelimiter)) {
2055 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2056 if (foldHeredoc) levelCurrent++;
2058 } else if (ch == '\'') {
2059 state = SCE_HPHP_SIMPLESTRING;
2060 strcpy(phpStringDelimiter, "\'");
2061 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2062 state = SCE_HPHP_VARIABLE;
2063 } else if (IsOperator(ch)) {
2064 state = SCE_HPHP_OPERATOR;
2065 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2066 state = SCE_HPHP_DEFAULT;
2068 break;
2069 ///////////// end - PHP state handling
2072 // Some of the above terminated their lexeme but since the same character starts
2073 // the same class again, only reenter if non empty segment.
2075 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
2076 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2077 if ((ch == '\"') && (nonEmptySegment)) {
2078 state = SCE_HB_STRING;
2079 } else if (ch == '\'') {
2080 state = SCE_HB_COMMENTLINE;
2081 } else if (IsAWordStart(ch)) {
2082 state = SCE_HB_WORD;
2083 } else if (IsOperator(ch)) {
2084 styler.ColourTo(i, SCE_HB_DEFAULT);
2086 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2087 if ((ch == '\"') && (nonEmptySegment)) {
2088 state = SCE_HBA_STRING;
2089 } else if (ch == '\'') {
2090 state = SCE_HBA_COMMENTLINE;
2091 } else if (IsAWordStart(ch)) {
2092 state = SCE_HBA_WORD;
2093 } else if (IsOperator(ch)) {
2094 styler.ColourTo(i, SCE_HBA_DEFAULT);
2096 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2097 if (ch == '/' && chNext == '*') {
2098 if (styler.SafeGetCharAt(i + 2) == '*')
2099 state = SCE_HJ_COMMENTDOC;
2100 else
2101 state = SCE_HJ_COMMENT;
2102 } else if (ch == '/' && chNext == '/') {
2103 state = SCE_HJ_COMMENTLINE;
2104 } else if ((ch == '\"') && (nonEmptySegment)) {
2105 state = SCE_HJ_DOUBLESTRING;
2106 } else if ((ch == '\'') && (nonEmptySegment)) {
2107 state = SCE_HJ_SINGLESTRING;
2108 } else if (IsAWordStart(ch)) {
2109 state = SCE_HJ_WORD;
2110 } else if (IsOperator(ch)) {
2111 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2116 switch (state) {
2117 case SCE_HJ_WORD:
2118 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2119 break;
2120 case SCE_HB_WORD:
2121 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2122 break;
2123 case SCE_HP_WORD:
2124 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2125 break;
2126 case SCE_HPHP_WORD:
2127 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2128 break;
2129 default:
2130 StateToPrint = statePrintForState(state, inScriptType);
2131 styler.ColourTo(lengthDoc - 1, StateToPrint);
2132 break;
2135 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2136 if (fold) {
2137 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2138 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2142 static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2143 Accessor &styler) {
2144 // Passing in true because we're lexing XML
2145 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, true);
2148 static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2149 Accessor &styler) {
2150 // Passing in false because we're notlexing XML
2151 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, false);
2154 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2155 Accessor &styler) {
2156 if (startPos == 0)
2157 initStyle = SCE_HPHP_DEFAULT;
2158 ColouriseHTMLDoc(startPos, length, initStyle, keywordlists, styler);
// Human-readable descriptions of the keyword lists used by the HTML and XML
// lexers, one entry per list, in list order. This is the table handed to the
// lmHTML and lmXML LexerModule objects.
// NOTE(review): the trailing null entry marks the end of the table; it is
// assumed that LexerModule counts descriptions up to that entry - confirm
// against LexerModule.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	0,
};
// Keyword list descriptions for the "phpscript" lexer. Only the PHP entry is
// meaningful; the remaining slots are empty strings so that "PHP keywords"
// sits at the same index as it does in htmlWordListDesc.
// NOTE(review): the trailing null entry marks the end of the table; it is
// assumed that LexerModule counts descriptions up to that entry - confirm
// against LexerModule.
static const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	0,
};
2181 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
2182 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2183 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);