scintilla: Update scintilla with changeset 3662:1d1c06df8a2f using gtk+3
[anjuta-extras.git] / plugins / scintilla / scintilla / LexHTML.cxx
blob4c50a15a113e26c9d7a56e802c2833723da21bdf
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
// Distance between each client-side script style range and the matching
// "A" style range (SCE_HJA_*, SCE_HBA_*, SCE_HPA_*). statePrintForState()
// adds these offsets and stateForPrintState() subtracts them again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Kind of embedded content the lexer is currently inside.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Nesting mode of the current position; stored in the low two bits of the line state.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// Return true when ch is an ASCII character that may appear inside a word:
// a letter, a digit, '.' or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted. This
// includes negative values, which callers produce when they pass a signed
// char holding a high byte; handing those to a <ctype.h> classifier is
// undefined behaviour.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Return true when ch is an ASCII character that may begin a word:
// a letter, a digit or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted. This
// includes negative values, which callers produce when they pass a signed
// char holding a high byte; handing those to a <ctype.h> classifier is
// undefined behaviour.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// Return true when ch is one of the punctuation characters treated as an
// operator. ASCII letters and digits are never operators. Note that '.' is
// part of the accepted set.
inline bool IsOperator(int ch) {
	if (isascii(ch) && isalnum(ch))
		return false;
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
59 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
60 size_t i = 0;
61 for (; (i < end - start + 1) && (i < len-1); i++) {
62 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
64 s[i] = '\0';
67 static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
69 size_t i = 0;
70 for (; i < sLen-1; i++) {
71 char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
72 if ((i == 0) && !IsAWordStart(ch))
73 break;
74 if ((i > 0) && !IsAWordChar(ch))
75 break;
76 s[i] = ch;
78 s[i] = '\0';
80 return s;
83 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
84 char s[100];
85 GetTextSegment(styler, start, end, s, sizeof(s));
86 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
87 if (strstr(s, "src")) // External script
88 return eScriptNone;
89 if (strstr(s, "vbs"))
90 return eScriptVBS;
91 if (strstr(s, "pyth"))
92 return eScriptPython;
93 if (strstr(s, "javas"))
94 return eScriptJS;
95 if (strstr(s, "jscr"))
96 return eScriptJS;
97 if (strstr(s, "php"))
98 return eScriptPHP;
99 if (strstr(s, "xml")) {
100 const char *xml = strstr(s, "xml");
101 for (const char *t=s; t<xml; t++) {
102 if (!IsASpace(*t)) {
103 return prevValue;
106 return eScriptXML;
109 return prevValue;
112 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
113 int iResult = 0;
114 char s[100];
115 GetTextSegment(styler, start, end, s, sizeof(s));
116 if (0 == strncmp(s, "php", 3)) {
117 iResult = 3;
120 return iResult;
123 static script_type ScriptOfState(int state) {
124 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
125 return eScriptPython;
126 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
127 return eScriptVBS;
128 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
129 return eScriptJS;
130 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
131 return eScriptPHP;
132 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
133 return eScriptSGML;
134 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
135 return eScriptSGMLblock;
136 } else {
137 return eScriptNone;
141 static int statePrintForState(int state, script_mode inScriptType) {
142 int StateToPrint = state;
144 if (state >= SCE_HJ_START) {
145 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
146 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
147 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
148 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
149 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
150 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
154 return StateToPrint;
157 static int stateForPrintState(int StateToPrint) {
158 int state;
160 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
161 state = StateToPrint - SCE_HA_PYTHON;
162 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
163 state = StateToPrint - SCE_HA_VBS;
164 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
165 state = StateToPrint - SCE_HA_JS;
166 } else {
167 state = StateToPrint;
170 return state;
173 static inline bool IsNumber(unsigned int start, Accessor &styler) {
174 return IsADigit(styler[start]) || (styler[start] == '.') ||
175 (styler[start] == '-') || (styler[start] == '#');
178 static inline bool isStringState(int state) {
179 bool bResult;
181 switch (state) {
182 case SCE_HJ_DOUBLESTRING:
183 case SCE_HJ_SINGLESTRING:
184 case SCE_HJA_DOUBLESTRING:
185 case SCE_HJA_SINGLESTRING:
186 case SCE_HB_STRING:
187 case SCE_HBA_STRING:
188 case SCE_HP_STRING:
189 case SCE_HP_CHARACTER:
190 case SCE_HP_TRIPLE:
191 case SCE_HP_TRIPLEDOUBLE:
192 case SCE_HPA_STRING:
193 case SCE_HPA_CHARACTER:
194 case SCE_HPA_TRIPLE:
195 case SCE_HPA_TRIPLEDOUBLE:
196 case SCE_HPHP_HSTRING:
197 case SCE_HPHP_SIMPLESTRING:
198 case SCE_HPHP_HSTRING_VARIABLE:
199 case SCE_HPHP_COMPLEX_VARIABLE:
200 bResult = true;
201 break;
202 default :
203 bResult = false;
204 break;
206 return bResult;
209 static inline bool stateAllowsTermination(int state) {
210 bool allowTermination = !isStringState(state);
211 if (allowTermination) {
212 switch (state) {
213 case SCE_HB_COMMENTLINE:
214 case SCE_HPHP_COMMENT:
215 case SCE_HP_COMMENTLINE:
216 case SCE_HPA_COMMENTLINE:
217 allowTermination = false;
220 return allowTermination;
223 // not really well done, since it's only comments that should lex the %> and <%
224 static inline bool isCommentASPState(int state) {
225 bool bResult;
227 switch (state) {
228 case SCE_HJ_COMMENT:
229 case SCE_HJ_COMMENTLINE:
230 case SCE_HJ_COMMENTDOC:
231 case SCE_HB_COMMENTLINE:
232 case SCE_HP_COMMENTLINE:
233 case SCE_HPHP_COMMENT:
234 case SCE_HPHP_COMMENTLINE:
235 bResult = true;
236 break;
237 default :
238 bResult = false;
239 break;
241 return bResult;
244 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
245 bool wordIsNumber = IsNumber(start, styler);
246 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
247 if (wordIsNumber) {
248 chAttr = SCE_H_NUMBER;
249 } else {
250 char s[100];
251 GetTextSegment(styler, start, end, s, sizeof(s));
252 if (keywords.InList(s))
253 chAttr = SCE_H_ATTRIBUTE;
255 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
256 // No keywords -> all are known
257 chAttr = SCE_H_ATTRIBUTE;
258 styler.ColourTo(end, chAttr);
261 static int classifyTagHTML(unsigned int start, unsigned int end,
262 WordList &keywords, Accessor &styler, bool &tagDontFold,
263 bool caseSensitive, bool isXml, bool allowScripts) {
264 char s[30 + 2];
265 // Copy after the '<'
266 unsigned int i = 0;
267 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
268 char ch = styler[cPos];
269 if ((ch != '<') && (ch != '/')) {
270 s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
274 //The following is only a quick hack, to see if this whole thing would work
275 //we first need the tagname with a trailing space...
276 s[i] = ' ';
277 s[i+1] = '\0';
279 // if the current language is XML, I can fold any tag
280 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
281 //...to find it in the list of no-container-tags
282 tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
284 //now we can remove the trailing space
285 s[i] = '\0';
287 // No keywords -> all are known
288 char chAttr = SCE_H_TAGUNKNOWN;
289 if (s[0] == '!') {
290 chAttr = SCE_H_SGML_DEFAULT;
291 } else if (!keywords || keywords.InList(s)) {
292 chAttr = SCE_H_TAG;
294 styler.ColourTo(end, chAttr);
295 if (chAttr == SCE_H_TAG) {
296 if (allowScripts && 0 == strcmp(s, "script")) {
297 // check to see if this is a self-closing tag by sniffing ahead
298 bool isSelfClose = false;
299 for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
300 char ch = styler.SafeGetCharAt(cPos, '\0');
301 if (ch == '\0' || ch == '>')
302 break;
303 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
304 isSelfClose = true;
305 break;
309 // do not enter a script state if the tag self-closed
310 if (!isSelfClose)
311 chAttr = SCE_H_SCRIPT;
312 } else if (!isXml && 0 == strcmp(s, "comment")) {
313 chAttr = SCE_H_COMMENT;
316 return chAttr;
319 static void classifyWordHTJS(unsigned int start, unsigned int end,
320 WordList &keywords, Accessor &styler, script_mode inScriptType) {
321 char s[30 + 1];
322 unsigned int i = 0;
323 for (; i < end - start + 1 && i < 30; i++) {
324 s[i] = styler[start + i];
326 s[i] = '\0';
328 char chAttr = SCE_HJ_WORD;
329 bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
330 if (wordIsNumber) {
331 chAttr = SCE_HJ_NUMBER;
332 } else if (keywords.InList(s)) {
333 chAttr = SCE_HJ_KEYWORD;
335 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
338 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
339 char chAttr = SCE_HB_IDENTIFIER;
340 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
341 if (wordIsNumber)
342 chAttr = SCE_HB_NUMBER;
343 else {
344 char s[100];
345 GetTextSegment(styler, start, end, s, sizeof(s));
346 if (keywords.InList(s)) {
347 chAttr = SCE_HB_WORD;
348 if (strcmp(s, "rem") == 0)
349 chAttr = SCE_HB_COMMENTLINE;
352 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
353 if (chAttr == SCE_HB_COMMENTLINE)
354 return SCE_HB_COMMENTLINE;
355 else
356 return SCE_HB_DEFAULT;
359 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
360 bool wordIsNumber = IsADigit(styler[start]);
361 char s[30 + 1];
362 unsigned int i = 0;
363 for (; i < end - start + 1 && i < 30; i++) {
364 s[i] = styler[start + i];
366 s[i] = '\0';
367 char chAttr = SCE_HP_IDENTIFIER;
368 if (0 == strcmp(prevWord, "class"))
369 chAttr = SCE_HP_CLASSNAME;
370 else if (0 == strcmp(prevWord, "def"))
371 chAttr = SCE_HP_DEFNAME;
372 else if (wordIsNumber)
373 chAttr = SCE_HP_NUMBER;
374 else if (keywords.InList(s))
375 chAttr = SCE_HP_WORD;
376 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
377 strcpy(prevWord, s);
380 // Update the word colour to default or keyword
381 // Called when in a PHP word
382 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
383 char chAttr = SCE_HPHP_DEFAULT;
384 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
385 if (wordIsNumber)
386 chAttr = SCE_HPHP_NUMBER;
387 else {
388 char s[100];
389 GetTextSegment(styler, start, end, s, sizeof(s));
390 if (keywords.InList(s))
391 chAttr = SCE_HPHP_WORD;
393 styler.ColourTo(end, chAttr);
396 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
397 char s[30 + 1];
398 unsigned int i = 0;
399 for (; i < end - start + 1 && i < 30; i++) {
400 s[i] = styler[start + i];
402 s[i] = '\0';
403 return keywords.InList(s);
406 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
407 char s[30 + 1];
408 unsigned int i = 0;
409 for (; i < end - start + 1 && i < 30; i++) {
410 s[i] = styler[start + i];
412 s[i] = '\0';
413 return (0 == strcmp(s, "[CDATA["));
416 // Return the first state to reach when entering a scripting language
417 static int StateForScript(script_type scriptLanguage) {
418 int Result;
419 switch (scriptLanguage) {
420 case eScriptVBS:
421 Result = SCE_HB_START;
422 break;
423 case eScriptPython:
424 Result = SCE_HP_START;
425 break;
426 case eScriptPHP:
427 Result = SCE_HPHP_DEFAULT;
428 break;
429 case eScriptXML:
430 Result = SCE_H_TAGUNKNOWN;
431 break;
432 case eScriptSGML:
433 Result = SCE_H_SGML_DEFAULT;
434 break;
435 case eScriptComment:
436 Result = SCE_H_COMMENT;
437 break;
438 default :
439 Result = SCE_HJ_START;
440 break;
442 return Result;
// Return true when ch may be part of an HTML word: any non-ASCII value, an
// ASCII letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '-': case '_': case ':': case '!': case '#':
		return true;
	default:
		return false;
	}
}
// Return true when ch may be part of an SGML word: any non-ASCII value, an
// ASCII letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '_': case ':': case '!': case '#': case '[':
		return true;
	default:
		return false;
	}
}
// Return true when ch may begin a PHP identifier: an ASCII letter, '_', or
// any value of 0x7f and above.
static inline bool IsPhpWordStart(int ch) {
	if (isascii(ch) && (isalpha(ch) || (ch == '_')))
		return true;
	return ch >= 0x7f;
}
459 static inline bool IsPhpWordChar(int ch) {
460 return IsADigit(ch) || IsPhpWordStart(ch);
463 static bool InTagState(int state) {
464 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
465 state == SCE_H_SCRIPT ||
466 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
467 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
468 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
471 static bool IsCommentState(const int state) {
472 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
475 static bool IsScriptCommentState(const int state) {
476 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
477 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// Return true when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
// Return true when ch is '(', '=' or ','.
static bool isOKBeforeRE(int ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
488 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
489 if (strlen(blockType) == 0) {
490 return ((ch == '%') && (chNext == '>'));
491 } else if ((0 == strcmp(blockType, "inherit")) ||
492 (0 == strcmp(blockType, "namespace")) ||
493 (0 == strcmp(blockType, "include")) ||
494 (0 == strcmp(blockType, "page"))) {
495 return ((ch == '/') && (chNext == '>'));
496 } else if (0 == strcmp(blockType, "%")) {
497 return isLineEnd(ch);
498 } else if (0 == strcmp(blockType, "{")) {
499 return ch == '}';
500 } else {
501 return (ch == '>');
// Decide whether the characters ch / chNext terminate the current Django
// block: a "%" block ends at "%}" and a "{" block ends at "}}". Any other
// blockType, including the empty string, never matches.
static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
	// Both recognised terminators finish with '}'.
	if (chNext != '}')
		return false;
	if (0 == strcmp(blockType, "%"))
		return ch == '%';
	if (0 == strcmp(blockType, "{"))
		return ch == '}';
	return false;
}
517 static bool isPHPStringState(int state) {
518 return
519 (state == SCE_HPHP_HSTRING) ||
520 (state == SCE_HPHP_SIMPLESTRING) ||
521 (state == SCE_HPHP_HSTRING_VARIABLE) ||
522 (state == SCE_HPHP_COMPLEX_VARIABLE);
525 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
526 int j;
527 const int beginning = i - 1;
528 bool isValidSimpleString = false;
530 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
531 i++;
533 char ch = styler.SafeGetCharAt(i);
534 const char chNext = styler.SafeGetCharAt(i + 1);
535 if (!IsPhpWordStart(ch)) {
536 if (ch == '\'' && IsPhpWordStart(chNext)) {
537 i++;
538 ch = chNext;
539 isSimpleString = true;
540 } else {
541 phpStringDelimiter[0] = '\0';
542 return beginning;
545 phpStringDelimiter[0] = ch;
546 i++;
548 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
549 if (!IsPhpWordChar(styler[j])) {
550 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
551 isValidSimpleString = true;
552 j++;
553 break;
554 } else {
555 phpStringDelimiter[0] = '\0';
556 return beginning;
559 if (j - i < phpStringDelimiterSize - 2)
560 phpStringDelimiter[j-i+1] = styler[j];
561 else
562 i++;
564 if (isSimpleString && !isValidSimpleString) {
565 phpStringDelimiter[0] = '\0';
566 return beginning;
568 phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
569 return j - 1;
572 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
573 Accessor &styler, bool isXml) {
574 WordList &keywords = *keywordlists[0];
575 WordList &keywords2 = *keywordlists[1];
576 WordList &keywords3 = *keywordlists[2];
577 WordList &keywords4 = *keywordlists[3];
578 WordList &keywords5 = *keywordlists[4];
579 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
581 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
582 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
583 char prevWord[200];
584 prevWord[0] = '\0';
585 char phpStringDelimiter[200]; // PHP is not limited in length, we are
586 phpStringDelimiter[0] = '\0';
587 int StateToPrint = initStyle;
588 int state = stateForPrintState(StateToPrint);
589 char makoBlockType[200];
590 makoBlockType[0] = '\0';
591 char djangoBlockType[2];
592 djangoBlockType[0] = '\0';
594 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
595 if (InTagState(state)) {
596 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
597 startPos--;
598 length++;
600 state = SCE_H_DEFAULT;
602 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
603 if (isPHPStringState(state)) {
604 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
605 startPos--;
606 length++;
607 state = styler.StyleAt(startPos);
609 if (startPos == 0)
610 state = SCE_H_DEFAULT;
612 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
614 int lineCurrent = styler.GetLine(startPos);
615 int lineState;
616 if (lineCurrent > 0) {
617 lineState = styler.GetLineState(lineCurrent-1);
618 } else {
619 // Default client and ASP scripting language is JavaScript
620 lineState = eScriptJS << 8;
622 // property asp.default.language
623 // Script in ASP code is initially assumed to be in JavaScript.
624 // To change this to VBScript set asp.default.language to 2. Python is 3.
625 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
627 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
628 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
629 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
630 bool tagDontFold = false; //some HTML tags should not be folded
631 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
632 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
633 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
635 script_type scriptLanguage = ScriptOfState(state);
636 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
637 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
638 scriptLanguage = eScriptComment;
640 script_type beforeLanguage = ScriptOfState(beforePreProc);
642 // property fold.html
643 // Folding is turned on or off for HTML and XML files with this option.
644 // The fold option must also be on for folding to occur.
645 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
647 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
649 // property fold.html.preprocessor
650 // Folding is turned on or off for scripts embedded in HTML files with this option.
651 // The default is on.
652 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
654 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
656 // property fold.hypertext.comment
657 // Allow folding for comments in scripts embedded in HTML.
658 // The default is off.
659 const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
661 // property fold.hypertext.heredoc
662 // Allow folding for heredocs in scripts embedded in HTML.
663 // The default is off.
664 const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
666 // property html.tags.case.sensitive
667 // For XML and HTML, setting this property to 1 will make tags match in a case
668 // sensitive way which is the expected behaviour for XML and XHTML.
669 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
671 // property lexer.xml.allow.scripts
672 // Set to 0 to disable scripts in XML.
673 const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
675 // property lexer.html.mako
676 // Set to 1 to enable the mako template language.
677 const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
679 // property lexer.html.django
680 // Set to 1 to enable the django template language.
681 const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
683 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
684 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
685 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
687 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
688 int levelCurrent = levelPrev;
689 int visibleChars = 0;
690 int lineStartVisibleChars = 0;
692 int chPrev = ' ';
693 int ch = ' ';
694 int chPrevNonWhite = ' ';
695 // look back to set chPrevNonWhite properly for better regex colouring
696 if (scriptLanguage == eScriptJS && startPos > 0) {
697 int back = startPos;
698 int style = 0;
699 while (--back) {
700 style = styler.StyleAt(back);
701 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
702 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
703 break;
705 if (style == SCE_HJ_SYMBOLS) {
706 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
710 styler.StartSegment(startPos);
711 const int lengthDoc = startPos + length;
712 for (int i = startPos; i < lengthDoc; i++) {
713 const int chPrev2 = chPrev;
714 chPrev = ch;
715 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
716 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
717 chPrevNonWhite = ch;
718 ch = static_cast<unsigned char>(styler[i]);
719 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
720 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
722 // Handle DBCS codepages
723 if (styler.IsLeadByte(static_cast<char>(ch))) {
724 chPrev = ' ';
725 i += 1;
726 continue;
729 if ((!IsASpace(ch) || !foldCompact) && fold)
730 visibleChars++;
731 if (!IsASpace(ch))
732 lineStartVisibleChars++;
734 // decide what is the current state to print (depending of the script tag)
735 StateToPrint = statePrintForState(state, inScriptType);
737 // handle script folding
738 if (fold) {
739 switch (scriptLanguage) {
740 case eScriptJS:
741 case eScriptPHP:
742 //not currently supported case eScriptVBS:
744 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
745 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
746 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
747 if (ch == '#') {
748 int j = i + 1;
749 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
750 j++;
752 if (styler.Match(j, "region") || styler.Match(j, "if")) {
753 levelCurrent++;
754 } else if (styler.Match(j, "end")) {
755 levelCurrent--;
757 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
758 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
760 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
761 levelCurrent--;
763 break;
764 case eScriptPython:
765 if (state != SCE_HP_COMMENTLINE) {
766 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
767 levelCurrent++;
768 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
769 // check if the number of tabs is lower than the level
770 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
771 for (int j = 0; Findlevel > 0; j++) {
772 char chTmp = styler.SafeGetCharAt(i + j + 1);
773 if (chTmp == '\t') {
774 Findlevel -= 8;
775 } else if (chTmp == ' ') {
776 Findlevel--;
777 } else {
778 break;
782 if (Findlevel > 0) {
783 levelCurrent -= Findlevel / 8;
784 if (Findlevel % 8)
785 levelCurrent--;
789 break;
790 default:
791 break;
795 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
796 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
797 // Avoid triggering two times on Dos/Win
798 // New line -> record any line state onto /next/ line
799 if (fold) {
800 int lev = levelPrev;
801 if (visibleChars == 0)
802 lev |= SC_FOLDLEVELWHITEFLAG;
803 if ((levelCurrent > levelPrev) && (visibleChars > 0))
804 lev |= SC_FOLDLEVELHEADERFLAG;
806 styler.SetLevel(lineCurrent, lev);
807 visibleChars = 0;
808 levelPrev = levelCurrent;
810 styler.SetLineState(lineCurrent,
811 ((inScriptType & 0x03) << 0) |
812 ((tagOpened & 0x01) << 2) |
813 ((tagClosing & 0x01) << 3) |
814 ((aspScript & 0x0F) << 4) |
815 ((clientScript & 0x0F) << 8) |
816 ((beforePreProc & 0xFF) << 12));
817 lineCurrent++;
818 lineStartVisibleChars = 0;
821 // Allow falling through to mako handling code if newline is going to end a block
822 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
823 (!isMako || (0 != strcmp(makoBlockType, "%")))) {
826 // generic end of script processing
827 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
828 // Check if it's the end of the script tag (or any other HTML tag)
829 switch (state) {
830 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
831 case SCE_H_DOUBLESTRING:
832 case SCE_H_SINGLESTRING:
833 case SCE_HJ_COMMENT:
834 case SCE_HJ_COMMENTDOC:
835 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
836 // the end of script marker from some JS interpreters.
837 case SCE_HB_COMMENTLINE:
838 case SCE_HBA_COMMENTLINE:
839 case SCE_HJ_DOUBLESTRING:
840 case SCE_HJ_SINGLESTRING:
841 case SCE_HJ_REGEX:
842 case SCE_HB_STRING:
843 case SCE_HBA_STRING:
844 case SCE_HP_STRING:
845 case SCE_HP_TRIPLE:
846 case SCE_HP_TRIPLEDOUBLE:
847 case SCE_HPHP_HSTRING:
848 case SCE_HPHP_SIMPLESTRING:
849 case SCE_HPHP_COMMENT:
850 case SCE_HPHP_COMMENTLINE:
851 break;
852 default :
853 // check if the closing tag is a script tag
854 if (const char *tag =
855 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
856 state == SCE_H_COMMENT ? "comment" : 0) {
857 int j = i + 2;
858 int chr;
859 do {
860 chr = static_cast<int>(*tag++);
861 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
862 if (chr != 0) break;
864 // closing tag of the script (it's a closing HTML tag anyway)
865 styler.ColourTo(i - 1, StateToPrint);
866 state = SCE_H_TAGUNKNOWN;
867 inScriptType = eHtml;
868 scriptLanguage = eScriptNone;
869 clientScript = eScriptJS;
870 i += 2;
871 visibleChars += 2;
872 tagClosing = true;
873 continue;
877 /////////////////////////////////////
878 // handle the start of PHP pre-processor = Non-HTML
879 else if ((state != SCE_H_ASPAT) &&
880 !isPHPStringState(state) &&
881 (state != SCE_HPHP_COMMENT) &&
882 (state != SCE_HPHP_COMMENTLINE) &&
883 (ch == '<') &&
884 (chNext == '?') &&
885 !IsScriptCommentState(state)) {
886 beforeLanguage = scriptLanguage;
887 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
888 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
889 styler.ColourTo(i - 1, StateToPrint);
890 beforePreProc = state;
891 i++;
892 visibleChars++;
893 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
894 if (scriptLanguage == eScriptXML)
895 styler.ColourTo(i, SCE_H_XMLSTART);
896 else
897 styler.ColourTo(i, SCE_H_QUESTION);
898 state = StateForScript(scriptLanguage);
899 if (inScriptType == eNonHtmlScript)
900 inScriptType = eNonHtmlScriptPreProc;
901 else
902 inScriptType = eNonHtmlPreProc;
903 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
904 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
905 levelCurrent++;
907 // should be better
908 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
909 continue;
912 // handle the start Mako template Python code
913 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
914 (lineStartVisibleChars == 1 && ch == '%') ||
915 (ch == '$' && chNext == '{') ||
916 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
917 if (ch == '%')
918 strcpy(makoBlockType, "%");
919 else if (ch == '$')
920 strcpy(makoBlockType, "{");
921 else if (chNext == '/')
922 GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
923 else
924 GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
925 styler.ColourTo(i - 1, StateToPrint);
926 beforePreProc = state;
927 if (inScriptType == eNonHtmlScript)
928 inScriptType = eNonHtmlScriptPreProc;
929 else
930 inScriptType = eNonHtmlPreProc;
932 if (chNext == '/') {
933 i += 2;
934 visibleChars += 2;
935 } else if (ch != '%') {
936 i++;
937 visibleChars++;
939 state = SCE_HP_START;
940 scriptLanguage = eScriptPython;
941 styler.ColourTo(i, SCE_H_ASP);
942 if (foldHTMLPreprocessor && ch == '<')
943 levelCurrent++;
945 if (ch != '%' && ch != '$') {
946 i += strlen(makoBlockType);
947 visibleChars += strlen(makoBlockType);
948 if (keywords4.InList(makoBlockType))
949 styler.ColourTo(i, SCE_HP_WORD);
950 else
951 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
954 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
955 continue;
958 // handle the start/end of Django comment
959 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
960 styler.ColourTo(i - 1, StateToPrint);
961 beforePreProc = state;
962 beforeLanguage = scriptLanguage;
963 if (inScriptType == eNonHtmlScript)
964 inScriptType = eNonHtmlScriptPreProc;
965 else
966 inScriptType = eNonHtmlPreProc;
967 i += 1;
968 visibleChars += 1;
969 scriptLanguage = eScriptComment;
970 state = SCE_H_COMMENT;
971 styler.ColourTo(i, SCE_H_ASP);
972 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
973 continue;
974 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
975 styler.ColourTo(i - 1, StateToPrint);
976 i += 1;
977 visibleChars += 1;
978 styler.ColourTo(i, SCE_H_ASP);
979 state = beforePreProc;
980 if (inScriptType == eNonHtmlScriptPreProc)
981 inScriptType = eNonHtmlScript;
982 else
983 inScriptType = eHtml;
984 scriptLanguage = beforeLanguage;
985 continue;
988 // handle the start Django template code
989 else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
990 if (chNext == '%')
991 strcpy(djangoBlockType, "%");
992 else
993 strcpy(djangoBlockType, "{");
994 styler.ColourTo(i - 1, StateToPrint);
995 beforePreProc = state;
996 if (inScriptType == eNonHtmlScript)
997 inScriptType = eNonHtmlScriptPreProc;
998 else
999 inScriptType = eNonHtmlPreProc;
1001 i += 1;
1002 visibleChars += 1;
1003 state = SCE_HP_START;
1004 beforeLanguage = scriptLanguage;
1005 scriptLanguage = eScriptPython;
1006 styler.ColourTo(i, SCE_H_ASP);
1008 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1009 continue;
1012 // handle the start of ASP pre-processor = Non-HTML
1013 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1014 styler.ColourTo(i - 1, StateToPrint);
1015 beforePreProc = state;
1016 if (inScriptType == eNonHtmlScript)
1017 inScriptType = eNonHtmlScriptPreProc;
1018 else
1019 inScriptType = eNonHtmlPreProc;
1021 if (chNext2 == '@') {
1022 i += 2; // place as if it was the second next char treated
1023 visibleChars += 2;
1024 state = SCE_H_ASPAT;
1025 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1026 styler.ColourTo(i + 3, SCE_H_ASP);
1027 state = SCE_H_XCCOMMENT;
1028 scriptLanguage = eScriptVBS;
1029 continue;
1030 } else {
1031 if (chNext2 == '=') {
1032 i += 2; // place as if it was the second next char treated
1033 visibleChars += 2;
1034 } else {
1035 i++; // place as if it was the next char treated
1036 visibleChars++;
1039 state = StateForScript(aspScript);
1041 scriptLanguage = eScriptVBS;
1042 styler.ColourTo(i, SCE_H_ASP);
1043 // fold whole script
1044 if (foldHTMLPreprocessor)
1045 levelCurrent++;
1046 // should be better
1047 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1048 continue;
1051 /////////////////////////////////////
1052 // handle the start of SGML language (DTD)
1053 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1054 (chPrev == '<') &&
1055 (ch == '!') &&
1056 (StateToPrint != SCE_H_CDATA) &&
1057 (!IsCommentState(StateToPrint)) &&
1058 (!IsScriptCommentState(StateToPrint))) {
1059 beforePreProc = state;
1060 styler.ColourTo(i - 2, StateToPrint);
1061 if ((chNext == '-') && (chNext2 == '-')) {
1062 state = SCE_H_COMMENT; // wait for a pending command
1063 styler.ColourTo(i + 2, SCE_H_COMMENT);
1064 i += 2; // follow styling after the --
1065 } else if (isWordCdata(i + 1, i + 7, styler)) {
1066 state = SCE_H_CDATA;
1067 } else {
1068 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1069 scriptLanguage = eScriptSGML;
1070 state = SCE_H_SGML_COMMAND; // wait for a pending command
1072 // fold whole tag (-- when closing the tag)
1073 if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1074 levelCurrent++;
1075 continue;
1078 // handle the end of Mako Python code
1079 else if (isMako &&
1080 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1081 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1082 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1083 if (state == SCE_H_ASPAT) {
1084 aspScript = segIsScriptingIndicator(styler,
1085 styler.GetStartSegment(), i - 1, aspScript);
1087 if (state == SCE_HP_WORD) {
1088 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1089 } else {
1090 styler.ColourTo(i - 1, StateToPrint);
1092 if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1093 i++;
1094 visibleChars++;
1096 if (0 != strcmp(makoBlockType, "%")) {
1097 styler.ColourTo(i, SCE_H_ASP);
1099 state = beforePreProc;
1100 if (inScriptType == eNonHtmlScriptPreProc)
1101 inScriptType = eNonHtmlScript;
1102 else
1103 inScriptType = eHtml;
1104 if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1105 levelCurrent--;
1107 scriptLanguage = eScriptNone;
1108 continue;
1111 // handle the end of Django template code
1112 else if (isDjango &&
1113 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1114 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1115 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1116 if (state == SCE_H_ASPAT) {
1117 aspScript = segIsScriptingIndicator(styler,
1118 styler.GetStartSegment(), i - 1, aspScript);
1120 if (state == SCE_HP_WORD) {
1121 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1122 } else {
1123 styler.ColourTo(i - 1, StateToPrint);
1125 i += 1;
1126 visibleChars += 1;
1127 styler.ColourTo(i, SCE_H_ASP);
1128 state = beforePreProc;
1129 if (inScriptType == eNonHtmlScriptPreProc)
1130 inScriptType = eNonHtmlScript;
1131 else
1132 inScriptType = eHtml;
1133 scriptLanguage = beforeLanguage;
1134 continue;
1137 // handle the end of a pre-processor = Non-HTML
1138 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1139 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1140 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1141 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1142 if (state == SCE_H_ASPAT) {
1143 aspScript = segIsScriptingIndicator(styler,
1144 styler.GetStartSegment(), i - 1, aspScript);
1146 // Bounce out of any ASP mode
1147 switch (state) {
1148 case SCE_HJ_WORD:
1149 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1150 break;
1151 case SCE_HB_WORD:
1152 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1153 break;
1154 case SCE_HP_WORD:
1155 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1156 break;
1157 case SCE_HPHP_WORD:
1158 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1159 break;
1160 case SCE_H_XCCOMMENT:
1161 styler.ColourTo(i - 1, state);
1162 break;
1163 default :
1164 styler.ColourTo(i - 1, StateToPrint);
1165 break;
1167 if (scriptLanguage != eScriptSGML) {
1168 i++;
1169 visibleChars++;
1171 if (ch == '%')
1172 styler.ColourTo(i, SCE_H_ASP);
1173 else if (scriptLanguage == eScriptXML)
1174 styler.ColourTo(i, SCE_H_XMLEND);
1175 else if (scriptLanguage == eScriptSGML)
1176 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1177 else
1178 styler.ColourTo(i, SCE_H_QUESTION);
1179 state = beforePreProc;
1180 if (inScriptType == eNonHtmlScriptPreProc)
1181 inScriptType = eNonHtmlScript;
1182 else
1183 inScriptType = eHtml;
1184 // Unfold all scripting languages, except for XML tag
1185 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1186 levelCurrent--;
1188 scriptLanguage = beforeLanguage;
1189 continue;
1191 /////////////////////////////////////
1193 switch (state) {
1194 case SCE_H_DEFAULT:
1195 if (ch == '<') {
1196 // in HTML, fold on tag open and unfold on tag close
1197 tagOpened = true;
1198 tagClosing = (chNext == '/');
1199 styler.ColourTo(i - 1, StateToPrint);
1200 if (chNext != '!')
1201 state = SCE_H_TAGUNKNOWN;
1202 } else if (ch == '&') {
1203 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1204 state = SCE_H_ENTITY;
1206 break;
1207 case SCE_H_SGML_DEFAULT:
1208 case SCE_H_SGML_BLOCK_DEFAULT:
1209 // if (scriptLanguage == eScriptSGMLblock)
1210 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1212 if (ch == '\"') {
1213 styler.ColourTo(i - 1, StateToPrint);
1214 state = SCE_H_SGML_DOUBLESTRING;
1215 } else if (ch == '\'') {
1216 styler.ColourTo(i - 1, StateToPrint);
1217 state = SCE_H_SGML_SIMPLESTRING;
1218 } else if ((ch == '-') && (chPrev == '-')) {
1219 if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1220 styler.ColourTo(i - 2, StateToPrint);
1222 state = SCE_H_SGML_COMMENT;
1223 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1224 styler.ColourTo(i - 2, StateToPrint);
1225 state = SCE_H_SGML_ENTITY;
1226 } else if (ch == '#') {
1227 styler.ColourTo(i - 1, StateToPrint);
1228 state = SCE_H_SGML_SPECIAL;
1229 } else if (ch == '[') {
1230 styler.ColourTo(i - 1, StateToPrint);
1231 scriptLanguage = eScriptSGMLblock;
1232 state = SCE_H_SGML_BLOCK_DEFAULT;
1233 } else if (ch == ']') {
1234 if (scriptLanguage == eScriptSGMLblock) {
1235 styler.ColourTo(i, StateToPrint);
1236 scriptLanguage = eScriptSGML;
1237 } else {
1238 styler.ColourTo(i - 1, StateToPrint);
1239 styler.ColourTo(i, SCE_H_SGML_ERROR);
1241 state = SCE_H_SGML_DEFAULT;
1242 } else if (scriptLanguage == eScriptSGMLblock) {
1243 if ((ch == '!') && (chPrev == '<')) {
1244 styler.ColourTo(i - 2, StateToPrint);
1245 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1246 state = SCE_H_SGML_COMMAND;
1247 } else if (ch == '>') {
1248 styler.ColourTo(i - 1, StateToPrint);
1249 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1252 break;
1253 case SCE_H_SGML_COMMAND:
1254 if ((ch == '-') && (chPrev == '-')) {
1255 styler.ColourTo(i - 2, StateToPrint);
1256 state = SCE_H_SGML_COMMENT;
1257 } else if (!issgmlwordchar(ch)) {
1258 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1259 styler.ColourTo(i - 1, StateToPrint);
1260 state = SCE_H_SGML_1ST_PARAM;
1261 } else {
1262 state = SCE_H_SGML_ERROR;
1265 break;
1266 case SCE_H_SGML_1ST_PARAM:
1267 // wait for the beginning of the word
1268 if ((ch == '-') && (chPrev == '-')) {
1269 if (scriptLanguage == eScriptSGMLblock) {
1270 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1271 } else {
1272 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1274 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1275 } else if (issgmlwordchar(ch)) {
1276 if (scriptLanguage == eScriptSGMLblock) {
1277 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1278 } else {
1279 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1281 // find the length of the word
1282 int size = 1;
1283 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1284 size++;
1285 styler.ColourTo(i + size - 1, StateToPrint);
1286 i += size - 1;
1287 visibleChars += size - 1;
1288 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1289 if (scriptLanguage == eScriptSGMLblock) {
1290 state = SCE_H_SGML_BLOCK_DEFAULT;
1291 } else {
1292 state = SCE_H_SGML_DEFAULT;
1294 continue;
1296 break;
1297 case SCE_H_SGML_ERROR:
1298 if ((ch == '-') && (chPrev == '-')) {
1299 styler.ColourTo(i - 2, StateToPrint);
1300 state = SCE_H_SGML_COMMENT;
1302 case SCE_H_SGML_DOUBLESTRING:
1303 if (ch == '\"') {
1304 styler.ColourTo(i, StateToPrint);
1305 state = SCE_H_SGML_DEFAULT;
1307 break;
1308 case SCE_H_SGML_SIMPLESTRING:
1309 if (ch == '\'') {
1310 styler.ColourTo(i, StateToPrint);
1311 state = SCE_H_SGML_DEFAULT;
1313 break;
1314 case SCE_H_SGML_COMMENT:
1315 if ((ch == '-') && (chPrev == '-')) {
1316 styler.ColourTo(i, StateToPrint);
1317 state = SCE_H_SGML_DEFAULT;
1319 break;
1320 case SCE_H_CDATA:
1321 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1322 styler.ColourTo(i, StateToPrint);
1323 state = SCE_H_DEFAULT;
1324 levelCurrent--;
1326 break;
1327 case SCE_H_COMMENT:
1328 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1329 styler.ColourTo(i, StateToPrint);
1330 state = SCE_H_DEFAULT;
1331 levelCurrent--;
1333 break;
1334 case SCE_H_SGML_1ST_PARAM_COMMENT:
1335 if ((ch == '-') && (chPrev == '-')) {
1336 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1337 state = SCE_H_SGML_1ST_PARAM;
1339 break;
1340 case SCE_H_SGML_SPECIAL:
1341 if (!(isascii(ch) && isupper(ch))) {
1342 styler.ColourTo(i - 1, StateToPrint);
1343 if (isalnum(ch)) {
1344 state = SCE_H_SGML_ERROR;
1345 } else {
1346 state = SCE_H_SGML_DEFAULT;
1349 break;
1350 case SCE_H_SGML_ENTITY:
1351 if (ch == ';') {
1352 styler.ColourTo(i, StateToPrint);
1353 state = SCE_H_SGML_DEFAULT;
1354 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1355 styler.ColourTo(i, SCE_H_SGML_ERROR);
1356 state = SCE_H_SGML_DEFAULT;
1358 break;
1359 case SCE_H_ENTITY:
1360 if (ch == ';') {
1361 styler.ColourTo(i, StateToPrint);
1362 state = SCE_H_DEFAULT;
1364 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1365 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1366 if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1367 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1368 else
1369 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1370 state = SCE_H_DEFAULT;
1372 break;
1373 case SCE_H_TAGUNKNOWN:
1374 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1375 int eClass = classifyTagHTML(styler.GetStartSegment(),
1376 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1377 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1378 if (!tagClosing) {
1379 inScriptType = eNonHtmlScript;
1380 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1381 } else {
1382 scriptLanguage = eScriptNone;
1384 eClass = SCE_H_TAG;
1386 if (ch == '>') {
1387 styler.ColourTo(i, eClass);
1388 if (inScriptType == eNonHtmlScript) {
1389 state = StateForScript(scriptLanguage);
1390 } else {
1391 state = SCE_H_DEFAULT;
1393 tagOpened = false;
1394 if (!tagDontFold) {
1395 if (tagClosing) {
1396 levelCurrent--;
1397 } else {
1398 levelCurrent++;
1401 tagClosing = false;
1402 } else if (ch == '/' && chNext == '>') {
1403 if (eClass == SCE_H_TAGUNKNOWN) {
1404 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1405 } else {
1406 styler.ColourTo(i - 1, StateToPrint);
1407 styler.ColourTo(i + 1, SCE_H_TAGEND);
1409 i++;
1410 ch = chNext;
1411 state = SCE_H_DEFAULT;
1412 tagOpened = false;
1413 } else {
1414 if (eClass != SCE_H_TAGUNKNOWN) {
1415 if (eClass == SCE_H_SGML_DEFAULT) {
1416 state = SCE_H_SGML_DEFAULT;
1417 } else {
1418 state = SCE_H_OTHER;
1423 break;
1424 case SCE_H_ATTRIBUTE:
1425 if (!setAttributeContinue.Contains(ch)) {
1426 if (inScriptType == eNonHtmlScript) {
1427 int scriptLanguagePrev = scriptLanguage;
1428 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1429 scriptLanguage = clientScript;
1430 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1431 inScriptType = eHtml;
1433 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1434 if (ch == '>') {
1435 styler.ColourTo(i, SCE_H_TAG);
1436 if (inScriptType == eNonHtmlScript) {
1437 state = StateForScript(scriptLanguage);
1438 } else {
1439 state = SCE_H_DEFAULT;
1441 tagOpened = false;
1442 if (!tagDontFold) {
1443 if (tagClosing) {
1444 levelCurrent--;
1445 } else {
1446 levelCurrent++;
1449 tagClosing = false;
1450 } else if (ch == '=') {
1451 styler.ColourTo(i, SCE_H_OTHER);
1452 state = SCE_H_VALUE;
1453 } else {
1454 state = SCE_H_OTHER;
1457 break;
1458 case SCE_H_OTHER:
1459 if (ch == '>') {
1460 styler.ColourTo(i - 1, StateToPrint);
1461 styler.ColourTo(i, SCE_H_TAG);
1462 if (inScriptType == eNonHtmlScript) {
1463 state = StateForScript(scriptLanguage);
1464 } else {
1465 state = SCE_H_DEFAULT;
1467 tagOpened = false;
1468 if (!tagDontFold) {
1469 if (tagClosing) {
1470 levelCurrent--;
1471 } else {
1472 levelCurrent++;
1475 tagClosing = false;
1476 } else if (ch == '\"') {
1477 styler.ColourTo(i - 1, StateToPrint);
1478 state = SCE_H_DOUBLESTRING;
1479 } else if (ch == '\'') {
1480 styler.ColourTo(i - 1, StateToPrint);
1481 state = SCE_H_SINGLESTRING;
1482 } else if (ch == '=') {
1483 styler.ColourTo(i, StateToPrint);
1484 state = SCE_H_VALUE;
1485 } else if (ch == '/' && chNext == '>') {
1486 styler.ColourTo(i - 1, StateToPrint);
1487 styler.ColourTo(i + 1, SCE_H_TAGEND);
1488 i++;
1489 ch = chNext;
1490 state = SCE_H_DEFAULT;
1491 tagOpened = false;
1492 } else if (ch == '?' && chNext == '>') {
1493 styler.ColourTo(i - 1, StateToPrint);
1494 styler.ColourTo(i + 1, SCE_H_XMLEND);
1495 i++;
1496 ch = chNext;
1497 state = SCE_H_DEFAULT;
1498 } else if (setHTMLWord.Contains(ch)) {
1499 styler.ColourTo(i - 1, StateToPrint);
1500 state = SCE_H_ATTRIBUTE;
1502 break;
1503 case SCE_H_DOUBLESTRING:
1504 if (ch == '\"') {
1505 if (inScriptType == eNonHtmlScript) {
1506 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1508 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1509 state = SCE_H_OTHER;
1511 break;
1512 case SCE_H_SINGLESTRING:
1513 if (ch == '\'') {
1514 if (inScriptType == eNonHtmlScript) {
1515 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1517 styler.ColourTo(i, SCE_H_SINGLESTRING);
1518 state = SCE_H_OTHER;
1520 break;
1521 case SCE_H_VALUE:
1522 if (!setHTMLWord.Contains(ch)) {
1523 if (ch == '\"' && chPrev == '=') {
1524 // Should really test for being first character
1525 state = SCE_H_DOUBLESTRING;
1526 } else if (ch == '\'' && chPrev == '=') {
1527 state = SCE_H_SINGLESTRING;
1528 } else {
1529 if (IsNumber(styler.GetStartSegment(), styler)) {
1530 styler.ColourTo(i - 1, SCE_H_NUMBER);
1531 } else {
1532 styler.ColourTo(i - 1, StateToPrint);
1534 if (ch == '>') {
1535 styler.ColourTo(i, SCE_H_TAG);
1536 if (inScriptType == eNonHtmlScript) {
1537 state = StateForScript(scriptLanguage);
1538 } else {
1539 state = SCE_H_DEFAULT;
1541 tagOpened = false;
1542 if (!tagDontFold) {
1543 if (tagClosing) {
1544 levelCurrent--;
1545 } else {
1546 levelCurrent++;
1549 tagClosing = false;
1550 } else {
1551 state = SCE_H_OTHER;
1555 break;
1556 case SCE_HJ_DEFAULT:
1557 case SCE_HJ_START:
1558 case SCE_HJ_SYMBOLS:
1559 if (IsAWordStart(ch)) {
1560 styler.ColourTo(i - 1, StateToPrint);
1561 state = SCE_HJ_WORD;
1562 } else if (ch == '/' && chNext == '*') {
1563 styler.ColourTo(i - 1, StateToPrint);
1564 if (chNext2 == '*')
1565 state = SCE_HJ_COMMENTDOC;
1566 else
1567 state = SCE_HJ_COMMENT;
1568 } else if (ch == '/' && chNext == '/') {
1569 styler.ColourTo(i - 1, StateToPrint);
1570 state = SCE_HJ_COMMENTLINE;
1571 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1572 styler.ColourTo(i - 1, StateToPrint);
1573 state = SCE_HJ_REGEX;
1574 } else if (ch == '\"') {
1575 styler.ColourTo(i - 1, StateToPrint);
1576 state = SCE_HJ_DOUBLESTRING;
1577 } else if (ch == '\'') {
1578 styler.ColourTo(i - 1, StateToPrint);
1579 state = SCE_HJ_SINGLESTRING;
1580 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1581 styler.SafeGetCharAt(i + 3) == '-') {
1582 styler.ColourTo(i - 1, StateToPrint);
1583 state = SCE_HJ_COMMENTLINE;
1584 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1585 styler.ColourTo(i - 1, StateToPrint);
1586 state = SCE_HJ_COMMENTLINE;
1587 i += 2;
1588 } else if (IsOperator(ch)) {
1589 styler.ColourTo(i - 1, StateToPrint);
1590 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1591 state = SCE_HJ_DEFAULT;
1592 } else if ((ch == ' ') || (ch == '\t')) {
1593 if (state == SCE_HJ_START) {
1594 styler.ColourTo(i - 1, StateToPrint);
1595 state = SCE_HJ_DEFAULT;
1598 break;
1599 case SCE_HJ_WORD:
1600 if (!IsAWordChar(ch)) {
1601 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1602 //styler.ColourTo(i - 1, eHTJSKeyword);
1603 state = SCE_HJ_DEFAULT;
1604 if (ch == '/' && chNext == '*') {
1605 if (chNext2 == '*')
1606 state = SCE_HJ_COMMENTDOC;
1607 else
1608 state = SCE_HJ_COMMENT;
1609 } else if (ch == '/' && chNext == '/') {
1610 state = SCE_HJ_COMMENTLINE;
1611 } else if (ch == '\"') {
1612 state = SCE_HJ_DOUBLESTRING;
1613 } else if (ch == '\'') {
1614 state = SCE_HJ_SINGLESTRING;
1615 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1616 styler.ColourTo(i - 1, StateToPrint);
1617 state = SCE_HJ_COMMENTLINE;
1618 i += 2;
1619 } else if (IsOperator(ch)) {
1620 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1621 state = SCE_HJ_DEFAULT;
1624 break;
1625 case SCE_HJ_COMMENT:
1626 case SCE_HJ_COMMENTDOC:
1627 if (ch == '/' && chPrev == '*') {
1628 styler.ColourTo(i, StateToPrint);
1629 state = SCE_HJ_DEFAULT;
1630 ch = ' ';
1632 break;
1633 case SCE_HJ_COMMENTLINE:
1634 if (ch == '\r' || ch == '\n') {
1635 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1636 state = SCE_HJ_DEFAULT;
1637 ch = ' ';
1639 break;
1640 case SCE_HJ_DOUBLESTRING:
1641 if (ch == '\\') {
1642 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1643 i++;
1645 } else if (ch == '\"') {
1646 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1647 state = SCE_HJ_DEFAULT;
1648 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1649 styler.ColourTo(i - 1, StateToPrint);
1650 state = SCE_HJ_COMMENTLINE;
1651 i += 2;
1652 } else if (isLineEnd(ch)) {
1653 styler.ColourTo(i - 1, StateToPrint);
1654 state = SCE_HJ_STRINGEOL;
1656 break;
1657 case SCE_HJ_SINGLESTRING:
1658 if (ch == '\\') {
1659 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1660 i++;
1662 } else if (ch == '\'') {
1663 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1664 state = SCE_HJ_DEFAULT;
1665 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1666 styler.ColourTo(i - 1, StateToPrint);
1667 state = SCE_HJ_COMMENTLINE;
1668 i += 2;
1669 } else if (isLineEnd(ch)) {
1670 styler.ColourTo(i - 1, StateToPrint);
1671 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
1672 state = SCE_HJ_STRINGEOL;
1675 break;
1676 case SCE_HJ_STRINGEOL:
1677 if (!isLineEnd(ch)) {
1678 styler.ColourTo(i - 1, StateToPrint);
1679 state = SCE_HJ_DEFAULT;
1680 } else if (!isLineEnd(chNext)) {
1681 styler.ColourTo(i, StateToPrint);
1682 state = SCE_HJ_DEFAULT;
1684 break;
1685 case SCE_HJ_REGEX:
1686 if (ch == '\r' || ch == '\n' || ch == '/') {
1687 if (ch == '/') {
1688 while (isascii(chNext) && islower(chNext)) { // gobble regex flags
1689 i++;
1690 ch = chNext;
1691 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1694 styler.ColourTo(i, StateToPrint);
1695 state = SCE_HJ_DEFAULT;
1696 } else if (ch == '\\') {
1697 // Gobble up the quoted character
1698 if (chNext == '\\' || chNext == '/') {
1699 i++;
1700 ch = chNext;
1701 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1704 break;
1705 case SCE_HB_DEFAULT:
1706 case SCE_HB_START:
1707 if (IsAWordStart(ch)) {
1708 styler.ColourTo(i - 1, StateToPrint);
1709 state = SCE_HB_WORD;
1710 } else if (ch == '\'') {
1711 styler.ColourTo(i - 1, StateToPrint);
1712 state = SCE_HB_COMMENTLINE;
1713 } else if (ch == '\"') {
1714 styler.ColourTo(i - 1, StateToPrint);
1715 state = SCE_HB_STRING;
1716 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1717 styler.SafeGetCharAt(i + 3) == '-') {
1718 styler.ColourTo(i - 1, StateToPrint);
1719 state = SCE_HB_COMMENTLINE;
1720 } else if (IsOperator(ch)) {
1721 styler.ColourTo(i - 1, StateToPrint);
1722 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1723 state = SCE_HB_DEFAULT;
1724 } else if ((ch == ' ') || (ch == '\t')) {
1725 if (state == SCE_HB_START) {
1726 styler.ColourTo(i - 1, StateToPrint);
1727 state = SCE_HB_DEFAULT;
1730 break;
1731 case SCE_HB_WORD:
1732 if (!IsAWordChar(ch)) {
1733 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1734 if (state == SCE_HB_DEFAULT) {
1735 if (ch == '\"') {
1736 state = SCE_HB_STRING;
1737 } else if (ch == '\'') {
1738 state = SCE_HB_COMMENTLINE;
1739 } else if (IsOperator(ch)) {
1740 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1741 state = SCE_HB_DEFAULT;
1745 break;
1746 case SCE_HB_STRING:
1747 if (ch == '\"') {
1748 styler.ColourTo(i, StateToPrint);
1749 state = SCE_HB_DEFAULT;
1750 } else if (ch == '\r' || ch == '\n') {
1751 styler.ColourTo(i - 1, StateToPrint);
1752 state = SCE_HB_STRINGEOL;
1754 break;
1755 case SCE_HB_COMMENTLINE:
1756 if (ch == '\r' || ch == '\n') {
1757 styler.ColourTo(i - 1, StateToPrint);
1758 state = SCE_HB_DEFAULT;
1760 break;
1761 case SCE_HB_STRINGEOL:
1762 if (!isLineEnd(ch)) {
1763 styler.ColourTo(i - 1, StateToPrint);
1764 state = SCE_HB_DEFAULT;
1765 } else if (!isLineEnd(chNext)) {
1766 styler.ColourTo(i, StateToPrint);
1767 state = SCE_HB_DEFAULT;
1769 break;
1770 case SCE_HP_DEFAULT:
1771 case SCE_HP_START:
1772 if (IsAWordStart(ch)) {
1773 styler.ColourTo(i - 1, StateToPrint);
1774 state = SCE_HP_WORD;
1775 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1776 styler.SafeGetCharAt(i + 3) == '-') {
1777 styler.ColourTo(i - 1, StateToPrint);
1778 state = SCE_HP_COMMENTLINE;
1779 } else if (ch == '#') {
1780 styler.ColourTo(i - 1, StateToPrint);
1781 state = SCE_HP_COMMENTLINE;
1782 } else if (ch == '\"') {
1783 styler.ColourTo(i - 1, StateToPrint);
1784 if (chNext == '\"' && chNext2 == '\"') {
1785 i += 2;
1786 state = SCE_HP_TRIPLEDOUBLE;
1787 ch = ' ';
1788 chPrev = ' ';
1789 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1790 } else {
1791 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1792 state = SCE_HP_STRING;
1794 } else if (ch == '\'') {
1795 styler.ColourTo(i - 1, StateToPrint);
1796 if (chNext == '\'' && chNext2 == '\'') {
1797 i += 2;
1798 state = SCE_HP_TRIPLE;
1799 ch = ' ';
1800 chPrev = ' ';
1801 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1802 } else {
1803 state = SCE_HP_CHARACTER;
1805 } else if (IsOperator(ch)) {
1806 styler.ColourTo(i - 1, StateToPrint);
1807 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1808 } else if ((ch == ' ') || (ch == '\t')) {
1809 if (state == SCE_HP_START) {
1810 styler.ColourTo(i - 1, StateToPrint);
1811 state = SCE_HP_DEFAULT;
1814 break;
1815 case SCE_HP_WORD:
1816 if (!IsAWordChar(ch)) {
1817 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1818 state = SCE_HP_DEFAULT;
1819 if (ch == '#') {
1820 state = SCE_HP_COMMENTLINE;
1821 } else if (ch == '\"') {
1822 if (chNext == '\"' && chNext2 == '\"') {
1823 i += 2;
1824 state = SCE_HP_TRIPLEDOUBLE;
1825 ch = ' ';
1826 chPrev = ' ';
1827 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1828 } else {
1829 state = SCE_HP_STRING;
1831 } else if (ch == '\'') {
1832 if (chNext == '\'' && chNext2 == '\'') {
1833 i += 2;
1834 state = SCE_HP_TRIPLE;
1835 ch = ' ';
1836 chPrev = ' ';
1837 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1838 } else {
1839 state = SCE_HP_CHARACTER;
1841 } else if (IsOperator(ch)) {
1842 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1845 break;
1846 case SCE_HP_COMMENTLINE:
1847 if (ch == '\r' || ch == '\n') {
1848 styler.ColourTo(i - 1, StateToPrint);
1849 state = SCE_HP_DEFAULT;
1851 break;
1852 case SCE_HP_STRING:
1853 if (ch == '\\') {
1854 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1855 i++;
1856 ch = chNext;
1857 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1859 } else if (ch == '\"') {
1860 styler.ColourTo(i, StateToPrint);
1861 state = SCE_HP_DEFAULT;
1863 break;
1864 case SCE_HP_CHARACTER:
1865 if (ch == '\\') {
1866 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1867 i++;
1868 ch = chNext;
1869 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1871 } else if (ch == '\'') {
1872 styler.ColourTo(i, StateToPrint);
1873 state = SCE_HP_DEFAULT;
1875 break;
1876 case SCE_HP_TRIPLE:
1877 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1878 styler.ColourTo(i, StateToPrint);
1879 state = SCE_HP_DEFAULT;
1881 break;
1882 case SCE_HP_TRIPLEDOUBLE:
1883 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1884 styler.ColourTo(i, StateToPrint);
1885 state = SCE_HP_DEFAULT;
1887 break;
1888 ///////////// start - PHP state handling
1889 case SCE_HPHP_WORD:
1890 if (!IsAWordChar(ch)) {
1891 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1892 if (ch == '/' && chNext == '*') {
1893 i++;
1894 state = SCE_HPHP_COMMENT;
1895 } else if (ch == '/' && chNext == '/') {
1896 i++;
1897 state = SCE_HPHP_COMMENTLINE;
1898 } else if (ch == '#') {
1899 state = SCE_HPHP_COMMENTLINE;
1900 } else if (ch == '\"') {
1901 state = SCE_HPHP_HSTRING;
1902 strcpy(phpStringDelimiter, "\"");
1903 } else if (styler.Match(i, "<<<")) {
1904 bool isSimpleString = false;
1905 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1906 if (strlen(phpStringDelimiter)) {
1907 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1908 if (foldHeredoc) levelCurrent++;
1910 } else if (ch == '\'') {
1911 state = SCE_HPHP_SIMPLESTRING;
1912 strcpy(phpStringDelimiter, "\'");
1913 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1914 state = SCE_HPHP_VARIABLE;
1915 } else if (IsOperator(ch)) {
1916 state = SCE_HPHP_OPERATOR;
1917 } else {
1918 state = SCE_HPHP_DEFAULT;
1921 break;
1922 case SCE_HPHP_NUMBER:
1923 // recognize bases 8,10 or 16 integers OR floating-point numbers
1924 if (!IsADigit(ch)
1925 && strchr(".xXabcdefABCDEF", ch) == NULL
1926 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1927 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1928 if (IsOperator(ch))
1929 state = SCE_HPHP_OPERATOR;
1930 else
1931 state = SCE_HPHP_DEFAULT;
1933 break;
1934 case SCE_HPHP_VARIABLE:
1935 if (!IsPhpWordChar(chNext)) {
1936 styler.ColourTo(i, SCE_HPHP_VARIABLE);
1937 state = SCE_HPHP_DEFAULT;
1939 break;
1940 case SCE_HPHP_COMMENT:
1941 if (ch == '/' && chPrev == '*') {
1942 styler.ColourTo(i, StateToPrint);
1943 state = SCE_HPHP_DEFAULT;
1945 break;
1946 case SCE_HPHP_COMMENTLINE:
1947 if (ch == '\r' || ch == '\n') {
1948 styler.ColourTo(i - 1, StateToPrint);
1949 state = SCE_HPHP_DEFAULT;
1951 break;
1952 case SCE_HPHP_HSTRING:
1953 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1954 // skip the next char
1955 i++;
1956 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1957 && IsPhpWordStart(chNext2)) {
1958 styler.ColourTo(i - 1, StateToPrint);
1959 state = SCE_HPHP_COMPLEX_VARIABLE;
1960 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1961 styler.ColourTo(i - 1, StateToPrint);
1962 state = SCE_HPHP_HSTRING_VARIABLE;
1963 } else if (styler.Match(i, phpStringDelimiter)) {
1964 if (phpStringDelimiter[0] == '\"') {
1965 styler.ColourTo(i, StateToPrint);
1966 state = SCE_HPHP_DEFAULT;
1967 } else if (isLineEnd(chPrev)) {
1968 const int psdLength = strlen(phpStringDelimiter);
1969 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1970 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1971 if (isLineEnd(chAfterPsd) ||
1972 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1973 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1974 styler.ColourTo(i, StateToPrint);
1975 state = SCE_HPHP_DEFAULT;
1976 if (foldHeredoc) levelCurrent--;
1980 break;
1981 case SCE_HPHP_SIMPLESTRING:
1982 if (phpStringDelimiter[0] == '\'') {
1983 if (ch == '\\') {
1984 // skip the next char
1985 i++;
1986 } else if (ch == '\'') {
1987 styler.ColourTo(i, StateToPrint);
1988 state = SCE_HPHP_DEFAULT;
1990 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
1991 const int psdLength = strlen(phpStringDelimiter);
1992 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1993 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1994 if (isLineEnd(chAfterPsd) ||
1995 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1996 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1997 styler.ColourTo(i, StateToPrint);
1998 state = SCE_HPHP_DEFAULT;
1999 if (foldHeredoc) levelCurrent--;
2002 break;
2003 case SCE_HPHP_HSTRING_VARIABLE:
2004 if (!IsPhpWordChar(chNext)) {
2005 styler.ColourTo(i, StateToPrint);
2006 state = SCE_HPHP_HSTRING;
2008 break;
2009 case SCE_HPHP_COMPLEX_VARIABLE:
2010 if (ch == '}') {
2011 styler.ColourTo(i, StateToPrint);
2012 state = SCE_HPHP_HSTRING;
2014 break;
2015 case SCE_HPHP_OPERATOR:
2016 case SCE_HPHP_DEFAULT:
2017 styler.ColourTo(i - 1, StateToPrint);
2018 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2019 state = SCE_HPHP_NUMBER;
2020 } else if (IsAWordStart(ch)) {
2021 state = SCE_HPHP_WORD;
2022 } else if (ch == '/' && chNext == '*') {
2023 i++;
2024 state = SCE_HPHP_COMMENT;
2025 } else if (ch == '/' && chNext == '/') {
2026 i++;
2027 state = SCE_HPHP_COMMENTLINE;
2028 } else if (ch == '#') {
2029 state = SCE_HPHP_COMMENTLINE;
2030 } else if (ch == '\"') {
2031 state = SCE_HPHP_HSTRING;
2032 strcpy(phpStringDelimiter, "\"");
2033 } else if (styler.Match(i, "<<<")) {
2034 bool isSimpleString = false;
2035 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
2036 if (strlen(phpStringDelimiter)) {
2037 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2038 if (foldHeredoc) levelCurrent++;
2040 } else if (ch == '\'') {
2041 state = SCE_HPHP_SIMPLESTRING;
2042 strcpy(phpStringDelimiter, "\'");
2043 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2044 state = SCE_HPHP_VARIABLE;
2045 } else if (IsOperator(ch)) {
2046 state = SCE_HPHP_OPERATOR;
2047 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2048 state = SCE_HPHP_DEFAULT;
2050 break;
2051 ///////////// end - PHP state handling
2054 // Some of the above terminated their lexeme but since the same character starts
2055 // the same class again, only reenter if non empty segment.
2057 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
2058 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2059 if ((ch == '\"') && (nonEmptySegment)) {
2060 state = SCE_HB_STRING;
2061 } else if (ch == '\'') {
2062 state = SCE_HB_COMMENTLINE;
2063 } else if (IsAWordStart(ch)) {
2064 state = SCE_HB_WORD;
2065 } else if (IsOperator(ch)) {
2066 styler.ColourTo(i, SCE_HB_DEFAULT);
2068 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2069 if ((ch == '\"') && (nonEmptySegment)) {
2070 state = SCE_HBA_STRING;
2071 } else if (ch == '\'') {
2072 state = SCE_HBA_COMMENTLINE;
2073 } else if (IsAWordStart(ch)) {
2074 state = SCE_HBA_WORD;
2075 } else if (IsOperator(ch)) {
2076 styler.ColourTo(i, SCE_HBA_DEFAULT);
2078 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2079 if (ch == '/' && chNext == '*') {
2080 if (styler.SafeGetCharAt(i + 2) == '*')
2081 state = SCE_HJ_COMMENTDOC;
2082 else
2083 state = SCE_HJ_COMMENT;
2084 } else if (ch == '/' && chNext == '/') {
2085 state = SCE_HJ_COMMENTLINE;
2086 } else if ((ch == '\"') && (nonEmptySegment)) {
2087 state = SCE_HJ_DOUBLESTRING;
2088 } else if ((ch == '\'') && (nonEmptySegment)) {
2089 state = SCE_HJ_SINGLESTRING;
2090 } else if (IsAWordStart(ch)) {
2091 state = SCE_HJ_WORD;
2092 } else if (IsOperator(ch)) {
2093 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2098 switch (state) {
2099 case SCE_HJ_WORD:
2100 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2101 break;
2102 case SCE_HB_WORD:
2103 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2104 break;
2105 case SCE_HP_WORD:
2106 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType);
2107 break;
2108 case SCE_HPHP_WORD:
2109 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2110 break;
2111 default:
2112 StateToPrint = statePrintForState(state, inScriptType);
2113 styler.ColourTo(lengthDoc - 1, StateToPrint);
2114 break;
2117 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2118 if (fold) {
2119 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2120 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2124 static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2125 Accessor &styler) {
2126 // Passing in true because we're lexing XML
2127 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, true);
2130 static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2131 Accessor &styler) {
2132 // Passing in false because we're notlexing XML
2133 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, false);
2136 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2137 Accessor &styler) {
2138 if (startPos == 0)
2139 initStyle = SCE_HPHP_DEFAULT;
2140 ColouriseHTMLDoc(startPos, length, initStyle, keywordlists, styler);
// Human-readable descriptions of the keyword sets used by the HTML and XML
// lexers. Entry order follows the keyword list order (JavaScript second,
// VBScript third, Python fourth, PHP fifth).
// The trailing null entry marks the end of the table; it must stay last.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	0,
};
// Keyword set descriptions for the stand-alone PHP script lexer.
// Only the PHP slot (index 4, the same position it has in the HTML table) is
// used. The other slots are empty strings rather than null so that the PHP
// entry keeps its index and the table is not ended early.
// The trailing null entry marks the end of the table; it must stay last.
static const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	0,
};
2163 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
2164 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2165 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);