Improve API docs related to keybindings configuration file
[geany-mirror.git] / scintilla / lexers / LexHTML.cxx
blob59c9e8ee4f50cfe1124e6ccdee47790ae2f57e50
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
19 #include "StringCopy.h"
20 #include "WordList.h"
21 #include "LexAccessor.h"
22 #include "Accessor.h"
23 #include "StyleContext.h"
24 #include "CharacterSet.h"
25 #include "LexerModule.h"
27 #ifdef SCI_NAMESPACE
28 using namespace Scintilla;
29 #endif
// Distance between each script style range and its matching SCE_H?A_* range.
// statePrintForState() adds these offsets and stateForPrintState() removes them.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Kinds of embedded content the lexer distinguishes.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Scripting mode of the current position; stored in the low two bits of the line state.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// Return true when ch may appear inside a word: an ASCII letter or digit, '.' or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is called. This includes
// negative values produced by a sign-extended char, for which isalnum() has
// undefined behaviour.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Return true when ch may begin a word: an ASCII letter or digit, or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is called. This includes
// negative values produced by a sign-extended char, for which isalnum() has
// undefined behaviour.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// Return true when ch is one of the punctuation characters styled as an operator.
// Letters and digits never match. Note that '.' is part of the accepted set.
inline bool IsOperator(int ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
60 static void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
61 Sci_PositionU i = 0;
62 for (; (i < end - start + 1) && (i < len-1); i++) {
63 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
65 s[i] = '\0';
68 static const char *GetNextWord(Accessor &styler, Sci_PositionU start, char *s, size_t sLen) {
70 Sci_PositionU i = 0;
71 for (; i < sLen-1; i++) {
72 char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
73 if ((i == 0) && !IsAWordStart(ch))
74 break;
75 if ((i > 0) && !IsAWordChar(ch))
76 break;
77 s[i] = ch;
79 s[i] = '\0';
81 return s;
84 static script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
85 char s[100];
86 GetTextSegment(styler, start, end, s, sizeof(s));
87 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
88 if (strstr(s, "src")) // External script
89 return eScriptNone;
90 if (strstr(s, "vbs"))
91 return eScriptVBS;
92 if (strstr(s, "pyth"))
93 return eScriptPython;
94 if (strstr(s, "javas"))
95 return eScriptJS;
96 if (strstr(s, "jscr"))
97 return eScriptJS;
98 if (strstr(s, "php"))
99 return eScriptPHP;
100 if (strstr(s, "xml")) {
101 const char *xml = strstr(s, "xml");
102 for (const char *t=s; t<xml; t++) {
103 if (!IsASpace(*t)) {
104 return prevValue;
107 return eScriptXML;
110 return prevValue;
113 static int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
114 int iResult = 0;
115 char s[100];
116 GetTextSegment(styler, start, end, s, sizeof(s));
117 if (0 == strncmp(s, "php", 3)) {
118 iResult = 3;
121 return iResult;
124 static script_type ScriptOfState(int state) {
125 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
126 return eScriptPython;
127 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
128 return eScriptVBS;
129 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
130 return eScriptJS;
131 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
132 return eScriptPHP;
133 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
134 return eScriptSGML;
135 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
136 return eScriptSGMLblock;
137 } else {
138 return eScriptNone;
142 static int statePrintForState(int state, script_mode inScriptType) {
143 int StateToPrint = state;
145 if (state >= SCE_HJ_START) {
146 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
147 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
148 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
149 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
150 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
151 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
155 return StateToPrint;
158 static int stateForPrintState(int StateToPrint) {
159 int state;
161 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
162 state = StateToPrint - SCE_HA_PYTHON;
163 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
164 state = StateToPrint - SCE_HA_VBS;
165 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
166 state = StateToPrint - SCE_HA_JS;
167 } else {
168 state = StateToPrint;
171 return state;
174 static inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
175 return IsADigit(styler[start]) || (styler[start] == '.') ||
176 (styler[start] == '-') || (styler[start] == '#');
179 static inline bool isStringState(int state) {
180 bool bResult;
182 switch (state) {
183 case SCE_HJ_DOUBLESTRING:
184 case SCE_HJ_SINGLESTRING:
185 case SCE_HJA_DOUBLESTRING:
186 case SCE_HJA_SINGLESTRING:
187 case SCE_HB_STRING:
188 case SCE_HBA_STRING:
189 case SCE_HP_STRING:
190 case SCE_HP_CHARACTER:
191 case SCE_HP_TRIPLE:
192 case SCE_HP_TRIPLEDOUBLE:
193 case SCE_HPA_STRING:
194 case SCE_HPA_CHARACTER:
195 case SCE_HPA_TRIPLE:
196 case SCE_HPA_TRIPLEDOUBLE:
197 case SCE_HPHP_HSTRING:
198 case SCE_HPHP_SIMPLESTRING:
199 case SCE_HPHP_HSTRING_VARIABLE:
200 case SCE_HPHP_COMPLEX_VARIABLE:
201 bResult = true;
202 break;
203 default :
204 bResult = false;
205 break;
207 return bResult;
210 static inline bool stateAllowsTermination(int state) {
211 bool allowTermination = !isStringState(state);
212 if (allowTermination) {
213 switch (state) {
214 case SCE_HB_COMMENTLINE:
215 case SCE_HPHP_COMMENT:
216 case SCE_HP_COMMENTLINE:
217 case SCE_HPA_COMMENTLINE:
218 allowTermination = false;
221 return allowTermination;
224 // not really well done, since it's only comments that should lex the %> and <%
225 static inline bool isCommentASPState(int state) {
226 bool bResult;
228 switch (state) {
229 case SCE_HJ_COMMENT:
230 case SCE_HJ_COMMENTLINE:
231 case SCE_HJ_COMMENTDOC:
232 case SCE_HB_COMMENTLINE:
233 case SCE_HP_COMMENTLINE:
234 case SCE_HPHP_COMMENT:
235 case SCE_HPHP_COMMENTLINE:
236 bResult = true;
237 break;
238 default :
239 bResult = false;
240 break;
242 return bResult;
245 static void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler) {
246 bool wordIsNumber = IsNumber(start, styler);
247 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
248 if (wordIsNumber) {
249 chAttr = SCE_H_NUMBER;
250 } else {
251 char s[100];
252 GetTextSegment(styler, start, end, s, sizeof(s));
253 if (keywords.InList(s))
254 chAttr = SCE_H_ATTRIBUTE;
256 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
257 // No keywords -> all are known
258 chAttr = SCE_H_ATTRIBUTE;
259 styler.ColourTo(end, chAttr);
262 static int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
263 WordList &keywords, Accessor &styler, bool &tagDontFold,
264 bool caseSensitive, bool isXml, bool allowScripts) {
265 char withSpace[30 + 2] = " ";
266 const char *s = withSpace + 1;
267 // Copy after the '<'
268 Sci_PositionU i = 1;
269 for (Sci_PositionU cPos = start; cPos <= end && i < 30; cPos++) {
270 char ch = styler[cPos];
271 if ((ch != '<') && (ch != '/')) {
272 withSpace[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
276 //The following is only a quick hack, to see if this whole thing would work
277 //we first need the tagname with a trailing space...
278 withSpace[i] = ' ';
279 withSpace[i+1] = '\0';
281 // if the current language is XML, I can fold any tag
282 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
283 //...to find it in the list of no-container-tags
284 tagDontFold = (!isXml) && (NULL != strstr(" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr ", withSpace));
286 //now we can remove the trailing space
287 withSpace[i] = '\0';
289 // No keywords -> all are known
290 char chAttr = SCE_H_TAGUNKNOWN;
291 if (s[0] == '!') {
292 chAttr = SCE_H_SGML_DEFAULT;
293 } else if (!keywords || keywords.InList(s)) {
294 chAttr = SCE_H_TAG;
296 styler.ColourTo(end, chAttr);
297 if (chAttr == SCE_H_TAG) {
298 if (allowScripts && 0 == strcmp(s, "script")) {
299 // check to see if this is a self-closing tag by sniffing ahead
300 bool isSelfClose = false;
301 for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
302 char ch = styler.SafeGetCharAt(cPos, '\0');
303 if (ch == '\0' || ch == '>')
304 break;
305 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
306 isSelfClose = true;
307 break;
311 // do not enter a script state if the tag self-closed
312 if (!isSelfClose)
313 chAttr = SCE_H_SCRIPT;
314 } else if (!isXml && 0 == strcmp(s, "comment")) {
315 chAttr = SCE_H_COMMENT;
318 return chAttr;
321 static void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
322 WordList &keywords, Accessor &styler, script_mode inScriptType) {
323 char s[30 + 1];
324 Sci_PositionU i = 0;
325 for (; i < end - start + 1 && i < 30; i++) {
326 s[i] = styler[start + i];
328 s[i] = '\0';
330 char chAttr = SCE_HJ_WORD;
331 bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
332 if (wordIsNumber) {
333 chAttr = SCE_HJ_NUMBER;
334 } else if (keywords.InList(s)) {
335 chAttr = SCE_HJ_KEYWORD;
337 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
340 static int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
341 char chAttr = SCE_HB_IDENTIFIER;
342 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
343 if (wordIsNumber) {
344 chAttr = SCE_HB_NUMBER;
345 } else {
346 char s[100];
347 GetTextSegment(styler, start, end, s, sizeof(s));
348 if (keywords.InList(s)) {
349 chAttr = SCE_HB_WORD;
350 if (strcmp(s, "rem") == 0)
351 chAttr = SCE_HB_COMMENTLINE;
354 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
355 if (chAttr == SCE_HB_COMMENTLINE)
356 return SCE_HB_COMMENTLINE;
357 else
358 return SCE_HB_DEFAULT;
361 static void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
362 bool wordIsNumber = IsADigit(styler[start]);
363 char s[30 + 1];
364 Sci_PositionU i = 0;
365 for (; i < end - start + 1 && i < 30; i++) {
366 s[i] = styler[start + i];
368 s[i] = '\0';
369 char chAttr = SCE_HP_IDENTIFIER;
370 if (0 == strcmp(prevWord, "class"))
371 chAttr = SCE_HP_CLASSNAME;
372 else if (0 == strcmp(prevWord, "def"))
373 chAttr = SCE_HP_DEFNAME;
374 else if (wordIsNumber)
375 chAttr = SCE_HP_NUMBER;
376 else if (keywords.InList(s))
377 chAttr = SCE_HP_WORD;
378 else if (isMako && 0 == strcmp(s, "block"))
379 chAttr = SCE_HP_WORD;
380 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
381 strcpy(prevWord, s);
384 // Update the word colour to default or keyword
385 // Called when in a PHP word
386 static void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler) {
387 char chAttr = SCE_HPHP_DEFAULT;
388 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
389 if (wordIsNumber) {
390 chAttr = SCE_HPHP_NUMBER;
391 } else {
392 char s[100];
393 GetTextSegment(styler, start, end, s, sizeof(s));
394 if (keywords.InList(s))
395 chAttr = SCE_HPHP_WORD;
397 styler.ColourTo(end, chAttr);
400 static bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler) {
401 char s[30 + 1];
402 Sci_PositionU i = 0;
403 for (; i < end - start + 1 && i < 30; i++) {
404 s[i] = styler[start + i];
406 s[i] = '\0';
407 return keywords.InList(s);
410 static bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
411 char s[30 + 1];
412 Sci_PositionU i = 0;
413 for (; i < end - start + 1 && i < 30; i++) {
414 s[i] = styler[start + i];
416 s[i] = '\0';
417 return (0 == strcmp(s, "[CDATA["));
420 // Return the first state to reach when entering a scripting language
421 static int StateForScript(script_type scriptLanguage) {
422 int Result;
423 switch (scriptLanguage) {
424 case eScriptVBS:
425 Result = SCE_HB_START;
426 break;
427 case eScriptPython:
428 Result = SCE_HP_START;
429 break;
430 case eScriptPHP:
431 Result = SCE_HPHP_DEFAULT;
432 break;
433 case eScriptXML:
434 Result = SCE_H_TAGUNKNOWN;
435 break;
436 case eScriptSGML:
437 Result = SCE_H_SGML_DEFAULT;
438 break;
439 case eScriptComment:
440 Result = SCE_H_COMMENT;
441 break;
442 default :
443 Result = SCE_HJ_START;
444 break;
446 return Result;
449 static inline bool issgmlwordchar(int ch) {
450 return !IsASCII(ch) ||
451 (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
454 static inline bool IsPhpWordStart(int ch) {
455 return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
458 static inline bool IsPhpWordChar(int ch) {
459 return IsADigit(ch) || IsPhpWordStart(ch);
462 static bool InTagState(int state) {
463 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
464 state == SCE_H_SCRIPT ||
465 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
466 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
467 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
470 static bool IsCommentState(const int state) {
471 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
474 static bool IsScriptCommentState(const int state) {
475 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
476 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// Return true when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
483 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
484 if (strlen(blockType) == 0) {
485 return ((ch == '%') && (chNext == '>'));
486 } else if ((0 == strcmp(blockType, "inherit")) ||
487 (0 == strcmp(blockType, "namespace")) ||
488 (0 == strcmp(blockType, "include")) ||
489 (0 == strcmp(blockType, "page"))) {
490 return ((ch == '/') && (chNext == '>'));
491 } else if (0 == strcmp(blockType, "%")) {
492 if (ch == '/' && isLineEnd(chNext))
493 return 1;
494 else
495 return isLineEnd(ch);
496 } else if (0 == strcmp(blockType, "{")) {
497 return ch == '}';
498 } else {
499 return (ch == '>');
// Decide whether ch / chNext terminate the current Django block:
// "%}" closes a "%" block and "}}" closes a "{" block.
// Any other blockType (including the empty string) never ends here.
static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
	if (strcmp(blockType, "%") == 0)
		return ch == '%' && chNext == '}';
	if (strcmp(blockType, "{") == 0)
		return ch == '}' && chNext == '}';
	return false;
}
515 static bool isPHPStringState(int state) {
516 return
517 (state == SCE_HPHP_HSTRING) ||
518 (state == SCE_HPHP_SIMPLESTRING) ||
519 (state == SCE_HPHP_HSTRING_VARIABLE) ||
520 (state == SCE_HPHP_COMPLEX_VARIABLE);
523 static Sci_Position FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
524 Sci_Position j;
525 const Sci_Position beginning = i - 1;
526 bool isValidSimpleString = false;
528 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
529 i++;
531 char ch = styler.SafeGetCharAt(i);
532 const char chNext = styler.SafeGetCharAt(i + 1);
533 if (!IsPhpWordStart(ch)) {
534 if (ch == '\'' && IsPhpWordStart(chNext)) {
535 i++;
536 ch = chNext;
537 isSimpleString = true;
538 } else {
539 phpStringDelimiter[0] = '\0';
540 return beginning;
543 phpStringDelimiter[0] = ch;
544 i++;
546 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
547 if (!IsPhpWordChar(styler[j])) {
548 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
549 isValidSimpleString = true;
550 j++;
551 break;
552 } else {
553 phpStringDelimiter[0] = '\0';
554 return beginning;
557 if (j - i < phpStringDelimiterSize - 2)
558 phpStringDelimiter[j-i+1] = styler[j];
559 else
560 i++;
562 if (isSimpleString && !isValidSimpleString) {
563 phpStringDelimiter[0] = '\0';
564 return beginning;
566 phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
567 return j - 1;
570 static void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
571 Accessor &styler, bool isXml) {
572 WordList &keywords = *keywordlists[0];
573 WordList &keywords2 = *keywordlists[1];
574 WordList &keywords3 = *keywordlists[2];
575 WordList &keywords4 = *keywordlists[3];
576 WordList &keywords5 = *keywordlists[4];
577 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
579 styler.StartAt(startPos);
580 char prevWord[200];
581 prevWord[0] = '\0';
582 char phpStringDelimiter[200]; // PHP is not limited in length, we are
583 phpStringDelimiter[0] = '\0';
584 int StateToPrint = initStyle;
585 int state = stateForPrintState(StateToPrint);
586 char makoBlockType[200];
587 makoBlockType[0] = '\0';
588 int makoComment = 0;
589 char djangoBlockType[2];
590 djangoBlockType[0] = '\0';
592 // If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
593 if (InTagState(state)) {
594 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
595 Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
596 length += startPos - backLineStart;
597 startPos = backLineStart;
599 state = SCE_H_DEFAULT;
601 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
602 if (isPHPStringState(state)) {
603 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
604 startPos--;
605 length++;
606 state = styler.StyleAt(startPos);
608 if (startPos == 0)
609 state = SCE_H_DEFAULT;
611 styler.StartAt(startPos);
613 Sci_Position lineCurrent = styler.GetLine(startPos);
614 int lineState;
615 if (lineCurrent > 0) {
616 lineState = styler.GetLineState(lineCurrent-1);
617 } else {
618 // Default client and ASP scripting language is JavaScript
619 lineState = eScriptJS << 8;
621 // property asp.default.language
622 // Script in ASP code is initially assumed to be in JavaScript.
623 // To change this to VBScript set asp.default.language to 2. Python is 3.
624 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
626 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
627 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
628 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
629 bool tagDontFold = false; //some HTML tags should not be folded
630 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
631 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
632 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
634 script_type scriptLanguage = ScriptOfState(state);
635 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
636 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
637 scriptLanguage = eScriptComment;
639 script_type beforeLanguage = ScriptOfState(beforePreProc);
641 // property fold.html
642 // Folding is turned on or off for HTML and XML files with this option.
643 // The fold option must also be on for folding to occur.
644 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
646 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
648 // property fold.html.preprocessor
649 // Folding is turned on or off for scripts embedded in HTML files with this option.
650 // The default is on.
651 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
653 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
655 // property fold.hypertext.comment
656 // Allow folding for comments in scripts embedded in HTML.
657 // The default is off.
658 const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
660 // property fold.hypertext.heredoc
661 // Allow folding for heredocs in scripts embedded in HTML.
662 // The default is off.
663 const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
665 // property html.tags.case.sensitive
666 // For XML and HTML, setting this property to 1 will make tags match in a case
667 // sensitive way which is the expected behaviour for XML and XHTML.
668 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
670 // property lexer.xml.allow.scripts
671 // Set to 0 to disable scripts in XML.
672 const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
674 // property lexer.html.mako
675 // Set to 1 to enable the mako template language.
676 const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
678 // property lexer.html.django
679 // Set to 1 to enable the django template language.
680 const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
682 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
683 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
684 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
685 // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
686 const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
688 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
689 int levelCurrent = levelPrev;
690 int visibleChars = 0;
691 int lineStartVisibleChars = 0;
693 int chPrev = ' ';
694 int ch = ' ';
695 int chPrevNonWhite = ' ';
696 // look back to set chPrevNonWhite properly for better regex colouring
697 if (scriptLanguage == eScriptJS && startPos > 0) {
698 Sci_Position back = startPos;
699 int style = 0;
700 while (--back) {
701 style = styler.StyleAt(back);
702 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
703 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
704 break;
706 if (style == SCE_HJ_SYMBOLS) {
707 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
711 styler.StartSegment(startPos);
712 const Sci_Position lengthDoc = startPos + length;
713 for (Sci_Position i = startPos; i < lengthDoc; i++) {
714 const int chPrev2 = chPrev;
715 chPrev = ch;
716 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
717 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
718 chPrevNonWhite = ch;
719 ch = static_cast<unsigned char>(styler[i]);
720 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
721 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
723 // Handle DBCS codepages
724 if (styler.IsLeadByte(static_cast<char>(ch))) {
725 chPrev = ' ';
726 i += 1;
727 continue;
730 if ((!IsASpace(ch) || !foldCompact) && fold)
731 visibleChars++;
732 if (!IsASpace(ch))
733 lineStartVisibleChars++;
735 // decide what is the current state to print (depending of the script tag)
736 StateToPrint = statePrintForState(state, inScriptType);
738 // handle script folding
739 if (fold) {
740 switch (scriptLanguage) {
741 case eScriptJS:
742 case eScriptPHP:
743 //not currently supported case eScriptVBS:
745 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
746 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
747 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
748 if (ch == '#') {
749 Sci_Position j = i + 1;
750 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
751 j++;
753 if (styler.Match(j, "region") || styler.Match(j, "if")) {
754 levelCurrent++;
755 } else if (styler.Match(j, "end")) {
756 levelCurrent--;
758 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
759 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
761 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
762 levelCurrent--;
764 break;
765 case eScriptPython:
766 if (state != SCE_HP_COMMENTLINE && !isMako) {
767 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
768 levelCurrent++;
769 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
770 // check if the number of tabs is lower than the level
771 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
772 for (Sci_Position j = 0; Findlevel > 0; j++) {
773 char chTmp = styler.SafeGetCharAt(i + j + 1);
774 if (chTmp == '\t') {
775 Findlevel -= 8;
776 } else if (chTmp == ' ') {
777 Findlevel--;
778 } else {
779 break;
783 if (Findlevel > 0) {
784 levelCurrent -= Findlevel / 8;
785 if (Findlevel % 8)
786 levelCurrent--;
790 break;
791 default:
792 break;
796 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
797 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
798 // Avoid triggering two times on Dos/Win
799 // New line -> record any line state onto /next/ line
800 if (fold) {
801 int lev = levelPrev;
802 if (visibleChars == 0)
803 lev |= SC_FOLDLEVELWHITEFLAG;
804 if ((levelCurrent > levelPrev) && (visibleChars > 0))
805 lev |= SC_FOLDLEVELHEADERFLAG;
807 styler.SetLevel(lineCurrent, lev);
808 visibleChars = 0;
809 levelPrev = levelCurrent;
811 styler.SetLineState(lineCurrent,
812 ((inScriptType & 0x03) << 0) |
813 ((tagOpened ? 1 : 0) << 2) |
814 ((tagClosing ? 1 : 0) << 3) |
815 ((aspScript & 0x0F) << 4) |
816 ((clientScript & 0x0F) << 8) |
817 ((beforePreProc & 0xFF) << 12));
818 lineCurrent++;
819 lineStartVisibleChars = 0;
822 // handle start of Mako comment line
823 if (isMako && ch == '#' && chNext == '#') {
824 makoComment = 1;
825 state = SCE_HP_COMMENTLINE;
828 // handle end of Mako comment line
829 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
830 makoComment = 0;
831 styler.ColourTo(i, StateToPrint);
832 if (scriptLanguage == eScriptPython) {
833 state = SCE_HP_DEFAULT;
834 } else {
835 state = SCE_H_DEFAULT;
839 // Allow falling through to mako handling code if newline is going to end a block
840 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
841 (!isMako || (0 != strcmp(makoBlockType, "%")))) {
843 // Ignore everything in mako comment until the line ends
844 else if (isMako && makoComment) {
847 // generic end of script processing
848 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
849 // Check if it's the end of the script tag (or any other HTML tag)
850 switch (state) {
851 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
852 case SCE_H_DOUBLESTRING:
853 case SCE_H_SINGLESTRING:
854 case SCE_HJ_COMMENT:
855 case SCE_HJ_COMMENTDOC:
856 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
857 // the end of script marker from some JS interpreters.
858 case SCE_HB_COMMENTLINE:
859 case SCE_HBA_COMMENTLINE:
860 case SCE_HJ_DOUBLESTRING:
861 case SCE_HJ_SINGLESTRING:
862 case SCE_HJ_REGEX:
863 case SCE_HB_STRING:
864 case SCE_HBA_STRING:
865 case SCE_HP_STRING:
866 case SCE_HP_TRIPLE:
867 case SCE_HP_TRIPLEDOUBLE:
868 case SCE_HPHP_HSTRING:
869 case SCE_HPHP_SIMPLESTRING:
870 case SCE_HPHP_COMMENT:
871 case SCE_HPHP_COMMENTLINE:
872 break;
873 default :
874 // check if the closing tag is a script tag
875 if (const char *tag =
876 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
877 state == SCE_H_COMMENT ? "comment" : 0) {
878 Sci_Position j = i + 2;
879 int chr;
880 do {
881 chr = static_cast<int>(*tag++);
882 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
883 if (chr != 0) break;
885 // closing tag of the script (it's a closing HTML tag anyway)
886 styler.ColourTo(i - 1, StateToPrint);
887 state = SCE_H_TAGUNKNOWN;
888 inScriptType = eHtml;
889 scriptLanguage = eScriptNone;
890 clientScript = eScriptJS;
891 i += 2;
892 visibleChars += 2;
893 tagClosing = true;
894 continue;
898 /////////////////////////////////////
899 // handle the start of PHP pre-processor = Non-HTML
900 else if ((state != SCE_H_ASPAT) &&
901 !isPHPStringState(state) &&
902 (state != SCE_HPHP_COMMENT) &&
903 (state != SCE_HPHP_COMMENTLINE) &&
904 (ch == '<') &&
905 (chNext == '?') &&
906 !IsScriptCommentState(state)) {
907 beforeLanguage = scriptLanguage;
908 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
909 if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
910 styler.ColourTo(i - 1, StateToPrint);
911 beforePreProc = state;
912 i++;
913 visibleChars++;
914 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
915 if (scriptLanguage == eScriptXML)
916 styler.ColourTo(i, SCE_H_XMLSTART);
917 else
918 styler.ColourTo(i, SCE_H_QUESTION);
919 state = StateForScript(scriptLanguage);
920 if (inScriptType == eNonHtmlScript)
921 inScriptType = eNonHtmlScriptPreProc;
922 else
923 inScriptType = eNonHtmlPreProc;
924 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
925 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
926 levelCurrent++;
928 // should be better
929 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
930 continue;
933 // handle the start Mako template Python code
934 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
935 (lineStartVisibleChars == 1 && ch == '%') ||
936 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
937 (ch == '$' && chNext == '{') ||
938 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
939 if (ch == '%' || ch == '/')
940 StringCopy(makoBlockType, "%");
941 else if (ch == '$')
942 StringCopy(makoBlockType, "{");
943 else if (chNext == '/')
944 GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
945 else
946 GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
947 styler.ColourTo(i - 1, StateToPrint);
948 beforePreProc = state;
949 if (inScriptType == eNonHtmlScript)
950 inScriptType = eNonHtmlScriptPreProc;
951 else
952 inScriptType = eNonHtmlPreProc;
954 if (chNext == '/') {
955 i += 2;
956 visibleChars += 2;
957 } else if (ch != '%') {
958 i++;
959 visibleChars++;
961 state = SCE_HP_START;
962 scriptLanguage = eScriptPython;
963 styler.ColourTo(i, SCE_H_ASP);
965 if (ch != '%' && ch != '$' && ch != '/') {
966 i += static_cast<int>(strlen(makoBlockType));
967 visibleChars += static_cast<int>(strlen(makoBlockType));
968 if (keywords4.InList(makoBlockType))
969 styler.ColourTo(i, SCE_HP_WORD);
970 else
971 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
974 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
975 continue;
978 // handle the start/end of Django comment
979 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
980 styler.ColourTo(i - 1, StateToPrint);
981 beforePreProc = state;
982 beforeLanguage = scriptLanguage;
983 if (inScriptType == eNonHtmlScript)
984 inScriptType = eNonHtmlScriptPreProc;
985 else
986 inScriptType = eNonHtmlPreProc;
987 i += 1;
988 visibleChars += 1;
989 scriptLanguage = eScriptComment;
990 state = SCE_H_COMMENT;
991 styler.ColourTo(i, SCE_H_ASP);
992 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
993 continue;
994 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
995 styler.ColourTo(i - 1, StateToPrint);
996 i += 1;
997 visibleChars += 1;
998 styler.ColourTo(i, SCE_H_ASP);
999 state = beforePreProc;
1000 if (inScriptType == eNonHtmlScriptPreProc)
1001 inScriptType = eNonHtmlScript;
1002 else
1003 inScriptType = eHtml;
1004 scriptLanguage = beforeLanguage;
1005 continue;
1008 // handle the start of Django template code
1009 else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1010 if (chNext == '%')
1011 StringCopy(djangoBlockType, "%");
1012 else
1013 StringCopy(djangoBlockType, "{");
1014 styler.ColourTo(i - 1, StateToPrint);
1015 beforePreProc = state;
1016 if (inScriptType == eNonHtmlScript)
1017 inScriptType = eNonHtmlScriptPreProc;
1018 else
1019 inScriptType = eNonHtmlPreProc;
1021 i += 1;
1022 visibleChars += 1;
1023 state = SCE_HP_START;
1024 beforeLanguage = scriptLanguage;
1025 scriptLanguage = eScriptPython;
1026 styler.ColourTo(i, SCE_H_ASP);
1028 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1029 continue;
1032 // handle the start of ASP pre-processor = Non-HTML
1033 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1034 styler.ColourTo(i - 1, StateToPrint);
1035 beforePreProc = state;
1036 if (inScriptType == eNonHtmlScript)
1037 inScriptType = eNonHtmlScriptPreProc;
1038 else
1039 inScriptType = eNonHtmlPreProc;
1041 if (chNext2 == '@') {
1042 i += 2; // place as if it was the second next char treated
1043 visibleChars += 2;
1044 state = SCE_H_ASPAT;
1045 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1046 styler.ColourTo(i + 3, SCE_H_ASP);
1047 state = SCE_H_XCCOMMENT;
1048 scriptLanguage = eScriptVBS;
1049 continue;
1050 } else {
1051 if (chNext2 == '=') {
1052 i += 2; // place as if it was the second next char treated
1053 visibleChars += 2;
1054 } else {
1055 i++; // place as if it was the next char treated
1056 visibleChars++;
1059 state = StateForScript(aspScript);
1061 scriptLanguage = eScriptVBS;
1062 styler.ColourTo(i, SCE_H_ASP);
1063 // fold whole script
1064 if (foldHTMLPreprocessor)
1065 levelCurrent++;
1066 // should be better
1067 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1068 continue;
1071 /////////////////////////////////////
1072 // handle the start of SGML language (DTD)
1073 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1074 (chPrev == '<') &&
1075 (ch == '!') &&
1076 (StateToPrint != SCE_H_CDATA) &&
1077 (!IsCommentState(StateToPrint)) &&
1078 (!IsScriptCommentState(StateToPrint))) {
1079 beforePreProc = state;
1080 styler.ColourTo(i - 2, StateToPrint);
1081 if ((chNext == '-') && (chNext2 == '-')) {
1082 state = SCE_H_COMMENT; // wait for a pending command
1083 styler.ColourTo(i + 2, SCE_H_COMMENT);
1084 i += 2; // follow styling after the --
1085 } else if (isWordCdata(i + 1, i + 7, styler)) {
1086 state = SCE_H_CDATA;
1087 } else {
1088 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1089 scriptLanguage = eScriptSGML;
1090 state = SCE_H_SGML_COMMAND; // wait for a pending command
1092 // fold whole tag (-- when closing the tag)
1093 if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1094 levelCurrent++;
1095 continue;
1098 // handle the end of Mako Python code
1099 else if (isMako &&
1100 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1101 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1102 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1103 if (state == SCE_H_ASPAT) {
1104 aspScript = segIsScriptingIndicator(styler,
1105 styler.GetStartSegment(), i - 1, aspScript);
1107 if (state == SCE_HP_WORD) {
1108 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1109 } else {
1110 styler.ColourTo(i - 1, StateToPrint);
1112 if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1113 i++;
1114 visibleChars++;
1116 else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
1117 i++;
1118 visibleChars++;
1120 if (0 != strcmp(makoBlockType, "%") || ch == '/') {
1121 styler.ColourTo(i, SCE_H_ASP);
1123 state = beforePreProc;
1124 if (inScriptType == eNonHtmlScriptPreProc)
1125 inScriptType = eNonHtmlScript;
1126 else
1127 inScriptType = eHtml;
1128 scriptLanguage = eScriptNone;
1129 continue;
1132 // handle the end of Django template code
1133 else if (isDjango &&
1134 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1135 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1136 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1137 if (state == SCE_H_ASPAT) {
1138 aspScript = segIsScriptingIndicator(styler,
1139 styler.GetStartSegment(), i - 1, aspScript);
1141 if (state == SCE_HP_WORD) {
1142 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1143 } else {
1144 styler.ColourTo(i - 1, StateToPrint);
1146 i += 1;
1147 visibleChars += 1;
1148 styler.ColourTo(i, SCE_H_ASP);
1149 state = beforePreProc;
1150 if (inScriptType == eNonHtmlScriptPreProc)
1151 inScriptType = eNonHtmlScript;
1152 else
1153 inScriptType = eHtml;
1154 scriptLanguage = beforeLanguage;
1155 continue;
1158 // handle the end of a pre-processor = Non-HTML
1159 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1160 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1161 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1162 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1163 if (state == SCE_H_ASPAT) {
1164 aspScript = segIsScriptingIndicator(styler,
1165 styler.GetStartSegment(), i - 1, aspScript);
1167 // Bounce out of any ASP mode
1168 switch (state) {
1169 case SCE_HJ_WORD:
1170 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1171 break;
1172 case SCE_HB_WORD:
1173 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1174 break;
1175 case SCE_HP_WORD:
1176 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1177 break;
1178 case SCE_HPHP_WORD:
1179 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1180 break;
1181 case SCE_H_XCCOMMENT:
1182 styler.ColourTo(i - 1, state);
1183 break;
1184 default :
1185 styler.ColourTo(i - 1, StateToPrint);
1186 break;
1188 if (scriptLanguage != eScriptSGML) {
1189 i++;
1190 visibleChars++;
1192 if (ch == '%')
1193 styler.ColourTo(i, SCE_H_ASP);
1194 else if (scriptLanguage == eScriptXML)
1195 styler.ColourTo(i, SCE_H_XMLEND);
1196 else if (scriptLanguage == eScriptSGML)
1197 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1198 else
1199 styler.ColourTo(i, SCE_H_QUESTION);
1200 state = beforePreProc;
1201 if (inScriptType == eNonHtmlScriptPreProc)
1202 inScriptType = eNonHtmlScript;
1203 else
1204 inScriptType = eHtml;
1205 // Unfold all scripting languages, except for XML tag
1206 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1207 levelCurrent--;
1209 scriptLanguage = beforeLanguage;
1210 continue;
1212 /////////////////////////////////////
1214 switch (state) {
1215 case SCE_H_DEFAULT:
1216 if (ch == '<') {
1217 // in HTML, fold on tag open and unfold on tag close
1218 tagOpened = true;
1219 tagClosing = (chNext == '/');
1220 styler.ColourTo(i - 1, StateToPrint);
1221 if (chNext != '!')
1222 state = SCE_H_TAGUNKNOWN;
1223 } else if (ch == '&') {
1224 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1225 state = SCE_H_ENTITY;
1227 break;
1228 case SCE_H_SGML_DEFAULT:
1229 case SCE_H_SGML_BLOCK_DEFAULT:
1230 // if (scriptLanguage == eScriptSGMLblock)
1231 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1233 if (ch == '\"') {
1234 styler.ColourTo(i - 1, StateToPrint);
1235 state = SCE_H_SGML_DOUBLESTRING;
1236 } else if (ch == '\'') {
1237 styler.ColourTo(i - 1, StateToPrint);
1238 state = SCE_H_SGML_SIMPLESTRING;
1239 } else if ((ch == '-') && (chPrev == '-')) {
1240 if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1241 styler.ColourTo(i - 2, StateToPrint);
1243 state = SCE_H_SGML_COMMENT;
1244 } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1245 styler.ColourTo(i - 2, StateToPrint);
1246 state = SCE_H_SGML_ENTITY;
1247 } else if (ch == '#') {
1248 styler.ColourTo(i - 1, StateToPrint);
1249 state = SCE_H_SGML_SPECIAL;
1250 } else if (ch == '[') {
1251 styler.ColourTo(i - 1, StateToPrint);
1252 scriptLanguage = eScriptSGMLblock;
1253 state = SCE_H_SGML_BLOCK_DEFAULT;
1254 } else if (ch == ']') {
1255 if (scriptLanguage == eScriptSGMLblock) {
1256 styler.ColourTo(i, StateToPrint);
1257 scriptLanguage = eScriptSGML;
1258 } else {
1259 styler.ColourTo(i - 1, StateToPrint);
1260 styler.ColourTo(i, SCE_H_SGML_ERROR);
1262 state = SCE_H_SGML_DEFAULT;
1263 } else if (scriptLanguage == eScriptSGMLblock) {
1264 if ((ch == '!') && (chPrev == '<')) {
1265 styler.ColourTo(i - 2, StateToPrint);
1266 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1267 state = SCE_H_SGML_COMMAND;
1268 } else if (ch == '>') {
1269 styler.ColourTo(i - 1, StateToPrint);
1270 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1273 break;
1274 case SCE_H_SGML_COMMAND:
1275 if ((ch == '-') && (chPrev == '-')) {
1276 styler.ColourTo(i - 2, StateToPrint);
1277 state = SCE_H_SGML_COMMENT;
1278 } else if (!issgmlwordchar(ch)) {
1279 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1280 styler.ColourTo(i - 1, StateToPrint);
1281 state = SCE_H_SGML_1ST_PARAM;
1282 } else {
1283 state = SCE_H_SGML_ERROR;
1286 break;
1287 case SCE_H_SGML_1ST_PARAM:
1288 // wait for the beginning of the word
1289 if ((ch == '-') && (chPrev == '-')) {
1290 if (scriptLanguage == eScriptSGMLblock) {
1291 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1292 } else {
1293 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1295 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1296 } else if (issgmlwordchar(ch)) {
1297 if (scriptLanguage == eScriptSGMLblock) {
1298 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1299 } else {
1300 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1302 // find the length of the word
1303 int size = 1;
1304 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1305 size++;
1306 styler.ColourTo(i + size - 1, StateToPrint);
1307 i += size - 1;
1308 visibleChars += size - 1;
1309 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1310 if (scriptLanguage == eScriptSGMLblock) {
1311 state = SCE_H_SGML_BLOCK_DEFAULT;
1312 } else {
1313 state = SCE_H_SGML_DEFAULT;
1315 continue;
1317 break;
1318 case SCE_H_SGML_ERROR:
1319 if ((ch == '-') && (chPrev == '-')) {
1320 styler.ColourTo(i - 2, StateToPrint);
1321 state = SCE_H_SGML_COMMENT;
1323 break;
1324 case SCE_H_SGML_DOUBLESTRING:
1325 if (ch == '\"') {
1326 styler.ColourTo(i, StateToPrint);
1327 state = SCE_H_SGML_DEFAULT;
1329 break;
1330 case SCE_H_SGML_SIMPLESTRING:
1331 if (ch == '\'') {
1332 styler.ColourTo(i, StateToPrint);
1333 state = SCE_H_SGML_DEFAULT;
1335 break;
1336 case SCE_H_SGML_COMMENT:
1337 if ((ch == '-') && (chPrev == '-')) {
1338 styler.ColourTo(i, StateToPrint);
1339 state = SCE_H_SGML_DEFAULT;
1341 break;
1342 case SCE_H_CDATA:
1343 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1344 styler.ColourTo(i, StateToPrint);
1345 state = SCE_H_DEFAULT;
1346 levelCurrent--;
1348 break;
1349 case SCE_H_COMMENT:
1350 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1351 styler.ColourTo(i, StateToPrint);
1352 state = SCE_H_DEFAULT;
1353 levelCurrent--;
1355 break;
1356 case SCE_H_SGML_1ST_PARAM_COMMENT:
1357 if ((ch == '-') && (chPrev == '-')) {
1358 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1359 state = SCE_H_SGML_1ST_PARAM;
1361 break;
1362 case SCE_H_SGML_SPECIAL:
1363 if (!(IsASCII(ch) && isupper(ch))) {
1364 styler.ColourTo(i - 1, StateToPrint);
1365 if (isalnum(ch)) {
1366 state = SCE_H_SGML_ERROR;
1367 } else {
1368 state = SCE_H_SGML_DEFAULT;
1371 break;
1372 case SCE_H_SGML_ENTITY:
1373 if (ch == ';') {
1374 styler.ColourTo(i, StateToPrint);
1375 state = SCE_H_SGML_DEFAULT;
1376 } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1377 styler.ColourTo(i, SCE_H_SGML_ERROR);
1378 state = SCE_H_SGML_DEFAULT;
1380 break;
1381 case SCE_H_ENTITY:
1382 if (ch == ';') {
1383 styler.ColourTo(i, StateToPrint);
1384 state = SCE_H_DEFAULT;
1386 if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1387 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1388 if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1389 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1390 else
1391 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1392 state = SCE_H_DEFAULT;
1394 break;
1395 case SCE_H_TAGUNKNOWN:
1396 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1397 int eClass = classifyTagHTML(styler.GetStartSegment(),
1398 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1399 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1400 if (!tagClosing) {
1401 inScriptType = eNonHtmlScript;
1402 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1403 } else {
1404 scriptLanguage = eScriptNone;
1406 eClass = SCE_H_TAG;
1408 if (ch == '>') {
1409 styler.ColourTo(i, eClass);
1410 if (inScriptType == eNonHtmlScript) {
1411 state = StateForScript(scriptLanguage);
1412 } else {
1413 state = SCE_H_DEFAULT;
1415 tagOpened = false;
1416 if (!tagDontFold) {
1417 if (tagClosing) {
1418 levelCurrent--;
1419 } else {
1420 levelCurrent++;
1423 tagClosing = false;
1424 } else if (ch == '/' && chNext == '>') {
1425 if (eClass == SCE_H_TAGUNKNOWN) {
1426 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1427 } else {
1428 styler.ColourTo(i - 1, StateToPrint);
1429 styler.ColourTo(i + 1, SCE_H_TAGEND);
1431 i++;
1432 ch = chNext;
1433 state = SCE_H_DEFAULT;
1434 tagOpened = false;
1435 } else {
1436 if (eClass != SCE_H_TAGUNKNOWN) {
1437 if (eClass == SCE_H_SGML_DEFAULT) {
1438 state = SCE_H_SGML_DEFAULT;
1439 } else {
1440 state = SCE_H_OTHER;
1445 break;
1446 case SCE_H_ATTRIBUTE:
1447 if (!setAttributeContinue.Contains(ch)) {
1448 if (inScriptType == eNonHtmlScript) {
1449 int scriptLanguagePrev = scriptLanguage;
1450 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1451 scriptLanguage = clientScript;
1452 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1453 inScriptType = eHtml;
1455 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1456 if (ch == '>') {
1457 styler.ColourTo(i, SCE_H_TAG);
1458 if (inScriptType == eNonHtmlScript) {
1459 state = StateForScript(scriptLanguage);
1460 } else {
1461 state = SCE_H_DEFAULT;
1463 tagOpened = false;
1464 if (!tagDontFold) {
1465 if (tagClosing) {
1466 levelCurrent--;
1467 } else {
1468 levelCurrent++;
1471 tagClosing = false;
1472 } else if (ch == '=') {
1473 styler.ColourTo(i, SCE_H_OTHER);
1474 state = SCE_H_VALUE;
1475 } else {
1476 state = SCE_H_OTHER;
1479 break;
1480 case SCE_H_OTHER:
1481 if (ch == '>') {
1482 styler.ColourTo(i - 1, StateToPrint);
1483 styler.ColourTo(i, SCE_H_TAG);
1484 if (inScriptType == eNonHtmlScript) {
1485 state = StateForScript(scriptLanguage);
1486 } else {
1487 state = SCE_H_DEFAULT;
1489 tagOpened = false;
1490 if (!tagDontFold) {
1491 if (tagClosing) {
1492 levelCurrent--;
1493 } else {
1494 levelCurrent++;
1497 tagClosing = false;
1498 } else if (ch == '\"') {
1499 styler.ColourTo(i - 1, StateToPrint);
1500 state = SCE_H_DOUBLESTRING;
1501 } else if (ch == '\'') {
1502 styler.ColourTo(i - 1, StateToPrint);
1503 state = SCE_H_SINGLESTRING;
1504 } else if (ch == '=') {
1505 styler.ColourTo(i, StateToPrint);
1506 state = SCE_H_VALUE;
1507 } else if (ch == '/' && chNext == '>') {
1508 styler.ColourTo(i - 1, StateToPrint);
1509 styler.ColourTo(i + 1, SCE_H_TAGEND);
1510 i++;
1511 ch = chNext;
1512 state = SCE_H_DEFAULT;
1513 tagOpened = false;
1514 } else if (ch == '?' && chNext == '>') {
1515 styler.ColourTo(i - 1, StateToPrint);
1516 styler.ColourTo(i + 1, SCE_H_XMLEND);
1517 i++;
1518 ch = chNext;
1519 state = SCE_H_DEFAULT;
1520 } else if (setHTMLWord.Contains(ch)) {
1521 styler.ColourTo(i - 1, StateToPrint);
1522 state = SCE_H_ATTRIBUTE;
1524 break;
1525 case SCE_H_DOUBLESTRING:
1526 if (ch == '\"') {
1527 if (inScriptType == eNonHtmlScript) {
1528 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1530 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1531 state = SCE_H_OTHER;
1533 break;
1534 case SCE_H_SINGLESTRING:
1535 if (ch == '\'') {
1536 if (inScriptType == eNonHtmlScript) {
1537 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1539 styler.ColourTo(i, SCE_H_SINGLESTRING);
1540 state = SCE_H_OTHER;
1542 break;
1543 case SCE_H_VALUE:
1544 if (!setHTMLWord.Contains(ch)) {
1545 if (ch == '\"' && chPrev == '=') {
1546 // Should really test for being first character
1547 state = SCE_H_DOUBLESTRING;
1548 } else if (ch == '\'' && chPrev == '=') {
1549 state = SCE_H_SINGLESTRING;
1550 } else {
1551 if (IsNumber(styler.GetStartSegment(), styler)) {
1552 styler.ColourTo(i - 1, SCE_H_NUMBER);
1553 } else {
1554 styler.ColourTo(i - 1, StateToPrint);
1556 if (ch == '>') {
1557 styler.ColourTo(i, SCE_H_TAG);
1558 if (inScriptType == eNonHtmlScript) {
1559 state = StateForScript(scriptLanguage);
1560 } else {
1561 state = SCE_H_DEFAULT;
1563 tagOpened = false;
1564 if (!tagDontFold) {
1565 if (tagClosing) {
1566 levelCurrent--;
1567 } else {
1568 levelCurrent++;
1571 tagClosing = false;
1572 } else {
1573 state = SCE_H_OTHER;
1577 break;
1578 case SCE_HJ_DEFAULT:
1579 case SCE_HJ_START:
1580 case SCE_HJ_SYMBOLS:
1581 if (IsAWordStart(ch)) {
1582 styler.ColourTo(i - 1, StateToPrint);
1583 state = SCE_HJ_WORD;
1584 } else if (ch == '/' && chNext == '*') {
1585 styler.ColourTo(i - 1, StateToPrint);
1586 if (chNext2 == '*')
1587 state = SCE_HJ_COMMENTDOC;
1588 else
1589 state = SCE_HJ_COMMENT;
1590 if (chNext2 == '/') {
1591 // Eat the * so it isn't used for the end of the comment
1592 i++;
1594 } else if (ch == '/' && chNext == '/') {
1595 styler.ColourTo(i - 1, StateToPrint);
1596 state = SCE_HJ_COMMENTLINE;
1597 } else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
1598 styler.ColourTo(i - 1, StateToPrint);
1599 state = SCE_HJ_REGEX;
1600 } else if (ch == '\"') {
1601 styler.ColourTo(i - 1, StateToPrint);
1602 state = SCE_HJ_DOUBLESTRING;
1603 } else if (ch == '\'') {
1604 styler.ColourTo(i - 1, StateToPrint);
1605 state = SCE_HJ_SINGLESTRING;
1606 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1607 styler.SafeGetCharAt(i + 3) == '-') {
1608 styler.ColourTo(i - 1, StateToPrint);
1609 state = SCE_HJ_COMMENTLINE;
1610 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1611 styler.ColourTo(i - 1, StateToPrint);
1612 state = SCE_HJ_COMMENTLINE;
1613 i += 2;
1614 } else if (IsOperator(ch)) {
1615 styler.ColourTo(i - 1, StateToPrint);
1616 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1617 state = SCE_HJ_DEFAULT;
1618 } else if ((ch == ' ') || (ch == '\t')) {
1619 if (state == SCE_HJ_START) {
1620 styler.ColourTo(i - 1, StateToPrint);
1621 state = SCE_HJ_DEFAULT;
1624 break;
1625 case SCE_HJ_WORD:
1626 if (!IsAWordChar(ch)) {
1627 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1628 //styler.ColourTo(i - 1, eHTJSKeyword);
1629 state = SCE_HJ_DEFAULT;
1630 if (ch == '/' && chNext == '*') {
1631 if (chNext2 == '*')
1632 state = SCE_HJ_COMMENTDOC;
1633 else
1634 state = SCE_HJ_COMMENT;
1635 } else if (ch == '/' && chNext == '/') {
1636 state = SCE_HJ_COMMENTLINE;
1637 } else if (ch == '\"') {
1638 state = SCE_HJ_DOUBLESTRING;
1639 } else if (ch == '\'') {
1640 state = SCE_HJ_SINGLESTRING;
1641 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1642 styler.ColourTo(i - 1, StateToPrint);
1643 state = SCE_HJ_COMMENTLINE;
1644 i += 2;
1645 } else if (IsOperator(ch)) {
1646 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1647 state = SCE_HJ_DEFAULT;
1650 break;
1651 case SCE_HJ_COMMENT:
1652 case SCE_HJ_COMMENTDOC:
1653 if (ch == '/' && chPrev == '*') {
1654 styler.ColourTo(i, StateToPrint);
1655 state = SCE_HJ_DEFAULT;
1656 ch = ' ';
1658 break;
1659 case SCE_HJ_COMMENTLINE:
1660 if (ch == '\r' || ch == '\n') {
1661 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1662 state = SCE_HJ_DEFAULT;
1663 ch = ' ';
1665 break;
1666 case SCE_HJ_DOUBLESTRING:
1667 if (ch == '\\') {
1668 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1669 i++;
1671 } else if (ch == '\"') {
1672 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1673 state = SCE_HJ_DEFAULT;
1674 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1675 styler.ColourTo(i - 1, StateToPrint);
1676 state = SCE_HJ_COMMENTLINE;
1677 i += 2;
1678 } else if (isLineEnd(ch)) {
1679 styler.ColourTo(i - 1, StateToPrint);
1680 state = SCE_HJ_STRINGEOL;
1682 break;
1683 case SCE_HJ_SINGLESTRING:
1684 if (ch == '\\') {
1685 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1686 i++;
1688 } else if (ch == '\'') {
1689 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1690 state = SCE_HJ_DEFAULT;
1691 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1692 styler.ColourTo(i - 1, StateToPrint);
1693 state = SCE_HJ_COMMENTLINE;
1694 i += 2;
1695 } else if (isLineEnd(ch)) {
1696 styler.ColourTo(i - 1, StateToPrint);
1697 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
1698 state = SCE_HJ_STRINGEOL;
1701 break;
1702 case SCE_HJ_STRINGEOL:
1703 if (!isLineEnd(ch)) {
1704 styler.ColourTo(i - 1, StateToPrint);
1705 state = SCE_HJ_DEFAULT;
1706 } else if (!isLineEnd(chNext)) {
1707 styler.ColourTo(i, StateToPrint);
1708 state = SCE_HJ_DEFAULT;
1710 break;
1711 case SCE_HJ_REGEX:
1712 if (ch == '\r' || ch == '\n' || ch == '/') {
1713 if (ch == '/') {
1714 while (IsASCII(chNext) && islower(chNext)) { // gobble regex flags
1715 i++;
1716 ch = chNext;
1717 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1720 styler.ColourTo(i, StateToPrint);
1721 state = SCE_HJ_DEFAULT;
1722 } else if (ch == '\\') {
1723 // Gobble up the quoted character
1724 if (chNext == '\\' || chNext == '/') {
1725 i++;
1726 ch = chNext;
1727 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1730 break;
1731 case SCE_HB_DEFAULT:
1732 case SCE_HB_START:
1733 if (IsAWordStart(ch)) {
1734 styler.ColourTo(i - 1, StateToPrint);
1735 state = SCE_HB_WORD;
1736 } else if (ch == '\'') {
1737 styler.ColourTo(i - 1, StateToPrint);
1738 state = SCE_HB_COMMENTLINE;
1739 } else if (ch == '\"') {
1740 styler.ColourTo(i - 1, StateToPrint);
1741 state = SCE_HB_STRING;
1742 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1743 styler.SafeGetCharAt(i + 3) == '-') {
1744 styler.ColourTo(i - 1, StateToPrint);
1745 state = SCE_HB_COMMENTLINE;
1746 } else if (IsOperator(ch)) {
1747 styler.ColourTo(i - 1, StateToPrint);
1748 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1749 state = SCE_HB_DEFAULT;
1750 } else if ((ch == ' ') || (ch == '\t')) {
1751 if (state == SCE_HB_START) {
1752 styler.ColourTo(i - 1, StateToPrint);
1753 state = SCE_HB_DEFAULT;
1756 break;
1757 case SCE_HB_WORD:
1758 if (!IsAWordChar(ch)) {
1759 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1760 if (state == SCE_HB_DEFAULT) {
1761 if (ch == '\"') {
1762 state = SCE_HB_STRING;
1763 } else if (ch == '\'') {
1764 state = SCE_HB_COMMENTLINE;
1765 } else if (IsOperator(ch)) {
1766 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1767 state = SCE_HB_DEFAULT;
1771 break;
1772 case SCE_HB_STRING:
1773 if (ch == '\"') {
1774 styler.ColourTo(i, StateToPrint);
1775 state = SCE_HB_DEFAULT;
1776 } else if (ch == '\r' || ch == '\n') {
1777 styler.ColourTo(i - 1, StateToPrint);
1778 state = SCE_HB_STRINGEOL;
1780 break;
1781 case SCE_HB_COMMENTLINE:
1782 if (ch == '\r' || ch == '\n') {
1783 styler.ColourTo(i - 1, StateToPrint);
1784 state = SCE_HB_DEFAULT;
1786 break;
1787 case SCE_HB_STRINGEOL:
1788 if (!isLineEnd(ch)) {
1789 styler.ColourTo(i - 1, StateToPrint);
1790 state = SCE_HB_DEFAULT;
1791 } else if (!isLineEnd(chNext)) {
1792 styler.ColourTo(i, StateToPrint);
1793 state = SCE_HB_DEFAULT;
1795 break;
1796 case SCE_HP_DEFAULT:
1797 case SCE_HP_START:
1798 if (IsAWordStart(ch)) {
1799 styler.ColourTo(i - 1, StateToPrint);
1800 state = SCE_HP_WORD;
1801 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1802 styler.SafeGetCharAt(i + 3) == '-') {
1803 styler.ColourTo(i - 1, StateToPrint);
1804 state = SCE_HP_COMMENTLINE;
1805 } else if (ch == '#') {
1806 styler.ColourTo(i - 1, StateToPrint);
1807 state = SCE_HP_COMMENTLINE;
1808 } else if (ch == '\"') {
1809 styler.ColourTo(i - 1, StateToPrint);
1810 if (chNext == '\"' && chNext2 == '\"') {
1811 i += 2;
1812 state = SCE_HP_TRIPLEDOUBLE;
1813 ch = ' ';
1814 chPrev = ' ';
1815 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1816 } else {
1817 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1818 state = SCE_HP_STRING;
1820 } else if (ch == '\'') {
1821 styler.ColourTo(i - 1, StateToPrint);
1822 if (chNext == '\'' && chNext2 == '\'') {
1823 i += 2;
1824 state = SCE_HP_TRIPLE;
1825 ch = ' ';
1826 chPrev = ' ';
1827 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1828 } else {
1829 state = SCE_HP_CHARACTER;
1831 } else if (IsOperator(ch)) {
1832 styler.ColourTo(i - 1, StateToPrint);
1833 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1834 } else if ((ch == ' ') || (ch == '\t')) {
1835 if (state == SCE_HP_START) {
1836 styler.ColourTo(i - 1, StateToPrint);
1837 state = SCE_HP_DEFAULT;
1840 break;
1841 case SCE_HP_WORD:
1842 if (!IsAWordChar(ch)) {
1843 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1844 state = SCE_HP_DEFAULT;
1845 if (ch == '#') {
1846 state = SCE_HP_COMMENTLINE;
1847 } else if (ch == '\"') {
1848 if (chNext == '\"' && chNext2 == '\"') {
1849 i += 2;
1850 state = SCE_HP_TRIPLEDOUBLE;
1851 ch = ' ';
1852 chPrev = ' ';
1853 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1854 } else {
1855 state = SCE_HP_STRING;
1857 } else if (ch == '\'') {
1858 if (chNext == '\'' && chNext2 == '\'') {
1859 i += 2;
1860 state = SCE_HP_TRIPLE;
1861 ch = ' ';
1862 chPrev = ' ';
1863 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1864 } else {
1865 state = SCE_HP_CHARACTER;
1867 } else if (IsOperator(ch)) {
1868 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1871 break;
1872 case SCE_HP_COMMENTLINE:
1873 if (ch == '\r' || ch == '\n') {
1874 styler.ColourTo(i - 1, StateToPrint);
1875 state = SCE_HP_DEFAULT;
1877 break;
1878 case SCE_HP_STRING:
1879 if (ch == '\\') {
1880 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1881 i++;
1882 ch = chNext;
1883 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1885 } else if (ch == '\"') {
1886 styler.ColourTo(i, StateToPrint);
1887 state = SCE_HP_DEFAULT;
1889 break;
1890 case SCE_HP_CHARACTER:
1891 if (ch == '\\') {
1892 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1893 i++;
1894 ch = chNext;
1895 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1897 } else if (ch == '\'') {
1898 styler.ColourTo(i, StateToPrint);
1899 state = SCE_HP_DEFAULT;
1901 break;
1902 case SCE_HP_TRIPLE:
1903 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1904 styler.ColourTo(i, StateToPrint);
1905 state = SCE_HP_DEFAULT;
1907 break;
1908 case SCE_HP_TRIPLEDOUBLE:
1909 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1910 styler.ColourTo(i, StateToPrint);
1911 state = SCE_HP_DEFAULT;
1913 break;
1914 ///////////// start - PHP state handling
1915 case SCE_HPHP_WORD:
1916 if (!IsAWordChar(ch)) {
1917 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1918 if (ch == '/' && chNext == '*') {
1919 i++;
1920 state = SCE_HPHP_COMMENT;
1921 } else if (ch == '/' && chNext == '/') {
1922 i++;
1923 state = SCE_HPHP_COMMENTLINE;
1924 } else if (ch == '#') {
1925 state = SCE_HPHP_COMMENTLINE;
1926 } else if (ch == '\"') {
1927 state = SCE_HPHP_HSTRING;
1928 StringCopy(phpStringDelimiter, "\"");
1929 } else if (styler.Match(i, "<<<")) {
1930 bool isSimpleString = false;
1931 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1932 if (strlen(phpStringDelimiter)) {
1933 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1934 if (foldHeredoc) levelCurrent++;
1936 } else if (ch == '\'') {
1937 state = SCE_HPHP_SIMPLESTRING;
1938 StringCopy(phpStringDelimiter, "\'");
1939 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1940 state = SCE_HPHP_VARIABLE;
1941 } else if (IsOperator(ch)) {
1942 state = SCE_HPHP_OPERATOR;
1943 } else {
1944 state = SCE_HPHP_DEFAULT;
1947 break;
1948 case SCE_HPHP_NUMBER:
1949 // recognize bases 8,10 or 16 integers OR floating-point numbers
1950 if (!IsADigit(ch)
1951 && strchr(".xXabcdefABCDEF", ch) == NULL
1952 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1953 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1954 if (IsOperator(ch))
1955 state = SCE_HPHP_OPERATOR;
1956 else
1957 state = SCE_HPHP_DEFAULT;
1959 break;
1960 case SCE_HPHP_VARIABLE:
1961 if (!IsPhpWordChar(chNext)) {
1962 styler.ColourTo(i, SCE_HPHP_VARIABLE);
1963 state = SCE_HPHP_DEFAULT;
1965 break;
1966 case SCE_HPHP_COMMENT:
1967 if (ch == '/' && chPrev == '*') {
1968 styler.ColourTo(i, StateToPrint);
1969 state = SCE_HPHP_DEFAULT;
1971 break;
1972 case SCE_HPHP_COMMENTLINE:
1973 if (ch == '\r' || ch == '\n') {
1974 styler.ColourTo(i - 1, StateToPrint);
1975 state = SCE_HPHP_DEFAULT;
1977 break;
1978 case SCE_HPHP_HSTRING:
1979 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1980 // skip the next char
1981 i++;
1982 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1983 && IsPhpWordStart(chNext2)) {
1984 styler.ColourTo(i - 1, StateToPrint);
1985 state = SCE_HPHP_COMPLEX_VARIABLE;
1986 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1987 styler.ColourTo(i - 1, StateToPrint);
1988 state = SCE_HPHP_HSTRING_VARIABLE;
1989 } else if (styler.Match(i, phpStringDelimiter)) {
1990 if (phpStringDelimiter[0] == '\"') {
1991 styler.ColourTo(i, StateToPrint);
1992 state = SCE_HPHP_DEFAULT;
1993 } else if (isLineEnd(chPrev)) {
1994 const int psdLength = static_cast<int>(strlen(phpStringDelimiter));
1995 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1996 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1997 if (isLineEnd(chAfterPsd) ||
1998 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1999 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2000 styler.ColourTo(i, StateToPrint);
2001 state = SCE_HPHP_DEFAULT;
2002 if (foldHeredoc) levelCurrent--;
2006 break;
2007 case SCE_HPHP_SIMPLESTRING:
2008 if (phpStringDelimiter[0] == '\'') {
2009 if (ch == '\\') {
2010 // skip the next char
2011 i++;
2012 } else if (ch == '\'') {
2013 styler.ColourTo(i, StateToPrint);
2014 state = SCE_HPHP_DEFAULT;
2016 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
2017 const int psdLength = static_cast<int>(strlen(phpStringDelimiter));
2018 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2019 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2020 if (isLineEnd(chAfterPsd) ||
2021 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2022 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2023 styler.ColourTo(i, StateToPrint);
2024 state = SCE_HPHP_DEFAULT;
2025 if (foldHeredoc) levelCurrent--;
2028 break;
2029 case SCE_HPHP_HSTRING_VARIABLE:
2030 if (!IsPhpWordChar(chNext)) {
2031 styler.ColourTo(i, StateToPrint);
2032 state = SCE_HPHP_HSTRING;
2034 break;
2035 case SCE_HPHP_COMPLEX_VARIABLE:
2036 if (ch == '}') {
2037 styler.ColourTo(i, StateToPrint);
2038 state = SCE_HPHP_HSTRING;
2040 break;
2041 case SCE_HPHP_OPERATOR:
2042 case SCE_HPHP_DEFAULT:
2043 styler.ColourTo(i - 1, StateToPrint);
2044 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2045 state = SCE_HPHP_NUMBER;
2046 } else if (IsAWordStart(ch)) {
2047 state = SCE_HPHP_WORD;
2048 } else if (ch == '/' && chNext == '*') {
2049 i++;
2050 state = SCE_HPHP_COMMENT;
2051 } else if (ch == '/' && chNext == '/') {
2052 i++;
2053 state = SCE_HPHP_COMMENTLINE;
2054 } else if (ch == '#') {
2055 state = SCE_HPHP_COMMENTLINE;
2056 } else if (ch == '\"') {
2057 state = SCE_HPHP_HSTRING;
2058 StringCopy(phpStringDelimiter, "\"");
2059 } else if (styler.Match(i, "<<<")) {
2060 bool isSimpleString = false;
2061 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
2062 if (strlen(phpStringDelimiter)) {
2063 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2064 if (foldHeredoc) levelCurrent++;
2066 } else if (ch == '\'') {
2067 state = SCE_HPHP_SIMPLESTRING;
2068 StringCopy(phpStringDelimiter, "\'");
2069 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2070 state = SCE_HPHP_VARIABLE;
2071 } else if (IsOperator(ch)) {
2072 state = SCE_HPHP_OPERATOR;
2073 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2074 state = SCE_HPHP_DEFAULT;
2076 break;
2077 ///////////// end - PHP state handling
2080 // Some of the above terminated their lexeme but since the same character starts
2081 // the same class again, only reenter if non empty segment.
2083 bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2084 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2085 if ((ch == '\"') && (nonEmptySegment)) {
2086 state = SCE_HB_STRING;
2087 } else if (ch == '\'') {
2088 state = SCE_HB_COMMENTLINE;
2089 } else if (IsAWordStart(ch)) {
2090 state = SCE_HB_WORD;
2091 } else if (IsOperator(ch)) {
2092 styler.ColourTo(i, SCE_HB_DEFAULT);
2094 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2095 if ((ch == '\"') && (nonEmptySegment)) {
2096 state = SCE_HBA_STRING;
2097 } else if (ch == '\'') {
2098 state = SCE_HBA_COMMENTLINE;
2099 } else if (IsAWordStart(ch)) {
2100 state = SCE_HBA_WORD;
2101 } else if (IsOperator(ch)) {
2102 styler.ColourTo(i, SCE_HBA_DEFAULT);
2104 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2105 if (ch == '/' && chNext == '*') {
2106 if (styler.SafeGetCharAt(i + 2) == '*')
2107 state = SCE_HJ_COMMENTDOC;
2108 else
2109 state = SCE_HJ_COMMENT;
2110 } else if (ch == '/' && chNext == '/') {
2111 state = SCE_HJ_COMMENTLINE;
2112 } else if ((ch == '\"') && (nonEmptySegment)) {
2113 state = SCE_HJ_DOUBLESTRING;
2114 } else if ((ch == '\'') && (nonEmptySegment)) {
2115 state = SCE_HJ_SINGLESTRING;
2116 } else if (IsAWordStart(ch)) {
2117 state = SCE_HJ_WORD;
2118 } else if (IsOperator(ch)) {
2119 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2124 switch (state) {
2125 case SCE_HJ_WORD:
2126 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2127 break;
2128 case SCE_HB_WORD:
2129 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2130 break;
2131 case SCE_HP_WORD:
2132 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2133 break;
2134 case SCE_HPHP_WORD:
2135 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2136 break;
2137 default:
2138 StateToPrint = statePrintForState(state, inScriptType);
2139 if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2140 styler.ColourTo(lengthDoc - 1, StateToPrint);
2141 break;
2144 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2145 if (fold) {
2146 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2147 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2151 static void ColouriseXMLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
2152 Accessor &styler) {
2153 // Passing in true because we're lexing XML
2154 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, true);
2157 static void ColouriseHTMLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
2158 Accessor &styler) {
2159 // Passing in false because we're notlexing XML
2160 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, false);
2163 static void ColourisePHPScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
2164 Accessor &styler) {
2165 if (startPos == 0)
2166 initStyle = SCE_HPHP_DEFAULT;
2167 ColouriseHTMLDoc(startPos, length, initStyle, keywordlists, styler);
// Human-readable descriptions of the keyword lists, handed to the lmHTML and
// lmXML LexerModule definitions.
// NOTE(review): the order presumably mirrors the keywordlists[] indices used by
// the lexer - confirm before reordering.
// The final 0 entry is the end-of-list sentinel; it must stay last.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	0,
};
// Keyword-list descriptions handed to the lmPHPSCRIPT LexerModule definition.
// Only the fifth slot carries a description ("PHP keywords"); the other slots
// are empty strings marked as unused, which keeps "PHP keywords" at the same
// index it has in htmlWordListDesc.
// The final 0 entry is the end-of-list sentinel; it must stay last.
static const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	0,
};
2190 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc);
2191 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc);
2192 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc);