Set release date.
[geany-mirror.git] / scintilla / LexHTML.cxx
blob9bec3a2141a56f9e9edafb55eb1ad9be4191ae99
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
14 #include "Platform.h"
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "CharacterSet.h"
24 #ifdef SCI_NAMESPACE
25 using namespace Scintilla;
26 #endif
// Distance between each script language's primary style range and its
// alternate "A" range (SCE_HJ_* -> SCE_HJA_*, SCE_HB_* -> SCE_HBA_*,
// SCE_HP_* -> SCE_HPA_*). Added or subtracted when converting between the
// internal state and the style that is actually written to the document.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Language (or pseudo-language) currently being lexed.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Kind of region the lexer is in; stored in the low two bits of the line state.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// Returns true when ch is an ASCII letter or digit, '.' or '_'.
// Anything outside 0..0x7F is rejected up front: callers may hand in a
// sign-extended char, and isalnum() is undefined for negative arguments
// other than EOF.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Returns true when ch may begin a word: an ASCII letter or digit, or '_'.
// Anything outside 0..0x7F is rejected up front: callers may hand in a
// sign-extended char, and isalnum() is undefined for negative arguments
// other than EOF.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// Returns true when ch is one of the punctuation characters treated as an
// operator. ASCII letters and digits are never operators. Note that '.' is
// part of the accepted set.
inline bool IsOperator(int ch) {
	if (isascii(ch) && isalnum(ch))
		return false;
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
// Folds 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline int MakeLowerCase(int ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	return isUpper ? (ch - 'A' + 'a') : ch;
}
64 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
65 size_t i = 0;
66 for (; (i < end - start + 1) && (i < len-1); i++) {
67 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
69 s[i] = '\0';
72 static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
74 size_t i = 0;
75 for (; i < sLen-1; i++) {
76 char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
77 if ((i == 0) && !IsAWordStart(ch))
78 break;
79 if ((i > 0) && !IsAWordChar(ch))
80 break;
81 s[i] = ch;
83 s[i] = '\0';
85 return s;
88 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
89 char s[100];
90 GetTextSegment(styler, start, end, s, sizeof(s));
91 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
92 if (strstr(s, "src")) // External script
93 return eScriptNone;
94 if (strstr(s, "vbs"))
95 return eScriptVBS;
96 if (strstr(s, "pyth"))
97 return eScriptPython;
98 if (strstr(s, "javas"))
99 return eScriptJS;
100 if (strstr(s, "jscr"))
101 return eScriptJS;
102 if (strstr(s, "php"))
103 return eScriptPHP;
104 if (strstr(s, "xml")) {
105 const char *xml = strstr(s, "xml");
106 for (const char *t=s; t<xml; t++) {
107 if (!IsASpace(*t)) {
108 return prevValue;
111 return eScriptXML;
114 return prevValue;
117 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
118 int iResult = 0;
119 char s[100];
120 GetTextSegment(styler, start, end, s, sizeof(s));
121 if (0 == strncmp(s, "php", 3)) {
122 iResult = 3;
125 return iResult;
128 static script_type ScriptOfState(int state) {
129 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
130 return eScriptPython;
131 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
132 return eScriptVBS;
133 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
134 return eScriptJS;
135 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
136 return eScriptPHP;
137 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
138 return eScriptSGML;
139 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
140 return eScriptSGMLblock;
141 } else {
142 return eScriptNone;
146 static int statePrintForState(int state, script_mode inScriptType) {
147 int StateToPrint = state;
149 if (state >= SCE_HJ_START) {
150 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
151 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
152 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
153 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
154 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
155 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
159 return StateToPrint;
162 static int stateForPrintState(int StateToPrint) {
163 int state;
165 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
166 state = StateToPrint - SCE_HA_PYTHON;
167 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
168 state = StateToPrint - SCE_HA_VBS;
169 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
170 state = StateToPrint - SCE_HA_JS;
171 } else {
172 state = StateToPrint;
175 return state;
178 static inline bool IsNumber(unsigned int start, Accessor &styler) {
179 return IsADigit(styler[start]) || (styler[start] == '.') ||
180 (styler[start] == '-') || (styler[start] == '#');
183 static inline bool isStringState(int state) {
184 bool bResult;
186 switch (state) {
187 case SCE_HJ_DOUBLESTRING:
188 case SCE_HJ_SINGLESTRING:
189 case SCE_HJA_DOUBLESTRING:
190 case SCE_HJA_SINGLESTRING:
191 case SCE_HB_STRING:
192 case SCE_HBA_STRING:
193 case SCE_HP_STRING:
194 case SCE_HP_CHARACTER:
195 case SCE_HP_TRIPLE:
196 case SCE_HP_TRIPLEDOUBLE:
197 case SCE_HPA_STRING:
198 case SCE_HPA_CHARACTER:
199 case SCE_HPA_TRIPLE:
200 case SCE_HPA_TRIPLEDOUBLE:
201 case SCE_HPHP_HSTRING:
202 case SCE_HPHP_SIMPLESTRING:
203 case SCE_HPHP_HSTRING_VARIABLE:
204 case SCE_HPHP_COMPLEX_VARIABLE:
205 bResult = true;
206 break;
207 default :
208 bResult = false;
209 break;
211 return bResult;
214 static inline bool stateAllowsTermination(int state) {
215 bool allowTermination = !isStringState(state);
216 if (allowTermination) {
217 switch (state) {
218 case SCE_HB_COMMENTLINE:
219 case SCE_HPHP_COMMENT:
220 case SCE_HP_COMMENTLINE:
221 case SCE_HPA_COMMENTLINE:
222 allowTermination = false;
225 return allowTermination;
228 // not really well done, since it's only comments that should lex the %> and <%
229 static inline bool isCommentASPState(int state) {
230 bool bResult;
232 switch (state) {
233 case SCE_HJ_COMMENT:
234 case SCE_HJ_COMMENTLINE:
235 case SCE_HJ_COMMENTDOC:
236 case SCE_HB_COMMENTLINE:
237 case SCE_HP_COMMENTLINE:
238 case SCE_HPHP_COMMENT:
239 case SCE_HPHP_COMMENTLINE:
240 bResult = true;
241 break;
242 default :
243 bResult = false;
244 break;
246 return bResult;
249 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
250 bool wordIsNumber = IsNumber(start, styler);
251 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
252 if (wordIsNumber) {
253 chAttr = SCE_H_NUMBER;
254 } else {
255 char s[100];
256 GetTextSegment(styler, start, end, s, sizeof(s));
257 if (keywords.InList(s))
258 chAttr = SCE_H_ATTRIBUTE;
260 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
261 // No keywords -> all are known
262 chAttr = SCE_H_ATTRIBUTE;
263 styler.ColourTo(end, chAttr);
266 static int classifyTagHTML(unsigned int start, unsigned int end,
267 WordList &keywords, Accessor &styler, bool &tagDontFold,
268 bool caseSensitive, bool isXml, bool allowScripts) {
269 char s[30 + 2];
270 // Copy after the '<'
271 unsigned int i = 0;
272 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
273 char ch = styler[cPos];
274 if ((ch != '<') && (ch != '/')) {
275 s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
279 //The following is only a quick hack, to see if this whole thing would work
280 //we first need the tagname with a trailing space...
281 s[i] = ' ';
282 s[i+1] = '\0';
284 // if the current language is XML, I can fold any tag
285 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
286 //...to find it in the list of no-container-tags
287 tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
289 //now we can remove the trailing space
290 s[i] = '\0';
292 // No keywords -> all are known
293 char chAttr = SCE_H_TAGUNKNOWN;
294 if (s[0] == '!') {
295 chAttr = SCE_H_SGML_DEFAULT;
296 } else if (!keywords || keywords.InList(s)) {
297 chAttr = SCE_H_TAG;
299 styler.ColourTo(end, chAttr);
300 if (chAttr == SCE_H_TAG) {
301 if (allowScripts && 0 == strcmp(s, "script")) {
302 // check to see if this is a self-closing tag by sniffing ahead
303 bool isSelfClose = false;
304 for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
305 char ch = styler.SafeGetCharAt(cPos, '\0');
306 if (ch == '\0' || ch == '>')
307 break;
308 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
309 isSelfClose = true;
310 break;
314 // do not enter a script state if the tag self-closed
315 if (!isSelfClose)
316 chAttr = SCE_H_SCRIPT;
317 } else if (!isXml && 0 == strcmp(s, "comment")) {
318 chAttr = SCE_H_COMMENT;
321 return chAttr;
324 static void classifyWordHTJS(unsigned int start, unsigned int end,
325 WordList &keywords, Accessor &styler, script_mode inScriptType) {
326 char chAttr = SCE_HJ_WORD;
327 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
328 if (wordIsNumber)
329 chAttr = SCE_HJ_NUMBER;
330 else {
331 char s[30 + 1];
332 unsigned int i = 0;
333 for (; i < end - start + 1 && i < 30; i++) {
334 s[i] = styler[start + i];
336 s[i] = '\0';
337 if (keywords.InList(s))
338 chAttr = SCE_HJ_KEYWORD;
340 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
343 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
344 char chAttr = SCE_HB_IDENTIFIER;
345 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
346 if (wordIsNumber)
347 chAttr = SCE_HB_NUMBER;
348 else {
349 char s[100];
350 GetTextSegment(styler, start, end, s, sizeof(s));
351 if (keywords.InList(s)) {
352 chAttr = SCE_HB_WORD;
353 if (strcmp(s, "rem") == 0)
354 chAttr = SCE_HB_COMMENTLINE;
357 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
358 if (chAttr == SCE_HB_COMMENTLINE)
359 return SCE_HB_COMMENTLINE;
360 else
361 return SCE_HB_DEFAULT;
364 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
365 bool wordIsNumber = IsADigit(styler[start]);
366 char s[30 + 1];
367 unsigned int i = 0;
368 for (; i < end - start + 1 && i < 30; i++) {
369 s[i] = styler[start + i];
371 s[i] = '\0';
372 char chAttr = SCE_HP_IDENTIFIER;
373 if (0 == strcmp(prevWord, "class"))
374 chAttr = SCE_HP_CLASSNAME;
375 else if (0 == strcmp(prevWord, "def"))
376 chAttr = SCE_HP_DEFNAME;
377 else if (wordIsNumber)
378 chAttr = SCE_HP_NUMBER;
379 else if (keywords.InList(s))
380 chAttr = SCE_HP_WORD;
381 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
382 strcpy(prevWord, s);
385 // Update the word colour to default or keyword
386 // Called when in a PHP word
387 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
388 char chAttr = SCE_HPHP_DEFAULT;
389 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
390 if (wordIsNumber)
391 chAttr = SCE_HPHP_NUMBER;
392 else {
393 char s[100];
394 GetTextSegment(styler, start, end, s, sizeof(s));
395 if (keywords.InList(s))
396 chAttr = SCE_HPHP_WORD;
398 styler.ColourTo(end, chAttr);
401 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
402 char s[30 + 1];
403 unsigned int i = 0;
404 for (; i < end - start + 1 && i < 30; i++) {
405 s[i] = styler[start + i];
407 s[i] = '\0';
408 return keywords.InList(s);
411 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
412 char s[30 + 1];
413 unsigned int i = 0;
414 for (; i < end - start + 1 && i < 30; i++) {
415 s[i] = styler[start + i];
417 s[i] = '\0';
418 return (0 == strcmp(s, "[CDATA["));
421 // Return the first state to reach when entering a scripting language
422 static int StateForScript(script_type scriptLanguage) {
423 int Result;
424 switch (scriptLanguage) {
425 case eScriptVBS:
426 Result = SCE_HB_START;
427 break;
428 case eScriptPython:
429 Result = SCE_HP_START;
430 break;
431 case eScriptPHP:
432 Result = SCE_HPHP_DEFAULT;
433 break;
434 case eScriptXML:
435 Result = SCE_H_TAGUNKNOWN;
436 break;
437 case eScriptSGML:
438 Result = SCE_H_SGML_DEFAULT;
439 break;
440 case eScriptComment:
441 Result = SCE_H_COMMENT;
442 break;
443 default :
444 Result = SCE_HJ_START;
445 break;
447 return Result;
// Returns true for characters that may appear in an HTML word: any
// non-ASCII value, ASCII letters and digits, and . - _ : ! #
static inline bool ishtmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '-': case '_': case ':': case '!': case '#':
		return true;
	default:
		return false;
	}
}
// Returns true for characters that may appear in an SGML word: any
// non-ASCII value, ASCII letters and digits, and . _ : ! # [
static inline bool issgmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '_': case ':': case '!': case '#': case '[':
		return true;
	default:
		return false;
	}
}
// Returns true when ch may begin a PHP identifier: an ASCII letter, '_',
// or any value from 0x7f upwards.
static inline bool IsPhpWordStart(int ch) {
	if (ch >= 0x7f)
		return true;
	return isascii(ch) && (isalpha(ch) || (ch == '_'));
}
464 static inline bool IsPhpWordChar(int ch) {
465 return IsADigit(ch) || IsPhpWordStart(ch);
468 static bool InTagState(int state) {
469 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
470 state == SCE_H_SCRIPT ||
471 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
472 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
473 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
476 static bool IsCommentState(const int state) {
477 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
480 static bool IsScriptCommentState(const int state) {
481 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
482 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// Returns true when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
// Returns true when ch is '(', '=' or ','.
static bool isOKBeforeRE(int ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
493 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
494 if (strlen(blockType) == 0) {
495 return ((ch == '%') && (chNext == '>'));
496 } else if ((0 == strcmp(blockType, "inherit")) ||
497 (0 == strcmp(blockType, "namespace")) ||
498 (0 == strcmp(blockType, "include")) ||
499 (0 == strcmp(blockType, "page"))) {
500 return ((ch == '/') && (chNext == '>'));
501 } else if (0 == strcmp(blockType, "%")) {
502 return isLineEnd(ch);
503 } else if (0 == strcmp(blockType, "{")) {
504 return ch == '}';
505 } else {
506 return (ch == '>');
// Returns true when ch/chNext terminate the Django block of the given type:
// "%}" for a "%" block and "}}" for a "{" block. Any other block type
// (including the empty string) never ends.
static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
	if (0 == strcmp(blockType, "%"))
		return (ch == '%') && (chNext == '}');
	if (0 == strcmp(blockType, "{"))
		return (ch == '}') && (chNext == '}');
	return false;
}
522 static bool isPHPStringState(int state) {
523 return
524 (state == SCE_HPHP_HSTRING) ||
525 (state == SCE_HPHP_SIMPLESTRING) ||
526 (state == SCE_HPHP_HSTRING_VARIABLE) ||
527 (state == SCE_HPHP_COMPLEX_VARIABLE);
530 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
531 int j;
532 const int beginning = i - 1;
533 bool isValidSimpleString = false;
535 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
536 i++;
538 char ch = styler.SafeGetCharAt(i);
539 const char chNext = styler.SafeGetCharAt(i + 1);
540 if (!IsPhpWordStart(ch)) {
541 if (ch == '\'' && IsPhpWordStart(chNext)) {
542 i++;
543 ch = chNext;
544 isSimpleString = true;
545 } else {
546 phpStringDelimiter[0] = '\0';
547 return beginning;
550 phpStringDelimiter[0] = ch;
551 i++;
553 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
554 if (!IsPhpWordChar(styler[j])) {
555 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
556 isValidSimpleString = true;
557 j++;
558 break;
559 } else {
560 phpStringDelimiter[0] = '\0';
561 return beginning;
564 if (j - i < phpStringDelimiterSize - 2)
565 phpStringDelimiter[j-i+1] = styler[j];
566 else
567 i++;
569 if (isSimpleString && !isValidSimpleString) {
570 phpStringDelimiter[0] = '\0';
571 return beginning;
573 phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
574 return j - 1;
577 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
578 Accessor &styler, bool isXml) {
579 WordList &keywords = *keywordlists[0];
580 WordList &keywords2 = *keywordlists[1];
581 WordList &keywords3 = *keywordlists[2];
582 WordList &keywords4 = *keywordlists[3];
583 WordList &keywords5 = *keywordlists[4];
584 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
586 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
587 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
588 char prevWord[200];
589 prevWord[0] = '\0';
590 char phpStringDelimiter[200]; // PHP is not limited in length, we are
591 phpStringDelimiter[0] = '\0';
592 int StateToPrint = initStyle;
593 int state = stateForPrintState(StateToPrint);
594 char makoBlockType[200];
595 makoBlockType[0] = '\0';
596 char djangoBlockType[2];
597 djangoBlockType[0] = '\0';
599 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
600 if (InTagState(state)) {
601 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
602 startPos--;
603 length++;
605 state = SCE_H_DEFAULT;
607 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
608 if (isPHPStringState(state)) {
609 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
610 startPos--;
611 length++;
612 state = styler.StyleAt(startPos);
614 if (startPos == 0)
615 state = SCE_H_DEFAULT;
617 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
619 int lineCurrent = styler.GetLine(startPos);
620 int lineState;
621 if (lineCurrent > 0) {
622 lineState = styler.GetLineState(lineCurrent);
623 } else {
624 // Default client and ASP scripting language is JavaScript
625 lineState = eScriptJS << 8;
627 // property asp.default.language
628 // Script in ASP code is initially assumed to be in JavaScript.
629 // To change this to VBScript set asp.default.language to 2. Python is 3.
630 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
632 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
633 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
634 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
635 bool tagDontFold = false; //some HTML tags should not be folded
636 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
637 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
638 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
640 script_type scriptLanguage = ScriptOfState(state);
641 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
642 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
643 scriptLanguage = eScriptComment;
645 script_type beforeLanguage = ScriptOfState(beforePreProc);
647 // property fold.html
648 // Folding is turned on or off for HTML and XML files with this option.
649 // The fold option must also be on for folding to occur.
650 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
652 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
654 // property fold.html.preprocessor
655 // Folding is turned on or off for scripts embedded in HTML files with this option.
656 // The default is on.
657 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
659 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
661 // property fold.hypertext.comment
662 // Allow folding for comments in scripts embedded in HTML.
663 // The default is off.
664 const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
666 // property fold.hypertext.heredoc
667 // Allow folding for heredocs in scripts embedded in HTML.
668 // The default is off.
669 const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
671 // property html.tags.case.sensitive
672 // For XML and HTML, setting this property to 1 will make tags match in a case
673 // sensitive way which is the expected behaviour for XML and XHTML.
674 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
676 // property lexer.xml.allow.scripts
677 // Set to 0 to disable scripts in XML.
678 const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
680 // property lexer.html.mako
681 // Set to 1 to enable the mako template language.
682 const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
684 // property lexer.html.django
685 // Set to 1 to enable the django template language.
686 const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
688 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
689 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
690 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
692 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
693 int levelCurrent = levelPrev;
694 int visibleChars = 0;
695 int lineStartVisibleChars = 0;
697 int chPrev = ' ';
698 int ch = ' ';
699 int chPrevNonWhite = ' ';
700 // look back to set chPrevNonWhite properly for better regex colouring
701 if (scriptLanguage == eScriptJS && startPos > 0) {
702 int back = startPos;
703 int style = 0;
704 while (--back) {
705 style = styler.StyleAt(back);
706 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
707 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
708 break;
710 if (style == SCE_HJ_SYMBOLS) {
711 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
715 styler.StartSegment(startPos);
716 const int lengthDoc = startPos + length;
717 for (int i = startPos; i < lengthDoc; i++) {
718 const int chPrev2 = chPrev;
719 chPrev = ch;
720 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
721 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
722 chPrevNonWhite = ch;
723 ch = static_cast<unsigned char>(styler[i]);
724 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
725 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
727 // Handle DBCS codepages
728 if (styler.IsLeadByte(static_cast<char>(ch))) {
729 chPrev = ' ';
730 i += 1;
731 continue;
734 if ((!IsASpace(ch) || !foldCompact) && fold)
735 visibleChars++;
736 if (!IsASpace(ch))
737 lineStartVisibleChars++;
739 // decide what is the current state to print (depending of the script tag)
740 StateToPrint = statePrintForState(state, inScriptType);
742 // handle script folding
743 if (fold) {
744 switch (scriptLanguage) {
745 case eScriptJS:
746 case eScriptPHP:
747 //not currently supported case eScriptVBS:
749 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
750 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
751 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
752 if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
753 levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
755 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
756 levelCurrent--;
758 break;
759 case eScriptPython:
760 if (state != SCE_HP_COMMENTLINE) {
761 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
762 levelCurrent++;
763 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
764 // check if the number of tabs is lower than the level
765 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
766 for (int j = 0; Findlevel > 0; j++) {
767 char chTmp = styler.SafeGetCharAt(i + j + 1);
768 if (chTmp == '\t') {
769 Findlevel -= 8;
770 } else if (chTmp == ' ') {
771 Findlevel--;
772 } else {
773 break;
777 if (Findlevel > 0) {
778 levelCurrent -= Findlevel / 8;
779 if (Findlevel % 8)
780 levelCurrent--;
784 break;
785 default:
786 break;
790 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
791 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
792 // Avoid triggering two times on Dos/Win
793 // New line -> record any line state onto /next/ line
794 if (fold) {
795 int lev = levelPrev;
796 if (visibleChars == 0)
797 lev |= SC_FOLDLEVELWHITEFLAG;
798 if ((levelCurrent > levelPrev) && (visibleChars > 0))
799 lev |= SC_FOLDLEVELHEADERFLAG;
801 styler.SetLevel(lineCurrent, lev);
802 visibleChars = 0;
803 levelPrev = levelCurrent;
805 lineCurrent++;
806 lineStartVisibleChars = 0;
807 styler.SetLineState(lineCurrent,
808 ((inScriptType & 0x03) << 0) |
809 ((tagOpened & 0x01) << 2) |
810 ((tagClosing & 0x01) << 3) |
811 ((aspScript & 0x0F) << 4) |
812 ((clientScript & 0x0F) << 8) |
813 ((beforePreProc & 0xFF) << 12));
816 // Allow falling through to mako handling code if newline is going to end a block
817 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
818 (!isMako || (0 != strcmp(makoBlockType, "%")))) {
821 // generic end of script processing
822 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
823 // Check if it's the end of the script tag (or any other HTML tag)
824 switch (state) {
825 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
826 case SCE_H_DOUBLESTRING:
827 case SCE_H_SINGLESTRING:
828 case SCE_HJ_COMMENT:
829 case SCE_HJ_COMMENTDOC:
830 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
831 // the end of script marker from some JS interpreters.
832 case SCE_HB_COMMENTLINE:
833 case SCE_HBA_COMMENTLINE:
834 case SCE_HJ_DOUBLESTRING:
835 case SCE_HJ_SINGLESTRING:
836 case SCE_HJ_REGEX:
837 case SCE_HB_STRING:
838 case SCE_HBA_STRING:
839 case SCE_HP_STRING:
840 case SCE_HP_TRIPLE:
841 case SCE_HP_TRIPLEDOUBLE:
842 case SCE_HPHP_HSTRING:
843 case SCE_HPHP_SIMPLESTRING:
844 case SCE_HPHP_COMMENT:
845 case SCE_HPHP_COMMENTLINE:
846 break;
847 default :
848 // check if the closing tag is a script tag
849 if (const char *tag =
850 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
851 state == SCE_H_COMMENT ? "comment" : 0) {
852 int j = i + 2;
853 int chr;
854 do {
855 chr = static_cast<int>(*tag++);
856 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
857 if (chr != 0) break;
859 // closing tag of the script (it's a closing HTML tag anyway)
860 styler.ColourTo(i - 1, StateToPrint);
861 state = SCE_H_TAGUNKNOWN;
862 inScriptType = eHtml;
863 scriptLanguage = eScriptNone;
864 clientScript = eScriptJS;
865 i += 2;
866 visibleChars += 2;
867 tagClosing = true;
868 continue;
872 /////////////////////////////////////
873 // handle the start of PHP pre-processor = Non-HTML
874 else if ((state != SCE_H_ASPAT) &&
875 !isPHPStringState(state) &&
876 (state != SCE_HPHP_COMMENT) &&
877 (ch == '<') &&
878 (chNext == '?') &&
879 !IsScriptCommentState(state) ) {
880 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
881 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
882 styler.ColourTo(i - 1, StateToPrint);
883 beforePreProc = state;
884 i++;
885 visibleChars++;
886 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
887 if (scriptLanguage == eScriptXML)
888 styler.ColourTo(i, SCE_H_XMLSTART);
889 else
890 styler.ColourTo(i, SCE_H_QUESTION);
891 state = StateForScript(scriptLanguage);
892 if (inScriptType == eNonHtmlScript)
893 inScriptType = eNonHtmlScriptPreProc;
894 else
895 inScriptType = eNonHtmlPreProc;
896 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
897 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
898 levelCurrent++;
900 // should be better
901 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
902 continue;
905 // handle the start Mako template Python code
906 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
907 (lineStartVisibleChars == 1 && ch == '%') ||
908 (ch == '$' && chNext == '{') ||
909 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
910 if (ch == '%')
911 strcpy(makoBlockType, "%");
912 else if (ch == '$')
913 strcpy(makoBlockType, "{");
914 else if (chNext == '/')
915 GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
916 else
917 GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
918 styler.ColourTo(i - 1, StateToPrint);
919 beforePreProc = state;
920 if (inScriptType == eNonHtmlScript)
921 inScriptType = eNonHtmlScriptPreProc;
922 else
923 inScriptType = eNonHtmlPreProc;
925 if (chNext == '/') {
926 i += 2;
927 visibleChars += 2;
928 } else if (ch != '%') {
929 i++;
930 visibleChars++;
932 state = SCE_HP_START;
933 scriptLanguage = eScriptPython;
934 styler.ColourTo(i, SCE_H_ASP);
935 if (foldHTMLPreprocessor && ch == '<')
936 levelCurrent++;
938 if (ch != '%' && ch != '$') {
939 i += strlen(makoBlockType);
940 visibleChars += strlen(makoBlockType);
941 if (keywords4.InList(makoBlockType))
942 styler.ColourTo(i, SCE_HP_WORD);
943 else
944 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
947 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
948 continue;
951 // handle the start Django template code
952 else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
953 if (chNext == '%')
954 strcpy(djangoBlockType, "%");
955 else
956 strcpy(djangoBlockType, "{");
957 styler.ColourTo(i - 1, StateToPrint);
958 beforePreProc = state;
959 if (inScriptType == eNonHtmlScript)
960 inScriptType = eNonHtmlScriptPreProc;
961 else
962 inScriptType = eNonHtmlPreProc;
964 i += 1;
965 visibleChars += 1;
966 state = SCE_HP_START;
967 beforeLanguage = scriptLanguage;
968 scriptLanguage = eScriptPython;
969 styler.ColourTo(i, SCE_H_ASP);
970 if (foldHTMLPreprocessor && chNext == '%')
971 levelCurrent++;
973 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
974 continue;
977 // handle the start of ASP pre-processor = Non-HTML
978 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
979 styler.ColourTo(i - 1, StateToPrint);
980 beforePreProc = state;
981 if (inScriptType == eNonHtmlScript)
982 inScriptType = eNonHtmlScriptPreProc;
983 else
984 inScriptType = eNonHtmlPreProc;
986 if (chNext2 == '@') {
987 i += 2; // place as if it was the second next char treated
988 visibleChars += 2;
989 state = SCE_H_ASPAT;
990 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
991 styler.ColourTo(i + 3, SCE_H_ASP);
992 state = SCE_H_XCCOMMENT;
993 scriptLanguage = eScriptVBS;
994 continue;
995 } else {
996 if (chNext2 == '=') {
997 i += 2; // place as if it was the second next char treated
998 visibleChars += 2;
999 } else {
1000 i++; // place as if it was the next char treated
1001 visibleChars++;
1004 state = StateForScript(aspScript);
1006 scriptLanguage = eScriptVBS;
1007 styler.ColourTo(i, SCE_H_ASP);
1008 // fold whole script
1009 if (foldHTMLPreprocessor)
1010 levelCurrent++;
1011 // should be better
1012 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1013 continue;
1016 /////////////////////////////////////
1017 // handle the start of SGML language (DTD)
1018 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1019 (chPrev == '<') &&
1020 (ch == '!') &&
1021 (StateToPrint != SCE_H_CDATA) &&
1022 (!IsCommentState(StateToPrint)) &&
1023 (!IsScriptCommentState(StateToPrint)) ) {
1024 beforePreProc = state;
1025 styler.ColourTo(i - 2, StateToPrint);
1026 if ((chNext == '-') && (chNext2 == '-')) {
1027 state = SCE_H_COMMENT; // wait for a pending command
1028 styler.ColourTo(i + 2, SCE_H_COMMENT);
1029 i += 2; // follow styling after the --
1030 } else if (isWordCdata(i + 1, i + 7, styler)) {
1031 state = SCE_H_CDATA;
1032 } else {
1033 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1034 scriptLanguage = eScriptSGML;
1035 state = SCE_H_SGML_COMMAND; // wait for a pending command
1037 // fold whole tag (-- when closing the tag)
1038 if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1039 levelCurrent++;
1040 continue;
1043 // handle the end of Mako Python code
1044 else if (isMako &&
1045 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1046 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1047 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1048 if (state == SCE_H_ASPAT) {
1049 aspScript = segIsScriptingIndicator(styler,
1050 styler.GetStartSegment(), i - 1, aspScript);
1052 if (state == SCE_HP_WORD) {
1053 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1054 } else {
1055 styler.ColourTo(i - 1, StateToPrint);
1057 if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1058 i++;
1059 visibleChars++;
1061 if (0 != strcmp(makoBlockType, "%")) {
1062 styler.ColourTo(i, SCE_H_ASP);
1064 state = beforePreProc;
1065 if (inScriptType == eNonHtmlScriptPreProc)
1066 inScriptType = eNonHtmlScript;
1067 else
1068 inScriptType = eHtml;
1069 if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1070 levelCurrent--;
1072 scriptLanguage = eScriptNone;
1073 continue;
1076 // handle the end of Django template code
1077 else if (isDjango &&
1078 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1079 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1080 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1081 if (state == SCE_H_ASPAT) {
1082 aspScript = segIsScriptingIndicator(styler,
1083 styler.GetStartSegment(), i - 1, aspScript);
1085 if (state == SCE_HP_WORD) {
1086 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1087 } else {
1088 styler.ColourTo(i - 1, StateToPrint);
1090 i += 1;
1091 visibleChars += 1;
1092 styler.ColourTo(i, SCE_H_ASP);
1093 state = beforePreProc;
1094 if (inScriptType == eNonHtmlScriptPreProc)
1095 inScriptType = eNonHtmlScript;
1096 else
1097 inScriptType = eHtml;
1098 if (foldHTMLPreprocessor) {
1099 levelCurrent--;
1101 scriptLanguage = beforeLanguage;
1102 continue;
1105 // handle the end of a pre-processor = Non-HTML
1106 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1107 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1108 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1109 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1110 if (state == SCE_H_ASPAT) {
1111 aspScript = segIsScriptingIndicator(styler,
1112 styler.GetStartSegment(), i - 1, aspScript);
1114 // Bounce out of any ASP mode
1115 switch (state) {
1116 case SCE_HJ_WORD:
1117 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1118 break;
1119 case SCE_HB_WORD:
1120 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1121 break;
1122 case SCE_HP_WORD:
1123 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1124 break;
1125 case SCE_HPHP_WORD:
1126 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1127 break;
1128 case SCE_H_XCCOMMENT:
1129 styler.ColourTo(i - 1, state);
1130 break;
1131 default :
1132 styler.ColourTo(i - 1, StateToPrint);
1133 break;
1135 if (scriptLanguage != eScriptSGML) {
1136 i++;
1137 visibleChars++;
1139 if (ch == '%')
1140 styler.ColourTo(i, SCE_H_ASP);
1141 else if (scriptLanguage == eScriptXML)
1142 styler.ColourTo(i, SCE_H_XMLEND);
1143 else if (scriptLanguage == eScriptSGML)
1144 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1145 else
1146 styler.ColourTo(i, SCE_H_QUESTION);
1147 state = beforePreProc;
1148 if (inScriptType == eNonHtmlScriptPreProc)
1149 inScriptType = eNonHtmlScript;
1150 else
1151 inScriptType = eHtml;
1152 // Unfold all scripting languages, except for XML tag
1153 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1154 levelCurrent--;
1156 scriptLanguage = eScriptNone;
1157 continue;
1159 /////////////////////////////////////
1161 switch (state) {
1162 case SCE_H_DEFAULT:
1163 if (ch == '<') {
1164 // in HTML, fold on tag open and unfold on tag close
1165 tagOpened = true;
1166 tagClosing = (chNext == '/');
1167 styler.ColourTo(i - 1, StateToPrint);
1168 if (chNext != '!')
1169 state = SCE_H_TAGUNKNOWN;
1170 } else if (ch == '&') {
1171 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1172 state = SCE_H_ENTITY;
1174 break;
1175 case SCE_H_SGML_DEFAULT:
1176 case SCE_H_SGML_BLOCK_DEFAULT:
1177 // if (scriptLanguage == eScriptSGMLblock)
1178 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1180 if (ch == '\"') {
1181 styler.ColourTo(i - 1, StateToPrint);
1182 state = SCE_H_SGML_DOUBLESTRING;
1183 } else if (ch == '\'') {
1184 styler.ColourTo(i - 1, StateToPrint);
1185 state = SCE_H_SGML_SIMPLESTRING;
1186 } else if ((ch == '-') && (chPrev == '-')) {
1187 if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1188 styler.ColourTo(i - 2, StateToPrint);
1190 state = SCE_H_SGML_COMMENT;
1191 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1192 styler.ColourTo(i - 2, StateToPrint);
1193 state = SCE_H_SGML_ENTITY;
1194 } else if (ch == '#') {
1195 styler.ColourTo(i - 1, StateToPrint);
1196 state = SCE_H_SGML_SPECIAL;
1197 } else if (ch == '[') {
1198 styler.ColourTo(i - 1, StateToPrint);
1199 scriptLanguage = eScriptSGMLblock;
1200 state = SCE_H_SGML_BLOCK_DEFAULT;
1201 } else if (ch == ']') {
1202 if (scriptLanguage == eScriptSGMLblock) {
1203 styler.ColourTo(i, StateToPrint);
1204 scriptLanguage = eScriptSGML;
1205 } else {
1206 styler.ColourTo(i - 1, StateToPrint);
1207 styler.ColourTo(i, SCE_H_SGML_ERROR);
1209 state = SCE_H_SGML_DEFAULT;
1210 } else if (scriptLanguage == eScriptSGMLblock) {
1211 if ((ch == '!') && (chPrev == '<')) {
1212 styler.ColourTo(i - 2, StateToPrint);
1213 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1214 state = SCE_H_SGML_COMMAND;
1215 } else if (ch == '>') {
1216 styler.ColourTo(i - 1, StateToPrint);
1217 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1220 break;
1221 case SCE_H_SGML_COMMAND:
1222 if ((ch == '-') && (chPrev == '-')) {
1223 styler.ColourTo(i - 2, StateToPrint);
1224 state = SCE_H_SGML_COMMENT;
1225 } else if (!issgmlwordchar(ch)) {
1226 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1227 styler.ColourTo(i - 1, StateToPrint);
1228 state = SCE_H_SGML_1ST_PARAM;
1229 } else {
1230 state = SCE_H_SGML_ERROR;
1233 break;
1234 case SCE_H_SGML_1ST_PARAM:
1235 // wait for the beginning of the word
1236 if ((ch == '-') && (chPrev == '-')) {
1237 if (scriptLanguage == eScriptSGMLblock) {
1238 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1239 } else {
1240 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1242 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1243 } else if (issgmlwordchar(ch)) {
1244 if (scriptLanguage == eScriptSGMLblock) {
1245 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1246 } else {
1247 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1249 // find the length of the word
1250 int size = 1;
1251 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1252 size++;
1253 styler.ColourTo(i + size - 1, StateToPrint);
1254 i += size - 1;
1255 visibleChars += size - 1;
1256 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1257 if (scriptLanguage == eScriptSGMLblock) {
1258 state = SCE_H_SGML_BLOCK_DEFAULT;
1259 } else {
1260 state = SCE_H_SGML_DEFAULT;
1262 continue;
1264 break;
1265 case SCE_H_SGML_ERROR:
1266 if ((ch == '-') && (chPrev == '-')) {
1267 styler.ColourTo(i - 2, StateToPrint);
1268 state = SCE_H_SGML_COMMENT;
1270 case SCE_H_SGML_DOUBLESTRING:
1271 if (ch == '\"') {
1272 styler.ColourTo(i, StateToPrint);
1273 state = SCE_H_SGML_DEFAULT;
1275 break;
1276 case SCE_H_SGML_SIMPLESTRING:
1277 if (ch == '\'') {
1278 styler.ColourTo(i, StateToPrint);
1279 state = SCE_H_SGML_DEFAULT;
1281 break;
1282 case SCE_H_SGML_COMMENT:
1283 if ((ch == '-') && (chPrev == '-')) {
1284 styler.ColourTo(i, StateToPrint);
1285 state = SCE_H_SGML_DEFAULT;
1287 break;
1288 case SCE_H_CDATA:
1289 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1290 styler.ColourTo(i, StateToPrint);
1291 state = SCE_H_DEFAULT;
1292 levelCurrent--;
1294 break;
1295 case SCE_H_COMMENT:
1296 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1297 styler.ColourTo(i, StateToPrint);
1298 state = SCE_H_DEFAULT;
1299 levelCurrent--;
1301 break;
1302 case SCE_H_SGML_1ST_PARAM_COMMENT:
1303 if ((ch == '-') && (chPrev == '-')) {
1304 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1305 state = SCE_H_SGML_1ST_PARAM;
1307 break;
1308 case SCE_H_SGML_SPECIAL:
1309 if (!(isascii(ch) && isupper(ch))) {
1310 styler.ColourTo(i - 1, StateToPrint);
1311 if (isalnum(ch)) {
1312 state = SCE_H_SGML_ERROR;
1313 } else {
1314 state = SCE_H_SGML_DEFAULT;
1317 break;
1318 case SCE_H_SGML_ENTITY:
1319 if (ch == ';') {
1320 styler.ColourTo(i, StateToPrint);
1321 state = SCE_H_SGML_DEFAULT;
1322 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1323 styler.ColourTo(i, SCE_H_SGML_ERROR);
1324 state = SCE_H_SGML_DEFAULT;
1326 break;
1327 case SCE_H_ENTITY:
1328 if (ch == ';') {
1329 styler.ColourTo(i, StateToPrint);
1330 state = SCE_H_DEFAULT;
1332 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1333 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1334 if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1335 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1336 else
1337 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1338 state = SCE_H_DEFAULT;
1340 break;
1341 case SCE_H_TAGUNKNOWN:
1342 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1343 int eClass = classifyTagHTML(styler.GetStartSegment(),
1344 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1345 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1346 if (!tagClosing) {
1347 inScriptType = eNonHtmlScript;
1348 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1349 } else {
1350 scriptLanguage = eScriptNone;
1352 eClass = SCE_H_TAG;
1354 if (ch == '>') {
1355 styler.ColourTo(i, eClass);
1356 if (inScriptType == eNonHtmlScript) {
1357 state = StateForScript(scriptLanguage);
1358 } else {
1359 state = SCE_H_DEFAULT;
1361 tagOpened = false;
1362 if (!tagDontFold) {
1363 if (tagClosing) {
1364 levelCurrent--;
1365 } else {
1366 levelCurrent++;
1369 tagClosing = false;
1370 } else if (ch == '/' && chNext == '>') {
1371 if (eClass == SCE_H_TAGUNKNOWN) {
1372 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1373 } else {
1374 styler.ColourTo(i - 1, StateToPrint);
1375 styler.ColourTo(i + 1, SCE_H_TAGEND);
1377 i++;
1378 ch = chNext;
1379 state = SCE_H_DEFAULT;
1380 tagOpened = false;
1381 } else {
1382 if (eClass != SCE_H_TAGUNKNOWN) {
1383 if (eClass == SCE_H_SGML_DEFAULT) {
1384 state = SCE_H_SGML_DEFAULT;
1385 } else {
1386 state = SCE_H_OTHER;
1391 break;
1392 case SCE_H_ATTRIBUTE:
1393 if (!setAttributeContinue.Contains(ch)) {
1394 if (inScriptType == eNonHtmlScript) {
1395 int scriptLanguagePrev = scriptLanguage;
1396 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1397 scriptLanguage = clientScript;
1398 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1399 inScriptType = eHtml;
1401 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1402 if (ch == '>') {
1403 styler.ColourTo(i, SCE_H_TAG);
1404 if (inScriptType == eNonHtmlScript) {
1405 state = StateForScript(scriptLanguage);
1406 } else {
1407 state = SCE_H_DEFAULT;
1409 tagOpened = false;
1410 if (!tagDontFold) {
1411 if (tagClosing) {
1412 levelCurrent--;
1413 } else {
1414 levelCurrent++;
1417 tagClosing = false;
1418 } else if (ch == '=') {
1419 styler.ColourTo(i, SCE_H_OTHER);
1420 state = SCE_H_VALUE;
1421 } else {
1422 state = SCE_H_OTHER;
1425 break;
1426 case SCE_H_OTHER:
1427 if (ch == '>') {
1428 styler.ColourTo(i - 1, StateToPrint);
1429 styler.ColourTo(i, SCE_H_TAG);
1430 if (inScriptType == eNonHtmlScript) {
1431 state = StateForScript(scriptLanguage);
1432 } else {
1433 state = SCE_H_DEFAULT;
1435 tagOpened = false;
1436 if (!tagDontFold) {
1437 if (tagClosing) {
1438 levelCurrent--;
1439 } else {
1440 levelCurrent++;
1443 tagClosing = false;
1444 } else if (ch == '\"') {
1445 styler.ColourTo(i - 1, StateToPrint);
1446 state = SCE_H_DOUBLESTRING;
1447 } else if (ch == '\'') {
1448 styler.ColourTo(i - 1, StateToPrint);
1449 state = SCE_H_SINGLESTRING;
1450 } else if (ch == '=') {
1451 styler.ColourTo(i, StateToPrint);
1452 state = SCE_H_VALUE;
1453 } else if (ch == '/' && chNext == '>') {
1454 styler.ColourTo(i - 1, StateToPrint);
1455 styler.ColourTo(i + 1, SCE_H_TAGEND);
1456 i++;
1457 ch = chNext;
1458 state = SCE_H_DEFAULT;
1459 tagOpened = false;
1460 } else if (ch == '?' && chNext == '>') {
1461 styler.ColourTo(i - 1, StateToPrint);
1462 styler.ColourTo(i + 1, SCE_H_XMLEND);
1463 i++;
1464 ch = chNext;
1465 state = SCE_H_DEFAULT;
1466 } else if (setHTMLWord.Contains(ch)) {
1467 styler.ColourTo(i - 1, StateToPrint);
1468 state = SCE_H_ATTRIBUTE;
1470 break;
1471 case SCE_H_DOUBLESTRING:
1472 if (ch == '\"') {
1473 if (inScriptType == eNonHtmlScript) {
1474 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1476 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1477 state = SCE_H_OTHER;
1479 break;
1480 case SCE_H_SINGLESTRING:
1481 if (ch == '\'') {
1482 if (inScriptType == eNonHtmlScript) {
1483 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1485 styler.ColourTo(i, SCE_H_SINGLESTRING);
1486 state = SCE_H_OTHER;
1488 break;
1489 case SCE_H_VALUE:
1490 if (!setHTMLWord.Contains(ch)) {
1491 if (ch == '\"' && chPrev == '=') {
1492 // Should really test for being first character
1493 state = SCE_H_DOUBLESTRING;
1494 } else if (ch == '\'' && chPrev == '=') {
1495 state = SCE_H_SINGLESTRING;
1496 } else {
1497 if (IsNumber(styler.GetStartSegment(), styler)) {
1498 styler.ColourTo(i - 1, SCE_H_NUMBER);
1499 } else {
1500 styler.ColourTo(i - 1, StateToPrint);
1502 if (ch == '>') {
1503 styler.ColourTo(i, SCE_H_TAG);
1504 if (inScriptType == eNonHtmlScript) {
1505 state = StateForScript(scriptLanguage);
1506 } else {
1507 state = SCE_H_DEFAULT;
1509 tagOpened = false;
1510 if (!tagDontFold) {
1511 if (tagClosing) {
1512 levelCurrent--;
1513 } else {
1514 levelCurrent++;
1517 tagClosing = false;
1518 } else {
1519 state = SCE_H_OTHER;
1523 break;
1524 case SCE_HJ_DEFAULT:
1525 case SCE_HJ_START:
1526 case SCE_HJ_SYMBOLS:
1527 if (IsAWordStart(ch)) {
1528 styler.ColourTo(i - 1, StateToPrint);
1529 state = SCE_HJ_WORD;
1530 } else if (ch == '/' && chNext == '*') {
1531 styler.ColourTo(i - 1, StateToPrint);
1532 if (chNext2 == '*')
1533 state = SCE_HJ_COMMENTDOC;
1534 else
1535 state = SCE_HJ_COMMENT;
1536 } else if (ch == '/' && chNext == '/') {
1537 styler.ColourTo(i - 1, StateToPrint);
1538 state = SCE_HJ_COMMENTLINE;
1539 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1540 styler.ColourTo(i - 1, StateToPrint);
1541 state = SCE_HJ_REGEX;
1542 } else if (ch == '\"') {
1543 styler.ColourTo(i - 1, StateToPrint);
1544 state = SCE_HJ_DOUBLESTRING;
1545 } else if (ch == '\'') {
1546 styler.ColourTo(i - 1, StateToPrint);
1547 state = SCE_HJ_SINGLESTRING;
1548 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1549 styler.SafeGetCharAt(i + 3) == '-') {
1550 styler.ColourTo(i - 1, StateToPrint);
1551 state = SCE_HJ_COMMENTLINE;
1552 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1553 styler.ColourTo(i - 1, StateToPrint);
1554 state = SCE_HJ_COMMENTLINE;
1555 i += 2;
1556 } else if (IsOperator(ch)) {
1557 styler.ColourTo(i - 1, StateToPrint);
1558 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1559 state = SCE_HJ_DEFAULT;
1560 } else if ((ch == ' ') || (ch == '\t')) {
1561 if (state == SCE_HJ_START) {
1562 styler.ColourTo(i - 1, StateToPrint);
1563 state = SCE_HJ_DEFAULT;
1566 break;
1567 case SCE_HJ_WORD:
1568 if (!IsAWordChar(ch)) {
1569 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1570 //styler.ColourTo(i - 1, eHTJSKeyword);
1571 state = SCE_HJ_DEFAULT;
1572 if (ch == '/' && chNext == '*') {
1573 if (chNext2 == '*')
1574 state = SCE_HJ_COMMENTDOC;
1575 else
1576 state = SCE_HJ_COMMENT;
1577 } else if (ch == '/' && chNext == '/') {
1578 state = SCE_HJ_COMMENTLINE;
1579 } else if (ch == '\"') {
1580 state = SCE_HJ_DOUBLESTRING;
1581 } else if (ch == '\'') {
1582 state = SCE_HJ_SINGLESTRING;
1583 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1584 styler.ColourTo(i - 1, StateToPrint);
1585 state = SCE_HJ_COMMENTLINE;
1586 i += 2;
1587 } else if (IsOperator(ch)) {
1588 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1589 state = SCE_HJ_DEFAULT;
1592 break;
1593 case SCE_HJ_COMMENT:
1594 case SCE_HJ_COMMENTDOC:
1595 if (ch == '/' && chPrev == '*') {
1596 styler.ColourTo(i, StateToPrint);
1597 state = SCE_HJ_DEFAULT;
1598 ch = ' ';
1600 break;
1601 case SCE_HJ_COMMENTLINE:
1602 if (ch == '\r' || ch == '\n') {
1603 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1604 state = SCE_HJ_DEFAULT;
1605 ch = ' ';
1607 break;
1608 case SCE_HJ_DOUBLESTRING:
1609 if (ch == '\\') {
1610 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1611 i++;
1613 } else if (ch == '\"') {
1614 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1615 state = SCE_HJ_DEFAULT;
1616 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1617 styler.ColourTo(i - 1, StateToPrint);
1618 state = SCE_HJ_COMMENTLINE;
1619 i += 2;
1620 } else if (isLineEnd(ch)) {
1621 styler.ColourTo(i - 1, StateToPrint);
1622 state = SCE_HJ_STRINGEOL;
1624 break;
1625 case SCE_HJ_SINGLESTRING:
1626 if (ch == '\\') {
1627 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1628 i++;
1630 } else if (ch == '\'') {
1631 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1632 state = SCE_HJ_DEFAULT;
1633 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1634 styler.ColourTo(i - 1, StateToPrint);
1635 state = SCE_HJ_COMMENTLINE;
1636 i += 2;
1637 } else if (isLineEnd(ch)) {
1638 styler.ColourTo(i - 1, StateToPrint);
1639 state = SCE_HJ_STRINGEOL;
1641 break;
1642 case SCE_HJ_STRINGEOL:
1643 if (!isLineEnd(ch)) {
1644 styler.ColourTo(i - 1, StateToPrint);
1645 state = SCE_HJ_DEFAULT;
1646 } else if (!isLineEnd(chNext)) {
1647 styler.ColourTo(i, StateToPrint);
1648 state = SCE_HJ_DEFAULT;
1650 break;
1651 case SCE_HJ_REGEX:
1652 if (ch == '\r' || ch == '\n' || ch == '/') {
1653 if (ch == '/') {
1654 while (isascii(chNext) && islower(chNext)) { // gobble regex flags
1655 i++;
1656 ch = chNext;
1657 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1660 styler.ColourTo(i, StateToPrint);
1661 state = SCE_HJ_DEFAULT;
1662 } else if (ch == '\\') {
1663 // Gobble up the quoted character
1664 if (chNext == '\\' || chNext == '/') {
1665 i++;
1666 ch = chNext;
1667 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1670 break;
1671 case SCE_HB_DEFAULT:
1672 case SCE_HB_START:
1673 if (IsAWordStart(ch)) {
1674 styler.ColourTo(i - 1, StateToPrint);
1675 state = SCE_HB_WORD;
1676 } else if (ch == '\'') {
1677 styler.ColourTo(i - 1, StateToPrint);
1678 state = SCE_HB_COMMENTLINE;
1679 } else if (ch == '\"') {
1680 styler.ColourTo(i - 1, StateToPrint);
1681 state = SCE_HB_STRING;
1682 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1683 styler.SafeGetCharAt(i + 3) == '-') {
1684 styler.ColourTo(i - 1, StateToPrint);
1685 state = SCE_HB_COMMENTLINE;
1686 } else if (IsOperator(ch)) {
1687 styler.ColourTo(i - 1, StateToPrint);
1688 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1689 state = SCE_HB_DEFAULT;
1690 } else if ((ch == ' ') || (ch == '\t')) {
1691 if (state == SCE_HB_START) {
1692 styler.ColourTo(i - 1, StateToPrint);
1693 state = SCE_HB_DEFAULT;
1696 break;
1697 case SCE_HB_WORD:
1698 if (!IsAWordChar(ch)) {
1699 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1700 if (state == SCE_HB_DEFAULT) {
1701 if (ch == '\"') {
1702 state = SCE_HB_STRING;
1703 } else if (ch == '\'') {
1704 state = SCE_HB_COMMENTLINE;
1705 } else if (IsOperator(ch)) {
1706 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1707 state = SCE_HB_DEFAULT;
1711 break;
1712 case SCE_HB_STRING:
1713 if (ch == '\"') {
1714 styler.ColourTo(i, StateToPrint);
1715 state = SCE_HB_DEFAULT;
1716 } else if (ch == '\r' || ch == '\n') {
1717 styler.ColourTo(i - 1, StateToPrint);
1718 state = SCE_HB_STRINGEOL;
1720 break;
1721 case SCE_HB_COMMENTLINE:
1722 if (ch == '\r' || ch == '\n') {
1723 styler.ColourTo(i - 1, StateToPrint);
1724 state = SCE_HB_DEFAULT;
1726 break;
1727 case SCE_HB_STRINGEOL:
1728 if (!isLineEnd(ch)) {
1729 styler.ColourTo(i - 1, StateToPrint);
1730 state = SCE_HB_DEFAULT;
1731 } else if (!isLineEnd(chNext)) {
1732 styler.ColourTo(i, StateToPrint);
1733 state = SCE_HB_DEFAULT;
1735 break;
1736 case SCE_HP_DEFAULT:
1737 case SCE_HP_START:
1738 if (IsAWordStart(ch)) {
1739 styler.ColourTo(i - 1, StateToPrint);
1740 state = SCE_HP_WORD;
1741 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1742 styler.SafeGetCharAt(i + 3) == '-') {
1743 styler.ColourTo(i - 1, StateToPrint);
1744 state = SCE_HP_COMMENTLINE;
1745 } else if (ch == '#') {
1746 styler.ColourTo(i - 1, StateToPrint);
1747 state = SCE_HP_COMMENTLINE;
1748 } else if (ch == '\"') {
1749 styler.ColourTo(i - 1, StateToPrint);
1750 if (chNext == '\"' && chNext2 == '\"') {
1751 i += 2;
1752 state = SCE_HP_TRIPLEDOUBLE;
1753 ch = ' ';
1754 chPrev = ' ';
1755 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1756 } else {
1757 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1758 state = SCE_HP_STRING;
1760 } else if (ch == '\'') {
1761 styler.ColourTo(i - 1, StateToPrint);
1762 if (chNext == '\'' && chNext2 == '\'') {
1763 i += 2;
1764 state = SCE_HP_TRIPLE;
1765 ch = ' ';
1766 chPrev = ' ';
1767 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1768 } else {
1769 state = SCE_HP_CHARACTER;
1771 } else if (IsOperator(ch)) {
1772 styler.ColourTo(i - 1, StateToPrint);
1773 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1774 } else if ((ch == ' ') || (ch == '\t')) {
1775 if (state == SCE_HP_START) {
1776 styler.ColourTo(i - 1, StateToPrint);
1777 state = SCE_HP_DEFAULT;
1780 break;
1781 case SCE_HP_WORD:
1782 if (!IsAWordChar(ch)) {
1783 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1784 state = SCE_HP_DEFAULT;
1785 if (ch == '#') {
1786 state = SCE_HP_COMMENTLINE;
1787 } else if (ch == '\"') {
1788 if (chNext == '\"' && chNext2 == '\"') {
1789 i += 2;
1790 state = SCE_HP_TRIPLEDOUBLE;
1791 ch = ' ';
1792 chPrev = ' ';
1793 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1794 } else {
1795 state = SCE_HP_STRING;
1797 } else if (ch == '\'') {
1798 if (chNext == '\'' && chNext2 == '\'') {
1799 i += 2;
1800 state = SCE_HP_TRIPLE;
1801 ch = ' ';
1802 chPrev = ' ';
1803 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1804 } else {
1805 state = SCE_HP_CHARACTER;
1807 } else if (IsOperator(ch)) {
1808 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1811 break;
1812 case SCE_HP_COMMENTLINE:
1813 if (ch == '\r' || ch == '\n') {
1814 styler.ColourTo(i - 1, StateToPrint);
1815 state = SCE_HP_DEFAULT;
1817 break;
1818 case SCE_HP_STRING:
1819 if (ch == '\\') {
1820 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1821 i++;
1822 ch = chNext;
1823 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1825 } else if (ch == '\"') {
1826 styler.ColourTo(i, StateToPrint);
1827 state = SCE_HP_DEFAULT;
1829 break;
1830 case SCE_HP_CHARACTER:
1831 if (ch == '\\') {
1832 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1833 i++;
1834 ch = chNext;
1835 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1837 } else if (ch == '\'') {
1838 styler.ColourTo(i, StateToPrint);
1839 state = SCE_HP_DEFAULT;
1841 break;
1842 case SCE_HP_TRIPLE:
1843 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1844 styler.ColourTo(i, StateToPrint);
1845 state = SCE_HP_DEFAULT;
1847 break;
1848 case SCE_HP_TRIPLEDOUBLE:
1849 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1850 styler.ColourTo(i, StateToPrint);
1851 state = SCE_HP_DEFAULT;
1853 break;
1854 ///////////// start - PHP state handling
1855 case SCE_HPHP_WORD:
1856 if (!IsAWordChar(ch)) {
1857 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1858 if (ch == '/' && chNext == '*') {
1859 i++;
1860 state = SCE_HPHP_COMMENT;
1861 } else if (ch == '/' && chNext == '/') {
1862 i++;
1863 state = SCE_HPHP_COMMENTLINE;
1864 } else if (ch == '#') {
1865 state = SCE_HPHP_COMMENTLINE;
1866 } else if (ch == '\"') {
1867 state = SCE_HPHP_HSTRING;
1868 strcpy(phpStringDelimiter, "\"");
1869 } else if (styler.Match(i, "<<<")) {
1870 bool isSimpleString = false;
1871 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1872 if (strlen(phpStringDelimiter)) {
1873 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1874 if (foldHeredoc) levelCurrent++;
1876 } else if (ch == '\'') {
1877 state = SCE_HPHP_SIMPLESTRING;
1878 strcpy(phpStringDelimiter, "\'");
1879 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1880 state = SCE_HPHP_VARIABLE;
1881 } else if (IsOperator(ch)) {
1882 state = SCE_HPHP_OPERATOR;
1883 } else {
1884 state = SCE_HPHP_DEFAULT;
1887 break;
1888 case SCE_HPHP_NUMBER:
1889 // recognize bases 8,10 or 16 integers OR floating-point numbers
1890 if (!IsADigit(ch)
1891 && strchr(".xXabcdefABCDEF", ch) == NULL
1892 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1893 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1894 if (IsOperator(ch))
1895 state = SCE_HPHP_OPERATOR;
1896 else
1897 state = SCE_HPHP_DEFAULT;
1899 break;
1900 case SCE_HPHP_VARIABLE:
1901 if (!IsPhpWordChar(chNext)) {
1902 styler.ColourTo(i, SCE_HPHP_VARIABLE);
1903 state = SCE_HPHP_DEFAULT;
1905 break;
1906 case SCE_HPHP_COMMENT:
1907 if (ch == '/' && chPrev == '*') {
1908 styler.ColourTo(i, StateToPrint);
1909 state = SCE_HPHP_DEFAULT;
1911 break;
1912 case SCE_HPHP_COMMENTLINE:
1913 if (ch == '\r' || ch == '\n') {
1914 styler.ColourTo(i - 1, StateToPrint);
1915 state = SCE_HPHP_DEFAULT;
1917 break;
1918 case SCE_HPHP_HSTRING:
1919 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1920 // skip the next char
1921 i++;
1922 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1923 && IsPhpWordStart(chNext2)) {
1924 styler.ColourTo(i - 1, StateToPrint);
1925 state = SCE_HPHP_COMPLEX_VARIABLE;
1926 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1927 styler.ColourTo(i - 1, StateToPrint);
1928 state = SCE_HPHP_HSTRING_VARIABLE;
1929 } else if (styler.Match(i, phpStringDelimiter)) {
1930 if (phpStringDelimiter[0] == '\"') {
1931 styler.ColourTo(i, StateToPrint);
1932 state = SCE_HPHP_DEFAULT;
1933 } else if (isLineEnd(chPrev)) {
1934 const int psdLength = strlen(phpStringDelimiter);
1935 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1936 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1937 if (isLineEnd(chAfterPsd) ||
1938 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1939 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1940 styler.ColourTo(i, StateToPrint);
1941 state = SCE_HPHP_DEFAULT;
1942 if (foldHeredoc) levelCurrent--;
1946 break;
1947 case SCE_HPHP_SIMPLESTRING:
1948 if (phpStringDelimiter[0] == '\'') {
1949 if (ch == '\\') {
1950 // skip the next char
1951 i++;
1952 } else if (ch == '\'') {
1953 styler.ColourTo(i, StateToPrint);
1954 state = SCE_HPHP_DEFAULT;
1956 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
1957 const int psdLength = strlen(phpStringDelimiter);
1958 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1959 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1960 if (isLineEnd(chAfterPsd) ||
1961 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1962 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1963 styler.ColourTo(i, StateToPrint);
1964 state = SCE_HPHP_DEFAULT;
1965 if (foldHeredoc) levelCurrent--;
1968 break;
1969 case SCE_HPHP_HSTRING_VARIABLE:
1970 if (!IsPhpWordChar(chNext)) {
1971 styler.ColourTo(i, StateToPrint);
1972 state = SCE_HPHP_HSTRING;
1974 break;
1975 case SCE_HPHP_COMPLEX_VARIABLE:
1976 if (ch == '}') {
1977 styler.ColourTo(i, StateToPrint);
1978 state = SCE_HPHP_HSTRING;
1980 break;
1981 case SCE_HPHP_OPERATOR:
1982 case SCE_HPHP_DEFAULT:
1983 styler.ColourTo(i - 1, StateToPrint);
1984 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1985 state = SCE_HPHP_NUMBER;
1986 } else if (IsAWordStart(ch)) {
1987 state = SCE_HPHP_WORD;
1988 } else if (ch == '/' && chNext == '*') {
1989 i++;
1990 state = SCE_HPHP_COMMENT;
1991 } else if (ch == '/' && chNext == '/') {
1992 i++;
1993 state = SCE_HPHP_COMMENTLINE;
1994 } else if (ch == '#') {
1995 state = SCE_HPHP_COMMENTLINE;
1996 } else if (ch == '\"') {
1997 state = SCE_HPHP_HSTRING;
1998 strcpy(phpStringDelimiter, "\"");
1999 } else if (styler.Match(i, "<<<")) {
2000 bool isSimpleString = false;
2001 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
2002 if (strlen(phpStringDelimiter)) {
2003 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2004 if (foldHeredoc) levelCurrent++;
2006 } else if (ch == '\'') {
2007 state = SCE_HPHP_SIMPLESTRING;
2008 strcpy(phpStringDelimiter, "\'");
2009 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2010 state = SCE_HPHP_VARIABLE;
2011 } else if (IsOperator(ch)) {
2012 state = SCE_HPHP_OPERATOR;
2013 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2014 state = SCE_HPHP_DEFAULT;
2016 break;
2017 ///////////// end - PHP state handling
2020 // Some of the above terminated their lexeme but since the same character starts
2021 // the same class again, only reenter if non empty segment.
2023 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
2024 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2025 if ((ch == '\"') && (nonEmptySegment)) {
2026 state = SCE_HB_STRING;
2027 } else if (ch == '\'') {
2028 state = SCE_HB_COMMENTLINE;
2029 } else if (IsAWordStart(ch)) {
2030 state = SCE_HB_WORD;
2031 } else if (IsOperator(ch)) {
2032 styler.ColourTo(i, SCE_HB_DEFAULT);
2034 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2035 if ((ch == '\"') && (nonEmptySegment)) {
2036 state = SCE_HBA_STRING;
2037 } else if (ch == '\'') {
2038 state = SCE_HBA_COMMENTLINE;
2039 } else if (IsAWordStart(ch)) {
2040 state = SCE_HBA_WORD;
2041 } else if (IsOperator(ch)) {
2042 styler.ColourTo(i, SCE_HBA_DEFAULT);
2044 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2045 if (ch == '/' && chNext == '*') {
2046 if (styler.SafeGetCharAt(i + 2) == '*')
2047 state = SCE_HJ_COMMENTDOC;
2048 else
2049 state = SCE_HJ_COMMENT;
2050 } else if (ch == '/' && chNext == '/') {
2051 state = SCE_HJ_COMMENTLINE;
2052 } else if ((ch == '\"') && (nonEmptySegment)) {
2053 state = SCE_HJ_DOUBLESTRING;
2054 } else if ((ch == '\'') && (nonEmptySegment)) {
2055 state = SCE_HJ_SINGLESTRING;
2056 } else if (IsAWordStart(ch)) {
2057 state = SCE_HJ_WORD;
2058 } else if (IsOperator(ch)) {
2059 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2064 switch (state) {
2065 case SCE_HJ_WORD:
2066 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2067 break;
2068 case SCE_HB_WORD:
2069 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2070 break;
2071 case SCE_HP_WORD:
2072 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType);
2073 break;
2074 case SCE_HPHP_WORD:
2075 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2076 break;
2077 default:
2078 StateToPrint = statePrintForState(state, inScriptType);
2079 styler.ColourTo(lengthDoc - 1, StateToPrint);
2080 break;
2083 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2084 if (fold) {
2085 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2086 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2090 static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2091 Accessor &styler) {
2092 // Passing in true because we're lexing XML
2093 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, true);
2096 static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2097 Accessor &styler) {
2098 // Passing in false because we're notlexing XML
2099 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, false);
2102 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2103 Accessor &styler) {
2104 if (startPos == 0)
2105 initStyle = SCE_HPHP_DEFAULT;
2106 ColouriseHTMLDoc(startPos, length, initStyle, keywordlists, styler);
// Human-readable descriptions of the keyword lists used by the HTML and XML
// lexers (both lexer modules below are constructed with this table).
// The list is closed by a null pointer so that its length can be discovered
// by walking it.
// NOTE(review): LexerModule is presumed to count entries up to the first
// null pointer - confirm against its declaration.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	0,	// end-of-list sentinel
};
// Keyword-list descriptions for the stand-alone PHP script lexer.
// Only the PHP slot carries a description; the other slots are empty strings
// that keep "PHP keywords" at the same position it occupies in
// htmlWordListDesc.
// The list is closed by a null pointer so that its length can be discovered
// by walking it.
// NOTE(review): LexerModule is presumed to count entries up to the first
// null pointer - confirm against its declaration.
static const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	0,	// end-of-list sentinel
};
2129 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
2130 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2131 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);