Refactor snippets_complete_constructs().
[geany-mirror.git] / scintilla / LexHTML.cxx
blob6038326e3eea86bff63c7753a25b81ac43cbeafa
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
14 #include "Platform.h"
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "CharacterSet.h"
24 #ifdef SCI_NAMESPACE
25 using namespace Scintilla;
26 #endif
// Offsets from each script style range to its "A" twin
// (SCE_HJ_* -> SCE_HJA_*, SCE_HB_* -> SCE_HBA_*, SCE_HP_* -> SCE_HPA_*).
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Kind of embedded content the lexer is currently inside.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Nesting mode of the lexer; the values fit in two bits.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// Returns true when ch is an ASCII character that may appear inside a word:
// a letter, a digit, '.' or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted. This
// matters for negative values (e.g. a high byte held in a signed char), for
// which isalnum() has no defined result.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Returns true when ch is an ASCII character that may begin a word:
// a letter, a digit or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted. This
// matters for negative values (e.g. a high byte held in a signed char), for
// which isalnum() has no defined result.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
// Returns true when ch is one of the punctuation characters treated as an
// operator. Note that '.' IS part of the set. Letters, digits and every
// character not listed below yield false.
inline bool IsOperator(int ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
// Maps 'A'..'Z' onto 'a'..'z'; every other value is returned unchanged.
static inline int MakeLowerCase(int ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	return isUpper ? (ch - 'A' + 'a') : ch;
}
64 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
65 size_t i = 0;
66 for (; (i < end - start + 1) && (i < len-1); i++) {
67 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
69 s[i] = '\0';
72 static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
74 size_t i = 0;
75 for (; i < sLen-1; i++) {
76 char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
77 if ((i == 0) && !IsAWordStart(ch))
78 break;
79 if ((i > 0) && !IsAWordChar(ch))
80 break;
81 s[i] = ch;
83 s[i] = '\0';
85 return s;
88 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
89 char s[100];
90 GetTextSegment(styler, start, end, s, sizeof(s));
91 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
92 if (strstr(s, "src")) // External script
93 return eScriptNone;
94 if (strstr(s, "vbs"))
95 return eScriptVBS;
96 if (strstr(s, "pyth"))
97 return eScriptPython;
98 if (strstr(s, "javas"))
99 return eScriptJS;
100 if (strstr(s, "jscr"))
101 return eScriptJS;
102 if (strstr(s, "php"))
103 return eScriptPHP;
104 if (strstr(s, "xml")) {
105 const char *xml = strstr(s, "xml");
106 for (const char *t=s; t<xml; t++) {
107 if (!IsASpace(*t)) {
108 return prevValue;
111 return eScriptXML;
114 return prevValue;
117 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
118 int iResult = 0;
119 char s[100];
120 GetTextSegment(styler, start, end, s, sizeof(s));
121 if (0 == strncmp(s, "php", 3)) {
122 iResult = 3;
125 return iResult;
128 static script_type ScriptOfState(int state) {
129 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
130 return eScriptPython;
131 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
132 return eScriptVBS;
133 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
134 return eScriptJS;
135 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
136 return eScriptPHP;
137 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
138 return eScriptSGML;
139 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
140 return eScriptSGMLblock;
141 } else {
142 return eScriptNone;
146 static int statePrintForState(int state, script_mode inScriptType) {
147 int StateToPrint = state;
149 if (state >= SCE_HJ_START) {
150 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
151 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
152 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
153 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
154 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
155 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
159 return StateToPrint;
162 static int stateForPrintState(int StateToPrint) {
163 int state;
165 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
166 state = StateToPrint - SCE_HA_PYTHON;
167 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
168 state = StateToPrint - SCE_HA_VBS;
169 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
170 state = StateToPrint - SCE_HA_JS;
171 } else {
172 state = StateToPrint;
175 return state;
178 static inline bool IsNumber(unsigned int start, Accessor &styler) {
179 return IsADigit(styler[start]) || (styler[start] == '.') ||
180 (styler[start] == '-') || (styler[start] == '#');
183 static inline bool isStringState(int state) {
184 bool bResult;
186 switch (state) {
187 case SCE_HJ_DOUBLESTRING:
188 case SCE_HJ_SINGLESTRING:
189 case SCE_HJA_DOUBLESTRING:
190 case SCE_HJA_SINGLESTRING:
191 case SCE_HB_STRING:
192 case SCE_HBA_STRING:
193 case SCE_HP_STRING:
194 case SCE_HP_CHARACTER:
195 case SCE_HP_TRIPLE:
196 case SCE_HP_TRIPLEDOUBLE:
197 case SCE_HPA_STRING:
198 case SCE_HPA_CHARACTER:
199 case SCE_HPA_TRIPLE:
200 case SCE_HPA_TRIPLEDOUBLE:
201 case SCE_HPHP_HSTRING:
202 case SCE_HPHP_SIMPLESTRING:
203 case SCE_HPHP_HSTRING_VARIABLE:
204 case SCE_HPHP_COMPLEX_VARIABLE:
205 bResult = true;
206 break;
207 default :
208 bResult = false;
209 break;
211 return bResult;
214 static inline bool stateAllowsTermination(int state) {
215 bool allowTermination = !isStringState(state);
216 if (allowTermination) {
217 switch (state) {
218 case SCE_HB_COMMENTLINE:
219 case SCE_HPHP_COMMENT:
220 case SCE_HP_COMMENTLINE:
221 case SCE_HPA_COMMENTLINE:
222 allowTermination = false;
225 return allowTermination;
228 // not really well done, since it's only comments that should lex the %> and <%
229 static inline bool isCommentASPState(int state) {
230 bool bResult;
232 switch (state) {
233 case SCE_HJ_COMMENT:
234 case SCE_HJ_COMMENTLINE:
235 case SCE_HJ_COMMENTDOC:
236 case SCE_HB_COMMENTLINE:
237 case SCE_HP_COMMENTLINE:
238 case SCE_HPHP_COMMENT:
239 case SCE_HPHP_COMMENTLINE:
240 bResult = true;
241 break;
242 default :
243 bResult = false;
244 break;
246 return bResult;
249 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
250 bool wordIsNumber = IsNumber(start, styler);
251 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
252 if (wordIsNumber) {
253 chAttr = SCE_H_NUMBER;
254 } else {
255 char s[100];
256 GetTextSegment(styler, start, end, s, sizeof(s));
257 if (keywords.InList(s))
258 chAttr = SCE_H_ATTRIBUTE;
260 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
261 // No keywords -> all are known
262 chAttr = SCE_H_ATTRIBUTE;
263 styler.ColourTo(end, chAttr);
266 static int classifyTagHTML(unsigned int start, unsigned int end,
267 WordList &keywords, Accessor &styler, bool &tagDontFold,
268 bool caseSensitive, bool isXml, bool allowScripts) {
269 char s[30 + 2];
270 // Copy after the '<'
271 unsigned int i = 0;
272 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
273 char ch = styler[cPos];
274 if ((ch != '<') && (ch != '/')) {
275 s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
279 //The following is only a quick hack, to see if this whole thing would work
280 //we first need the tagname with a trailing space...
281 s[i] = ' ';
282 s[i+1] = '\0';
284 // if the current language is XML, I can fold any tag
285 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
286 //...to find it in the list of no-container-tags
287 tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
289 //now we can remove the trailing space
290 s[i] = '\0';
292 // No keywords -> all are known
293 char chAttr = SCE_H_TAGUNKNOWN;
294 if (s[0] == '!') {
295 chAttr = SCE_H_SGML_DEFAULT;
296 } else if (!keywords || keywords.InList(s)) {
297 chAttr = SCE_H_TAG;
299 styler.ColourTo(end, chAttr);
300 if (chAttr == SCE_H_TAG) {
301 if (allowScripts && 0 == strcmp(s, "script")) {
302 // check to see if this is a self-closing tag by sniffing ahead
303 bool isSelfClose = false;
304 for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
305 char ch = styler.SafeGetCharAt(cPos, '\0');
306 if (ch == '\0' || ch == '>')
307 break;
308 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
309 isSelfClose = true;
310 break;
314 // do not enter a script state if the tag self-closed
315 if (!isSelfClose)
316 chAttr = SCE_H_SCRIPT;
317 } else if (!isXml && 0 == strcmp(s, "comment")) {
318 chAttr = SCE_H_COMMENT;
321 return chAttr;
324 static void classifyWordHTJS(unsigned int start, unsigned int end,
325 WordList &keywords, Accessor &styler, script_mode inScriptType) {
326 char chAttr = SCE_HJ_WORD;
327 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
328 if (wordIsNumber)
329 chAttr = SCE_HJ_NUMBER;
330 else {
331 char s[30 + 1];
332 unsigned int i = 0;
333 for (; i < end - start + 1 && i < 30; i++) {
334 s[i] = styler[start + i];
336 s[i] = '\0';
337 if (keywords.InList(s))
338 chAttr = SCE_HJ_KEYWORD;
340 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
343 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
344 char chAttr = SCE_HB_IDENTIFIER;
345 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
346 if (wordIsNumber)
347 chAttr = SCE_HB_NUMBER;
348 else {
349 char s[100];
350 GetTextSegment(styler, start, end, s, sizeof(s));
351 if (keywords.InList(s)) {
352 chAttr = SCE_HB_WORD;
353 if (strcmp(s, "rem") == 0)
354 chAttr = SCE_HB_COMMENTLINE;
357 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
358 if (chAttr == SCE_HB_COMMENTLINE)
359 return SCE_HB_COMMENTLINE;
360 else
361 return SCE_HB_DEFAULT;
364 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
365 bool wordIsNumber = IsADigit(styler[start]);
366 char s[30 + 1];
367 unsigned int i = 0;
368 for (; i < end - start + 1 && i < 30; i++) {
369 s[i] = styler[start + i];
371 s[i] = '\0';
372 char chAttr = SCE_HP_IDENTIFIER;
373 if (0 == strcmp(prevWord, "class"))
374 chAttr = SCE_HP_CLASSNAME;
375 else if (0 == strcmp(prevWord, "def"))
376 chAttr = SCE_HP_DEFNAME;
377 else if (wordIsNumber)
378 chAttr = SCE_HP_NUMBER;
379 else if (keywords.InList(s))
380 chAttr = SCE_HP_WORD;
381 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
382 strcpy(prevWord, s);
385 // Update the word colour to default or keyword
386 // Called when in a PHP word
387 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
388 char chAttr = SCE_HPHP_DEFAULT;
389 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
390 if (wordIsNumber)
391 chAttr = SCE_HPHP_NUMBER;
392 else {
393 char s[100];
394 GetTextSegment(styler, start, end, s, sizeof(s));
395 if (keywords.InList(s))
396 chAttr = SCE_HPHP_WORD;
398 styler.ColourTo(end, chAttr);
401 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
402 char s[30 + 1];
403 unsigned int i = 0;
404 for (; i < end - start + 1 && i < 30; i++) {
405 s[i] = styler[start + i];
407 s[i] = '\0';
408 return keywords.InList(s);
411 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
412 char s[30 + 1];
413 unsigned int i = 0;
414 for (; i < end - start + 1 && i < 30; i++) {
415 s[i] = styler[start + i];
417 s[i] = '\0';
418 return (0 == strcmp(s, "[CDATA["));
421 // Return the first state to reach when entering a scripting language
422 static int StateForScript(script_type scriptLanguage) {
423 int Result;
424 switch (scriptLanguage) {
425 case eScriptVBS:
426 Result = SCE_HB_START;
427 break;
428 case eScriptPython:
429 Result = SCE_HP_START;
430 break;
431 case eScriptPHP:
432 Result = SCE_HPHP_DEFAULT;
433 break;
434 case eScriptXML:
435 Result = SCE_H_TAGUNKNOWN;
436 break;
437 case eScriptSGML:
438 Result = SCE_H_SGML_DEFAULT;
439 break;
440 case eScriptComment:
441 Result = SCE_H_COMMENT;
442 break;
443 default :
444 Result = SCE_HJ_START;
445 break;
447 return Result;
// Returns true when ch may be part of an HTML word: any non-ASCII value,
// an ASCII letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '-': case '_':
	case ':': case '!': case '#':
		return true;
	default:
		return false;
	}
}
// Returns true when ch may be part of an SGML word: any non-ASCII value,
// an ASCII letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '_': case ':':
	case '!': case '#': case '[':
		return true;
	default:
		return false;
	}
}
// Returns true when ch may begin a PHP word: an ASCII letter, '_', or any
// value of 0x7f and above.
static inline bool IsPhpWordStart(int ch) {
	if (ch >= 0x7f)
		return true;
	if (!isascii(ch))
		return false;
	return isalpha(ch) || (ch == '_');
}
464 static inline bool IsPhpWordChar(int ch) {
465 return IsADigit(ch) || IsPhpWordStart(ch);
468 static bool InTagState(int state) {
469 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
470 state == SCE_H_SCRIPT ||
471 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
472 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
473 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
476 static bool IsCommentState(const int state) {
477 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
480 static bool IsScriptCommentState(const int state) {
481 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
482 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// Returns true when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
// Returns true when ch is '(', '=' or ','.
static bool isOKBeforeRE(int ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
493 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
494 if (strlen(blockType) == 0) {
495 return ((ch == '%') && (chNext == '>'));
496 } else if ((0 == strcmp(blockType, "inherit")) ||
497 (0 == strcmp(blockType, "namespace")) ||
498 (0 == strcmp(blockType, "include")) ||
499 (0 == strcmp(blockType, "page"))) {
500 return ((ch == '/') && (chNext == '>'));
501 } else if (0 == strcmp(blockType, "%")) {
502 return isLineEnd(ch);
503 } else if (0 == strcmp(blockType, "{")) {
504 return ch == '}';
505 } else {
506 return (ch == '>');
510 static bool isPHPStringState(int state) {
511 return
512 (state == SCE_HPHP_HSTRING) ||
513 (state == SCE_HPHP_SIMPLESTRING) ||
514 (state == SCE_HPHP_HSTRING_VARIABLE) ||
515 (state == SCE_HPHP_COMPLEX_VARIABLE);
518 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
519 int j;
520 const int beginning = i - 1;
521 bool isValidSimpleString = false;
523 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
524 i++;
526 char ch = styler.SafeGetCharAt(i);
527 const char chNext = styler.SafeGetCharAt(i + 1);
528 if (!IsPhpWordStart(ch)) {
529 if (ch == '\'' && IsPhpWordStart(chNext)) {
530 i++;
531 ch = chNext;
532 isSimpleString = true;
533 } else {
534 phpStringDelimiter[0] = '\0';
535 return beginning;
538 phpStringDelimiter[0] = ch;
539 i++;
541 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
542 if (!IsPhpWordChar(styler[j])) {
543 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
544 isValidSimpleString = true;
545 j++;
546 break;
547 } else {
548 phpStringDelimiter[0] = '\0';
549 return beginning;
552 if (j - i < phpStringDelimiterSize - 2)
553 phpStringDelimiter[j-i+1] = styler[j];
554 else
555 i++;
557 if (isSimpleString && !isValidSimpleString) {
558 phpStringDelimiter[0] = '\0';
559 return beginning;
561 phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
562 return j - 1;
565 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
566 Accessor &styler, bool isXml) {
567 WordList &keywords = *keywordlists[0];
568 WordList &keywords2 = *keywordlists[1];
569 WordList &keywords3 = *keywordlists[2];
570 WordList &keywords4 = *keywordlists[3];
571 WordList &keywords5 = *keywordlists[4];
572 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
574 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
575 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
576 char prevWord[200];
577 prevWord[0] = '\0';
578 char nextWord[200];
579 nextWord[0] = '\0';
580 char phpStringDelimiter[200]; // PHP is not limited in length, we are
581 phpStringDelimiter[0] = '\0';
582 int StateToPrint = initStyle;
583 int state = stateForPrintState(StateToPrint);
584 char makoBlockType[200];
585 makoBlockType[0] = '\0';
587 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
588 if (InTagState(state)) {
589 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
590 startPos--;
591 length++;
593 state = SCE_H_DEFAULT;
595 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
596 if (isPHPStringState(state)) {
597 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
598 startPos--;
599 length++;
600 state = styler.StyleAt(startPos);
602 if (startPos == 0)
603 state = SCE_H_DEFAULT;
605 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
607 int lineCurrent = styler.GetLine(startPos);
608 int lineState;
609 if (lineCurrent > 0) {
610 lineState = styler.GetLineState(lineCurrent);
611 } else {
612 // Default client and ASP scripting language is JavaScript
613 lineState = eScriptJS << 8;
615 // property asp.default.language
616 // Script in ASP code is initially assumed to be in JavaScript.
617 // To change this to VBScript set asp.default.language to 2. Python is 3.
618 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
620 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
621 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
622 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
623 bool tagDontFold = false; //some HTML tags should not be folded
624 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
625 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
626 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
628 script_type scriptLanguage = ScriptOfState(state);
629 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
630 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
631 scriptLanguage = eScriptComment;
634 // property fold.html
635 // Folding is turned on or off for HTML and XML files with this option.
636 // The fold option must also be on for folding to occur.
637 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
639 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
641 // property fold.html.preprocessor
642 // Folding is turned on or off for scripts embedded in HTML files with this option.
643 // The default is on.
644 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
646 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
648 // property fold.hypertext.comment
649 // Allow folding for comments in scripts embedded in HTML.
650 // The default is off.
651 const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
653 // property fold.hypertext.heredoc
654 // Allow folding for heredocs in scripts embedded in HTML.
655 // The default is off.
656 const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
658 // property html.tags.case.sensitive
659 // For XML and HTML, setting this property to 1 will make tags match in a case
660 // sensitive way which is the expected behaviour for XML and XHTML.
661 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
663 // property lexer.xml.allow.scripts
664 // Set to 0 to disable scripts in XML.
665 const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
667 // property lexer.html.mako
668 // Set to 1 to enable the mako template language.
669 const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
671 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
672 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
673 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
675 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
676 int levelCurrent = levelPrev;
677 int visibleChars = 0;
678 int lineStartVisibleChars = 0;
680 int chPrev = ' ';
681 int ch = ' ';
682 int chPrevNonWhite = ' ';
683 // look back to set chPrevNonWhite properly for better regex colouring
684 if (scriptLanguage == eScriptJS && startPos > 0) {
685 int back = startPos;
686 int style = 0;
687 while (--back) {
688 style = styler.StyleAt(back);
689 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
690 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
691 break;
693 if (style == SCE_HJ_SYMBOLS) {
694 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
698 styler.StartSegment(startPos);
699 const int lengthDoc = startPos + length;
700 for (int i = startPos; i < lengthDoc; i++) {
701 const int chPrev2 = chPrev;
702 chPrev = ch;
703 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
704 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
705 chPrevNonWhite = ch;
706 ch = static_cast<unsigned char>(styler[i]);
707 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
708 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
710 // Handle DBCS codepages
711 if (styler.IsLeadByte(static_cast<char>(ch))) {
712 chPrev = ' ';
713 i += 1;
714 continue;
717 if ((!IsASpace(ch) || !foldCompact) && fold)
718 visibleChars++;
719 if (!IsASpace(ch))
720 lineStartVisibleChars++;
722 // decide what is the current state to print (depending of the script tag)
723 StateToPrint = statePrintForState(state, inScriptType);
725 // handle script folding
726 if (fold) {
727 switch (scriptLanguage) {
728 case eScriptJS:
729 case eScriptPHP:
730 //not currently supported case eScriptVBS:
732 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
733 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
734 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
735 if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
736 levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
738 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
739 levelCurrent--;
741 break;
742 case eScriptPython:
743 if (state != SCE_HP_COMMENTLINE) {
744 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
745 levelCurrent++;
746 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
747 // check if the number of tabs is lower than the level
748 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
749 for (int j = 0; Findlevel > 0; j++) {
750 char chTmp = styler.SafeGetCharAt(i + j + 1);
751 if (chTmp == '\t') {
752 Findlevel -= 8;
753 } else if (chTmp == ' ') {
754 Findlevel--;
755 } else {
756 break;
760 if (Findlevel > 0) {
761 levelCurrent -= Findlevel / 8;
762 if (Findlevel % 8)
763 levelCurrent--;
767 break;
768 default:
769 break;
773 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
774 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
775 // Avoid triggering two times on Dos/Win
776 // New line -> record any line state onto /next/ line
777 if (fold) {
778 int lev = levelPrev;
779 if (visibleChars == 0)
780 lev |= SC_FOLDLEVELWHITEFLAG;
781 if ((levelCurrent > levelPrev) && (visibleChars > 0))
782 lev |= SC_FOLDLEVELHEADERFLAG;
784 styler.SetLevel(lineCurrent, lev);
785 visibleChars = 0;
786 levelPrev = levelCurrent;
788 lineCurrent++;
789 lineStartVisibleChars = 0;
790 styler.SetLineState(lineCurrent,
791 ((inScriptType & 0x03) << 0) |
792 ((tagOpened & 0x01) << 2) |
793 ((tagClosing & 0x01) << 3) |
794 ((aspScript & 0x0F) << 4) |
795 ((clientScript & 0x0F) << 8) |
796 ((beforePreProc & 0xFF) << 12));
799 // Allow falling through to mako handling code if newline is going to end a block
800 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
801 (!isMako || (0 != strcmp(makoBlockType, "%")))) {
804 // generic end of script processing
805 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
806 // Check if it's the end of the script tag (or any other HTML tag)
807 switch (state) {
808 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
809 case SCE_H_DOUBLESTRING:
810 case SCE_H_SINGLESTRING:
811 case SCE_HJ_COMMENT:
812 case SCE_HJ_COMMENTDOC:
813 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
814 // the end of script marker from some JS interpreters.
815 case SCE_HB_COMMENTLINE:
816 case SCE_HBA_COMMENTLINE:
817 case SCE_HJ_DOUBLESTRING:
818 case SCE_HJ_SINGLESTRING:
819 case SCE_HJ_REGEX:
820 case SCE_HB_STRING:
821 case SCE_HBA_STRING:
822 case SCE_HP_STRING:
823 case SCE_HP_TRIPLE:
824 case SCE_HP_TRIPLEDOUBLE:
825 case SCE_HPHP_HSTRING:
826 case SCE_HPHP_SIMPLESTRING:
827 case SCE_HPHP_COMMENT:
828 case SCE_HPHP_COMMENTLINE:
829 break;
830 default :
831 // check if the closing tag is a script tag
832 if (const char *tag =
833 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
834 state == SCE_H_COMMENT ? "comment" : 0) {
835 int j = i + 2;
836 int chr;
837 do {
838 chr = static_cast<int>(*tag++);
839 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
840 if (chr != 0) break;
842 // closing tag of the script (it's a closing HTML tag anyway)
843 styler.ColourTo(i - 1, StateToPrint);
844 state = SCE_H_TAGUNKNOWN;
845 inScriptType = eHtml;
846 scriptLanguage = eScriptNone;
847 clientScript = eScriptJS;
848 i += 2;
849 visibleChars += 2;
850 tagClosing = true;
851 continue;
855 /////////////////////////////////////
856 // handle the start of PHP pre-processor = Non-HTML
857 else if ((state != SCE_H_ASPAT) &&
858 !isPHPStringState(state) &&
859 (state != SCE_HPHP_COMMENT) &&
860 (ch == '<') &&
861 (chNext == '?') &&
862 !IsScriptCommentState(state) ) {
863 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
864 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
865 styler.ColourTo(i - 1, StateToPrint);
866 beforePreProc = state;
867 i++;
868 visibleChars++;
869 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
870 if (scriptLanguage == eScriptXML)
871 styler.ColourTo(i, SCE_H_XMLSTART);
872 else
873 styler.ColourTo(i, SCE_H_QUESTION);
874 state = StateForScript(scriptLanguage);
875 if (inScriptType == eNonHtmlScript)
876 inScriptType = eNonHtmlScriptPreProc;
877 else
878 inScriptType = eNonHtmlPreProc;
879 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
880 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
881 levelCurrent++;
883 // should be better
884 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
885 continue;
888 // handle the start Mako template Python code
889 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
890 (lineStartVisibleChars == 1 && ch == '%') ||
891 (ch == '$' && chNext == '{') ||
892 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
893 if (ch == '%')
894 strcpy(makoBlockType, "%");
895 else if (ch == '$')
896 strcpy(makoBlockType, "{");
897 else if (chNext == '/')
898 GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
899 else
900 GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
901 styler.ColourTo(i - 1, StateToPrint);
902 beforePreProc = state;
903 if (inScriptType == eNonHtmlScript)
904 inScriptType = eNonHtmlScriptPreProc;
905 else
906 inScriptType = eNonHtmlPreProc;
908 if (chNext == '/') {
909 i += 2;
910 visibleChars += 2;
911 } else if (ch != '%') {
912 i++;
913 visibleChars++;
915 state = SCE_HP_START;
916 scriptLanguage = eScriptPython;
917 styler.ColourTo(i, SCE_H_ASP);
918 if (foldHTMLPreprocessor && ch == '<')
919 levelCurrent++;
921 if (ch != '%' && ch != '$') {
922 i += strlen(makoBlockType);
923 visibleChars += strlen(makoBlockType);
924 if (keywords4.InList(makoBlockType))
925 styler.ColourTo(i, SCE_HP_WORD);
926 else
927 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
930 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
931 continue;
934 // handle the start of ASP pre-processor = Non-HTML
935 else if (!isMako && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
936 styler.ColourTo(i - 1, StateToPrint);
937 beforePreProc = state;
938 if (inScriptType == eNonHtmlScript)
939 inScriptType = eNonHtmlScriptPreProc;
940 else
941 inScriptType = eNonHtmlPreProc;
943 if (chNext2 == '@') {
944 i += 2; // place as if it was the second next char treated
945 visibleChars += 2;
946 state = SCE_H_ASPAT;
947 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
948 styler.ColourTo(i + 3, SCE_H_ASP);
949 state = SCE_H_XCCOMMENT;
950 scriptLanguage = eScriptVBS;
951 continue;
952 } else {
953 if (chNext2 == '=') {
954 i += 2; // place as if it was the second next char treated
955 visibleChars += 2;
956 } else {
957 i++; // place as if it was the next char treated
958 visibleChars++;
961 state = StateForScript(aspScript);
963 scriptLanguage = eScriptVBS;
964 styler.ColourTo(i, SCE_H_ASP);
965 // fold whole script
966 if (foldHTMLPreprocessor)
967 levelCurrent++;
968 // should be better
969 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
970 continue;
973 /////////////////////////////////////
974 // handle the start of SGML language (DTD)
975 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
976 (chPrev == '<') &&
977 (ch == '!') &&
978 (StateToPrint != SCE_H_CDATA) &&
979 (!IsCommentState(StateToPrint)) &&
980 (!IsScriptCommentState(StateToPrint)) ) {
981 beforePreProc = state;
982 styler.ColourTo(i - 2, StateToPrint);
983 if ((chNext == '-') && (chNext2 == '-')) {
984 state = SCE_H_COMMENT; // wait for a pending command
985 styler.ColourTo(i + 2, SCE_H_COMMENT);
986 i += 2; // follow styling after the --
987 } else if (isWordCdata(i + 1, i + 7, styler)) {
988 state = SCE_H_CDATA;
989 } else {
990 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
991 scriptLanguage = eScriptSGML;
992 state = SCE_H_SGML_COMMAND; // wait for a pending command
994 // fold whole tag (-- when closing the tag)
995 if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
996 levelCurrent++;
997 continue;
1000 // handle the end of Mako Python code
1001 else if (isMako &&
1002 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1003 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1004 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1005 if (state == SCE_H_ASPAT) {
1006 aspScript = segIsScriptingIndicator(styler,
1007 styler.GetStartSegment(), i - 1, aspScript);
1009 if (state == SCE_HP_WORD) {
1010 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1011 } else {
1012 styler.ColourTo(i - 1, StateToPrint);
1014 if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1015 i++;
1016 visibleChars++;
1018 if (0 != strcmp(makoBlockType, "%")) {
1019 styler.ColourTo(i, SCE_H_ASP);
1021 state = beforePreProc;
1022 if (inScriptType == eNonHtmlScriptPreProc)
1023 inScriptType = eNonHtmlScript;
1024 else
1025 inScriptType = eHtml;
1026 if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1027 levelCurrent--;
1029 scriptLanguage = eScriptNone;
1030 continue;
1033 // handle the end of a pre-processor = Non-HTML
1034 else if ((!isMako && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1035 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1036 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1037 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1038 if (state == SCE_H_ASPAT) {
1039 aspScript = segIsScriptingIndicator(styler,
1040 styler.GetStartSegment(), i - 1, aspScript);
1042 // Bounce out of any ASP mode
1043 switch (state) {
1044 case SCE_HJ_WORD:
1045 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1046 break;
1047 case SCE_HB_WORD:
1048 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1049 break;
1050 case SCE_HP_WORD:
1051 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1052 break;
1053 case SCE_HPHP_WORD:
1054 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1055 break;
1056 case SCE_H_XCCOMMENT:
1057 styler.ColourTo(i - 1, state);
1058 break;
1059 default :
1060 styler.ColourTo(i - 1, StateToPrint);
1061 break;
1063 if (scriptLanguage != eScriptSGML) {
1064 i++;
1065 visibleChars++;
1067 if (ch == '%')
1068 styler.ColourTo(i, SCE_H_ASP);
1069 else if (scriptLanguage == eScriptXML)
1070 styler.ColourTo(i, SCE_H_XMLEND);
1071 else if (scriptLanguage == eScriptSGML)
1072 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1073 else
1074 styler.ColourTo(i, SCE_H_QUESTION);
1075 state = beforePreProc;
1076 if (inScriptType == eNonHtmlScriptPreProc)
1077 inScriptType = eNonHtmlScript;
1078 else
1079 inScriptType = eHtml;
1080 // Unfold all scripting languages, except for XML tag
1081 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1082 levelCurrent--;
1084 scriptLanguage = eScriptNone;
1085 continue;
1087 /////////////////////////////////////
1089 switch (state) {
1090 case SCE_H_DEFAULT:
1091 if (ch == '<') {
1092 // in HTML, fold on tag open and unfold on tag close
1093 tagOpened = true;
1094 tagClosing = (chNext == '/');
1095 styler.ColourTo(i - 1, StateToPrint);
1096 if (chNext != '!')
1097 state = SCE_H_TAGUNKNOWN;
1098 } else if (ch == '&') {
1099 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1100 state = SCE_H_ENTITY;
1102 break;
1103 case SCE_H_SGML_DEFAULT:
1104 case SCE_H_SGML_BLOCK_DEFAULT:
1105 // if (scriptLanguage == eScriptSGMLblock)
1106 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1108 if (ch == '\"') {
1109 styler.ColourTo(i - 1, StateToPrint);
1110 state = SCE_H_SGML_DOUBLESTRING;
1111 } else if (ch == '\'') {
1112 styler.ColourTo(i - 1, StateToPrint);
1113 state = SCE_H_SGML_SIMPLESTRING;
1114 } else if ((ch == '-') && (chPrev == '-')) {
1115 if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1116 styler.ColourTo(i - 2, StateToPrint);
1118 state = SCE_H_SGML_COMMENT;
1119 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1120 styler.ColourTo(i - 2, StateToPrint);
1121 state = SCE_H_SGML_ENTITY;
1122 } else if (ch == '#') {
1123 styler.ColourTo(i - 1, StateToPrint);
1124 state = SCE_H_SGML_SPECIAL;
1125 } else if (ch == '[') {
1126 styler.ColourTo(i - 1, StateToPrint);
1127 scriptLanguage = eScriptSGMLblock;
1128 state = SCE_H_SGML_BLOCK_DEFAULT;
1129 } else if (ch == ']') {
1130 if (scriptLanguage == eScriptSGMLblock) {
1131 styler.ColourTo(i, StateToPrint);
1132 scriptLanguage = eScriptSGML;
1133 } else {
1134 styler.ColourTo(i - 1, StateToPrint);
1135 styler.ColourTo(i, SCE_H_SGML_ERROR);
1137 state = SCE_H_SGML_DEFAULT;
1138 } else if (scriptLanguage == eScriptSGMLblock) {
1139 if ((ch == '!') && (chPrev == '<')) {
1140 styler.ColourTo(i - 2, StateToPrint);
1141 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1142 state = SCE_H_SGML_COMMAND;
1143 } else if (ch == '>') {
1144 styler.ColourTo(i - 1, StateToPrint);
1145 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1148 break;
1149 case SCE_H_SGML_COMMAND:
1150 if ((ch == '-') && (chPrev == '-')) {
1151 styler.ColourTo(i - 2, StateToPrint);
1152 state = SCE_H_SGML_COMMENT;
1153 } else if (!issgmlwordchar(ch)) {
1154 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1155 styler.ColourTo(i - 1, StateToPrint);
1156 state = SCE_H_SGML_1ST_PARAM;
1157 } else {
1158 state = SCE_H_SGML_ERROR;
1161 break;
1162 case SCE_H_SGML_1ST_PARAM:
1163 // wait for the beginning of the word
1164 if ((ch == '-') && (chPrev == '-')) {
1165 if (scriptLanguage == eScriptSGMLblock) {
1166 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1167 } else {
1168 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1170 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1171 } else if (issgmlwordchar(ch)) {
1172 if (scriptLanguage == eScriptSGMLblock) {
1173 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1174 } else {
1175 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1177 // find the length of the word
1178 int size = 1;
1179 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1180 size++;
1181 styler.ColourTo(i + size - 1, StateToPrint);
1182 i += size - 1;
1183 visibleChars += size - 1;
1184 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1185 if (scriptLanguage == eScriptSGMLblock) {
1186 state = SCE_H_SGML_BLOCK_DEFAULT;
1187 } else {
1188 state = SCE_H_SGML_DEFAULT;
1190 continue;
1192 break;
1193 case SCE_H_SGML_ERROR:
1194 if ((ch == '-') && (chPrev == '-')) {
1195 styler.ColourTo(i - 2, StateToPrint);
1196 state = SCE_H_SGML_COMMENT;
1198 case SCE_H_SGML_DOUBLESTRING:
1199 if (ch == '\"') {
1200 styler.ColourTo(i, StateToPrint);
1201 state = SCE_H_SGML_DEFAULT;
1203 break;
1204 case SCE_H_SGML_SIMPLESTRING:
1205 if (ch == '\'') {
1206 styler.ColourTo(i, StateToPrint);
1207 state = SCE_H_SGML_DEFAULT;
1209 break;
1210 case SCE_H_SGML_COMMENT:
1211 if ((ch == '-') && (chPrev == '-')) {
1212 styler.ColourTo(i, StateToPrint);
1213 state = SCE_H_SGML_DEFAULT;
1215 break;
1216 case SCE_H_CDATA:
1217 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1218 styler.ColourTo(i, StateToPrint);
1219 state = SCE_H_DEFAULT;
1220 levelCurrent--;
1222 break;
1223 case SCE_H_COMMENT:
1224 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1225 styler.ColourTo(i, StateToPrint);
1226 state = SCE_H_DEFAULT;
1227 levelCurrent--;
1229 break;
1230 case SCE_H_SGML_1ST_PARAM_COMMENT:
1231 if ((ch == '-') && (chPrev == '-')) {
1232 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1233 state = SCE_H_SGML_1ST_PARAM;
1235 break;
1236 case SCE_H_SGML_SPECIAL:
1237 if (!(isascii(ch) && isupper(ch))) {
1238 styler.ColourTo(i - 1, StateToPrint);
1239 if (isalnum(ch)) {
1240 state = SCE_H_SGML_ERROR;
1241 } else {
1242 state = SCE_H_SGML_DEFAULT;
1245 break;
1246 case SCE_H_SGML_ENTITY:
1247 if (ch == ';') {
1248 styler.ColourTo(i, StateToPrint);
1249 state = SCE_H_SGML_DEFAULT;
1250 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1251 styler.ColourTo(i, SCE_H_SGML_ERROR);
1252 state = SCE_H_SGML_DEFAULT;
1254 break;
1255 case SCE_H_ENTITY:
1256 if (ch == ';') {
1257 styler.ColourTo(i, StateToPrint);
1258 state = SCE_H_DEFAULT;
1260 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1261 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1262 if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1263 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1264 else
1265 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1266 state = SCE_H_DEFAULT;
1268 break;
1269 case SCE_H_TAGUNKNOWN:
1270 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1271 int eClass = classifyTagHTML(styler.GetStartSegment(),
1272 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1273 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1274 if (!tagClosing) {
1275 inScriptType = eNonHtmlScript;
1276 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1277 } else {
1278 scriptLanguage = eScriptNone;
1280 eClass = SCE_H_TAG;
1282 if (ch == '>') {
1283 styler.ColourTo(i, eClass);
1284 if (inScriptType == eNonHtmlScript) {
1285 state = StateForScript(scriptLanguage);
1286 } else {
1287 state = SCE_H_DEFAULT;
1289 tagOpened = false;
1290 if (!tagDontFold) {
1291 if (tagClosing) {
1292 levelCurrent--;
1293 } else {
1294 levelCurrent++;
1297 tagClosing = false;
1298 } else if (ch == '/' && chNext == '>') {
1299 if (eClass == SCE_H_TAGUNKNOWN) {
1300 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1301 } else {
1302 styler.ColourTo(i - 1, StateToPrint);
1303 styler.ColourTo(i + 1, SCE_H_TAGEND);
1305 i++;
1306 ch = chNext;
1307 state = SCE_H_DEFAULT;
1308 tagOpened = false;
1309 } else {
1310 if (eClass != SCE_H_TAGUNKNOWN) {
1311 if (eClass == SCE_H_SGML_DEFAULT) {
1312 state = SCE_H_SGML_DEFAULT;
1313 } else {
1314 state = SCE_H_OTHER;
1319 break;
1320 case SCE_H_ATTRIBUTE:
1321 if (!setAttributeContinue.Contains(ch)) {
1322 if (inScriptType == eNonHtmlScript) {
1323 int scriptLanguagePrev = scriptLanguage;
1324 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1325 scriptLanguage = clientScript;
1326 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1327 inScriptType = eHtml;
1329 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1330 if (ch == '>') {
1331 styler.ColourTo(i, SCE_H_TAG);
1332 if (inScriptType == eNonHtmlScript) {
1333 state = StateForScript(scriptLanguage);
1334 } else {
1335 state = SCE_H_DEFAULT;
1337 tagOpened = false;
1338 if (!tagDontFold) {
1339 if (tagClosing) {
1340 levelCurrent--;
1341 } else {
1342 levelCurrent++;
1345 tagClosing = false;
1346 } else if (ch == '=') {
1347 styler.ColourTo(i, SCE_H_OTHER);
1348 state = SCE_H_VALUE;
1349 } else {
1350 state = SCE_H_OTHER;
1353 break;
1354 case SCE_H_OTHER:
1355 if (ch == '>') {
1356 styler.ColourTo(i - 1, StateToPrint);
1357 styler.ColourTo(i, SCE_H_TAG);
1358 if (inScriptType == eNonHtmlScript) {
1359 state = StateForScript(scriptLanguage);
1360 } else {
1361 state = SCE_H_DEFAULT;
1363 tagOpened = false;
1364 if (!tagDontFold) {
1365 if (tagClosing) {
1366 levelCurrent--;
1367 } else {
1368 levelCurrent++;
1371 tagClosing = false;
1372 } else if (ch == '\"') {
1373 styler.ColourTo(i - 1, StateToPrint);
1374 state = SCE_H_DOUBLESTRING;
1375 } else if (ch == '\'') {
1376 styler.ColourTo(i - 1, StateToPrint);
1377 state = SCE_H_SINGLESTRING;
1378 } else if (ch == '=') {
1379 styler.ColourTo(i, StateToPrint);
1380 state = SCE_H_VALUE;
1381 } else if (ch == '/' && chNext == '>') {
1382 styler.ColourTo(i - 1, StateToPrint);
1383 styler.ColourTo(i + 1, SCE_H_TAGEND);
1384 i++;
1385 ch = chNext;
1386 state = SCE_H_DEFAULT;
1387 tagOpened = false;
1388 } else if (ch == '?' && chNext == '>') {
1389 styler.ColourTo(i - 1, StateToPrint);
1390 styler.ColourTo(i + 1, SCE_H_XMLEND);
1391 i++;
1392 ch = chNext;
1393 state = SCE_H_DEFAULT;
1394 } else if (setHTMLWord.Contains(ch)) {
1395 styler.ColourTo(i - 1, StateToPrint);
1396 state = SCE_H_ATTRIBUTE;
1398 break;
1399 case SCE_H_DOUBLESTRING:
1400 if (ch == '\"') {
1401 if (inScriptType == eNonHtmlScript) {
1402 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1404 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1405 state = SCE_H_OTHER;
1407 break;
1408 case SCE_H_SINGLESTRING:
1409 if (ch == '\'') {
1410 if (inScriptType == eNonHtmlScript) {
1411 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1413 styler.ColourTo(i, SCE_H_SINGLESTRING);
1414 state = SCE_H_OTHER;
1416 break;
1417 case SCE_H_VALUE:
1418 if (!setHTMLWord.Contains(ch)) {
1419 if (ch == '\"' && chPrev == '=') {
1420 // Should really test for being first character
1421 state = SCE_H_DOUBLESTRING;
1422 } else if (ch == '\'' && chPrev == '=') {
1423 state = SCE_H_SINGLESTRING;
1424 } else {
1425 if (IsNumber(styler.GetStartSegment(), styler)) {
1426 styler.ColourTo(i - 1, SCE_H_NUMBER);
1427 } else {
1428 styler.ColourTo(i - 1, StateToPrint);
1430 if (ch == '>') {
1431 styler.ColourTo(i, SCE_H_TAG);
1432 if (inScriptType == eNonHtmlScript) {
1433 state = StateForScript(scriptLanguage);
1434 } else {
1435 state = SCE_H_DEFAULT;
1437 tagOpened = false;
1438 if (!tagDontFold) {
1439 if (tagClosing) {
1440 levelCurrent--;
1441 } else {
1442 levelCurrent++;
1445 tagClosing = false;
1446 } else {
1447 state = SCE_H_OTHER;
1451 break;
1452 case SCE_HJ_DEFAULT:
1453 case SCE_HJ_START:
1454 case SCE_HJ_SYMBOLS:
1455 if (IsAWordStart(ch)) {
1456 styler.ColourTo(i - 1, StateToPrint);
1457 state = SCE_HJ_WORD;
1458 } else if (ch == '/' && chNext == '*') {
1459 styler.ColourTo(i - 1, StateToPrint);
1460 if (chNext2 == '*')
1461 state = SCE_HJ_COMMENTDOC;
1462 else
1463 state = SCE_HJ_COMMENT;
1464 } else if (ch == '/' && chNext == '/') {
1465 styler.ColourTo(i - 1, StateToPrint);
1466 state = SCE_HJ_COMMENTLINE;
1467 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1468 styler.ColourTo(i - 1, StateToPrint);
1469 state = SCE_HJ_REGEX;
1470 } else if (ch == '\"') {
1471 styler.ColourTo(i - 1, StateToPrint);
1472 state = SCE_HJ_DOUBLESTRING;
1473 } else if (ch == '\'') {
1474 styler.ColourTo(i - 1, StateToPrint);
1475 state = SCE_HJ_SINGLESTRING;
1476 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1477 styler.SafeGetCharAt(i + 3) == '-') {
1478 styler.ColourTo(i - 1, StateToPrint);
1479 state = SCE_HJ_COMMENTLINE;
1480 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1481 styler.ColourTo(i - 1, StateToPrint);
1482 state = SCE_HJ_COMMENTLINE;
1483 i += 2;
1484 } else if (IsOperator(ch)) {
1485 styler.ColourTo(i - 1, StateToPrint);
1486 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1487 state = SCE_HJ_DEFAULT;
1488 } else if ((ch == ' ') || (ch == '\t')) {
1489 if (state == SCE_HJ_START) {
1490 styler.ColourTo(i - 1, StateToPrint);
1491 state = SCE_HJ_DEFAULT;
1494 break;
1495 case SCE_HJ_WORD:
1496 if (!IsAWordChar(ch)) {
1497 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1498 //styler.ColourTo(i - 1, eHTJSKeyword);
1499 state = SCE_HJ_DEFAULT;
1500 if (ch == '/' && chNext == '*') {
1501 if (chNext2 == '*')
1502 state = SCE_HJ_COMMENTDOC;
1503 else
1504 state = SCE_HJ_COMMENT;
1505 } else if (ch == '/' && chNext == '/') {
1506 state = SCE_HJ_COMMENTLINE;
1507 } else if (ch == '\"') {
1508 state = SCE_HJ_DOUBLESTRING;
1509 } else if (ch == '\'') {
1510 state = SCE_HJ_SINGLESTRING;
1511 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1512 styler.ColourTo(i - 1, StateToPrint);
1513 state = SCE_HJ_COMMENTLINE;
1514 i += 2;
1515 } else if (IsOperator(ch)) {
1516 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1517 state = SCE_HJ_DEFAULT;
1520 break;
1521 case SCE_HJ_COMMENT:
1522 case SCE_HJ_COMMENTDOC:
1523 if (ch == '/' && chPrev == '*') {
1524 styler.ColourTo(i, StateToPrint);
1525 state = SCE_HJ_DEFAULT;
1526 ch = ' ';
1528 break;
1529 case SCE_HJ_COMMENTLINE:
1530 if (ch == '\r' || ch == '\n') {
1531 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1532 state = SCE_HJ_DEFAULT;
1533 ch = ' ';
1535 break;
1536 case SCE_HJ_DOUBLESTRING:
1537 if (ch == '\\') {
1538 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1539 i++;
1541 } else if (ch == '\"') {
1542 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1543 state = SCE_HJ_DEFAULT;
1544 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1545 styler.ColourTo(i - 1, StateToPrint);
1546 state = SCE_HJ_COMMENTLINE;
1547 i += 2;
1548 } else if (isLineEnd(ch)) {
1549 styler.ColourTo(i - 1, StateToPrint);
1550 state = SCE_HJ_STRINGEOL;
1552 break;
1553 case SCE_HJ_SINGLESTRING:
1554 if (ch == '\\') {
1555 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1556 i++;
1558 } else if (ch == '\'') {
1559 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1560 state = SCE_HJ_DEFAULT;
1561 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1562 styler.ColourTo(i - 1, StateToPrint);
1563 state = SCE_HJ_COMMENTLINE;
1564 i += 2;
1565 } else if (isLineEnd(ch)) {
1566 styler.ColourTo(i - 1, StateToPrint);
1567 state = SCE_HJ_STRINGEOL;
1569 break;
1570 case SCE_HJ_STRINGEOL:
1571 if (!isLineEnd(ch)) {
1572 styler.ColourTo(i - 1, StateToPrint);
1573 state = SCE_HJ_DEFAULT;
1574 } else if (!isLineEnd(chNext)) {
1575 styler.ColourTo(i, StateToPrint);
1576 state = SCE_HJ_DEFAULT;
1578 break;
1579 case SCE_HJ_REGEX:
1580 if (ch == '\r' || ch == '\n' || ch == '/') {
1581 if (ch == '/') {
1582 while (isascii(chNext) && islower(chNext)) { // gobble regex flags
1583 i++;
1584 ch = chNext;
1585 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1588 styler.ColourTo(i, StateToPrint);
1589 state = SCE_HJ_DEFAULT;
1590 } else if (ch == '\\') {
1591 // Gobble up the quoted character
1592 if (chNext == '\\' || chNext == '/') {
1593 i++;
1594 ch = chNext;
1595 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1598 break;
1599 case SCE_HB_DEFAULT:
1600 case SCE_HB_START:
1601 if (IsAWordStart(ch)) {
1602 styler.ColourTo(i - 1, StateToPrint);
1603 state = SCE_HB_WORD;
1604 } else if (ch == '\'') {
1605 styler.ColourTo(i - 1, StateToPrint);
1606 state = SCE_HB_COMMENTLINE;
1607 } else if (ch == '\"') {
1608 styler.ColourTo(i - 1, StateToPrint);
1609 state = SCE_HB_STRING;
1610 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1611 styler.SafeGetCharAt(i + 3) == '-') {
1612 styler.ColourTo(i - 1, StateToPrint);
1613 state = SCE_HB_COMMENTLINE;
1614 } else if (IsOperator(ch)) {
1615 styler.ColourTo(i - 1, StateToPrint);
1616 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1617 state = SCE_HB_DEFAULT;
1618 } else if ((ch == ' ') || (ch == '\t')) {
1619 if (state == SCE_HB_START) {
1620 styler.ColourTo(i - 1, StateToPrint);
1621 state = SCE_HB_DEFAULT;
1624 break;
1625 case SCE_HB_WORD:
1626 if (!IsAWordChar(ch)) {
1627 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1628 if (state == SCE_HB_DEFAULT) {
1629 if (ch == '\"') {
1630 state = SCE_HB_STRING;
1631 } else if (ch == '\'') {
1632 state = SCE_HB_COMMENTLINE;
1633 } else if (IsOperator(ch)) {
1634 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1635 state = SCE_HB_DEFAULT;
1639 break;
1640 case SCE_HB_STRING:
1641 if (ch == '\"') {
1642 styler.ColourTo(i, StateToPrint);
1643 state = SCE_HB_DEFAULT;
1644 } else if (ch == '\r' || ch == '\n') {
1645 styler.ColourTo(i - 1, StateToPrint);
1646 state = SCE_HB_STRINGEOL;
1648 break;
1649 case SCE_HB_COMMENTLINE:
1650 if (ch == '\r' || ch == '\n') {
1651 styler.ColourTo(i - 1, StateToPrint);
1652 state = SCE_HB_DEFAULT;
1654 break;
1655 case SCE_HB_STRINGEOL:
1656 if (!isLineEnd(ch)) {
1657 styler.ColourTo(i - 1, StateToPrint);
1658 state = SCE_HB_DEFAULT;
1659 } else if (!isLineEnd(chNext)) {
1660 styler.ColourTo(i, StateToPrint);
1661 state = SCE_HB_DEFAULT;
1663 break;
1664 case SCE_HP_DEFAULT:
1665 case SCE_HP_START:
1666 if (IsAWordStart(ch)) {
1667 styler.ColourTo(i - 1, StateToPrint);
1668 state = SCE_HP_WORD;
1669 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1670 styler.SafeGetCharAt(i + 3) == '-') {
1671 styler.ColourTo(i - 1, StateToPrint);
1672 state = SCE_HP_COMMENTLINE;
1673 } else if (ch == '#') {
1674 styler.ColourTo(i - 1, StateToPrint);
1675 state = SCE_HP_COMMENTLINE;
1676 } else if (ch == '\"') {
1677 styler.ColourTo(i - 1, StateToPrint);
1678 if (chNext == '\"' && chNext2 == '\"') {
1679 i += 2;
1680 state = SCE_HP_TRIPLEDOUBLE;
1681 ch = ' ';
1682 chPrev = ' ';
1683 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1684 } else {
1685 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1686 state = SCE_HP_STRING;
1688 } else if (ch == '\'') {
1689 styler.ColourTo(i - 1, StateToPrint);
1690 if (chNext == '\'' && chNext2 == '\'') {
1691 i += 2;
1692 state = SCE_HP_TRIPLE;
1693 ch = ' ';
1694 chPrev = ' ';
1695 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1696 } else {
1697 state = SCE_HP_CHARACTER;
1699 } else if (IsOperator(ch)) {
1700 styler.ColourTo(i - 1, StateToPrint);
1701 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1702 } else if ((ch == ' ') || (ch == '\t')) {
1703 if (state == SCE_HP_START) {
1704 styler.ColourTo(i - 1, StateToPrint);
1705 state = SCE_HP_DEFAULT;
1708 break;
1709 case SCE_HP_WORD:
1710 if (!IsAWordChar(ch)) {
1711 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1712 state = SCE_HP_DEFAULT;
1713 if (ch == '#') {
1714 state = SCE_HP_COMMENTLINE;
1715 } else if (ch == '\"') {
1716 if (chNext == '\"' && chNext2 == '\"') {
1717 i += 2;
1718 state = SCE_HP_TRIPLEDOUBLE;
1719 ch = ' ';
1720 chPrev = ' ';
1721 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1722 } else {
1723 state = SCE_HP_STRING;
1725 } else if (ch == '\'') {
1726 if (chNext == '\'' && chNext2 == '\'') {
1727 i += 2;
1728 state = SCE_HP_TRIPLE;
1729 ch = ' ';
1730 chPrev = ' ';
1731 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1732 } else {
1733 state = SCE_HP_CHARACTER;
1735 } else if (IsOperator(ch)) {
1736 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1739 break;
1740 case SCE_HP_COMMENTLINE:
1741 if (ch == '\r' || ch == '\n') {
1742 styler.ColourTo(i - 1, StateToPrint);
1743 state = SCE_HP_DEFAULT;
1745 break;
1746 case SCE_HP_STRING:
1747 if (ch == '\\') {
1748 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1749 i++;
1750 ch = chNext;
1751 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1753 } else if (ch == '\"') {
1754 styler.ColourTo(i, StateToPrint);
1755 state = SCE_HP_DEFAULT;
1757 break;
1758 case SCE_HP_CHARACTER:
1759 if (ch == '\\') {
1760 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1761 i++;
1762 ch = chNext;
1763 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1765 } else if (ch == '\'') {
1766 styler.ColourTo(i, StateToPrint);
1767 state = SCE_HP_DEFAULT;
1769 break;
1770 case SCE_HP_TRIPLE:
1771 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1772 styler.ColourTo(i, StateToPrint);
1773 state = SCE_HP_DEFAULT;
1775 break;
1776 case SCE_HP_TRIPLEDOUBLE:
1777 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1778 styler.ColourTo(i, StateToPrint);
1779 state = SCE_HP_DEFAULT;
1781 break;
1782 ///////////// start - PHP state handling
1783 case SCE_HPHP_WORD:
1784 if (!IsAWordChar(ch)) {
1785 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1786 if (ch == '/' && chNext == '*') {
1787 i++;
1788 state = SCE_HPHP_COMMENT;
1789 } else if (ch == '/' && chNext == '/') {
1790 i++;
1791 state = SCE_HPHP_COMMENTLINE;
1792 } else if (ch == '#') {
1793 state = SCE_HPHP_COMMENTLINE;
1794 } else if (ch == '\"') {
1795 state = SCE_HPHP_HSTRING;
1796 strcpy(phpStringDelimiter, "\"");
1797 } else if (styler.Match(i, "<<<")) {
1798 bool isSimpleString = false;
1799 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1800 if (strlen(phpStringDelimiter)) {
1801 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1802 if (foldHeredoc) levelCurrent++;
1804 } else if (ch == '\'') {
1805 state = SCE_HPHP_SIMPLESTRING;
1806 strcpy(phpStringDelimiter, "\'");
1807 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1808 state = SCE_HPHP_VARIABLE;
1809 } else if (IsOperator(ch)) {
1810 state = SCE_HPHP_OPERATOR;
1811 } else {
1812 state = SCE_HPHP_DEFAULT;
1815 break;
1816 case SCE_HPHP_NUMBER:
1817 // recognize bases 8,10 or 16 integers OR floating-point numbers
1818 if (!IsADigit(ch)
1819 && strchr(".xXabcdefABCDEF", ch) == NULL
1820 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1821 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1822 if (IsOperator(ch))
1823 state = SCE_HPHP_OPERATOR;
1824 else
1825 state = SCE_HPHP_DEFAULT;
1827 break;
1828 case SCE_HPHP_VARIABLE:
1829 if (!IsPhpWordChar(chNext)) {
1830 styler.ColourTo(i, SCE_HPHP_VARIABLE);
1831 state = SCE_HPHP_DEFAULT;
1833 break;
1834 case SCE_HPHP_COMMENT:
1835 if (ch == '/' && chPrev == '*') {
1836 styler.ColourTo(i, StateToPrint);
1837 state = SCE_HPHP_DEFAULT;
1839 break;
1840 case SCE_HPHP_COMMENTLINE:
1841 if (ch == '\r' || ch == '\n') {
1842 styler.ColourTo(i - 1, StateToPrint);
1843 state = SCE_HPHP_DEFAULT;
1845 break;
1846 case SCE_HPHP_HSTRING:
1847 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1848 // skip the next char
1849 i++;
1850 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1851 && IsPhpWordStart(chNext2)) {
1852 styler.ColourTo(i - 1, StateToPrint);
1853 state = SCE_HPHP_COMPLEX_VARIABLE;
1854 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1855 styler.ColourTo(i - 1, StateToPrint);
1856 state = SCE_HPHP_HSTRING_VARIABLE;
1857 } else if (styler.Match(i, phpStringDelimiter)) {
1858 if (phpStringDelimiter[0] == '\"') {
1859 styler.ColourTo(i, StateToPrint);
1860 state = SCE_HPHP_DEFAULT;
1861 } else if (isLineEnd(chPrev)) {
1862 const int psdLength = strlen(phpStringDelimiter);
1863 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1864 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1865 if (isLineEnd(chAfterPsd) ||
1866 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1867 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1868 styler.ColourTo(i, StateToPrint);
1869 state = SCE_HPHP_DEFAULT;
1870 if (foldHeredoc) levelCurrent--;
1874 break;
1875 case SCE_HPHP_SIMPLESTRING:
1876 if (phpStringDelimiter[0] == '\'') {
1877 if (ch == '\\') {
1878 // skip the next char
1879 i++;
1880 } else if (ch == '\'') {
1881 styler.ColourTo(i, StateToPrint);
1882 state = SCE_HPHP_DEFAULT;
1884 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
1885 const int psdLength = strlen(phpStringDelimiter);
1886 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
1887 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
1888 if (isLineEnd(chAfterPsd) ||
1889 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
1890 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
1891 styler.ColourTo(i, StateToPrint);
1892 state = SCE_HPHP_DEFAULT;
1893 if (foldHeredoc) levelCurrent--;
1896 break;
1897 case SCE_HPHP_HSTRING_VARIABLE:
1898 if (!IsPhpWordChar(chNext)) {
1899 styler.ColourTo(i, StateToPrint);
1900 state = SCE_HPHP_HSTRING;
1902 break;
1903 case SCE_HPHP_COMPLEX_VARIABLE:
1904 if (ch == '}') {
1905 styler.ColourTo(i, StateToPrint);
1906 state = SCE_HPHP_HSTRING;
1908 break;
1909 case SCE_HPHP_OPERATOR:
1910 case SCE_HPHP_DEFAULT:
1911 styler.ColourTo(i - 1, StateToPrint);
1912 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1913 state = SCE_HPHP_NUMBER;
1914 } else if (IsAWordStart(ch)) {
1915 state = SCE_HPHP_WORD;
1916 } else if (ch == '/' && chNext == '*') {
1917 i++;
1918 state = SCE_HPHP_COMMENT;
1919 } else if (ch == '/' && chNext == '/') {
1920 i++;
1921 state = SCE_HPHP_COMMENTLINE;
1922 } else if (ch == '#') {
1923 state = SCE_HPHP_COMMENTLINE;
1924 } else if (ch == '\"') {
1925 state = SCE_HPHP_HSTRING;
1926 strcpy(phpStringDelimiter, "\"");
1927 } else if (styler.Match(i, "<<<")) {
1928 bool isSimpleString = false;
1929 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
1930 if (strlen(phpStringDelimiter)) {
1931 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
1932 if (foldHeredoc) levelCurrent++;
1934 } else if (ch == '\'') {
1935 state = SCE_HPHP_SIMPLESTRING;
1936 strcpy(phpStringDelimiter, "\'");
1937 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1938 state = SCE_HPHP_VARIABLE;
1939 } else if (IsOperator(ch)) {
1940 state = SCE_HPHP_OPERATOR;
1941 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
1942 state = SCE_HPHP_DEFAULT;
1944 break;
1945 ///////////// end - PHP state handling
1948 // Some of the above terminated their lexeme but since the same character starts
1949 // the same class again, only reenter if non empty segment.
1951 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1952 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
1953 if ((ch == '\"') && (nonEmptySegment)) {
1954 state = SCE_HB_STRING;
1955 } else if (ch == '\'') {
1956 state = SCE_HB_COMMENTLINE;
1957 } else if (IsAWordStart(ch)) {
1958 state = SCE_HB_WORD;
1959 } else if (IsOperator(ch)) {
1960 styler.ColourTo(i, SCE_HB_DEFAULT);
1962 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
1963 if ((ch == '\"') && (nonEmptySegment)) {
1964 state = SCE_HBA_STRING;
1965 } else if (ch == '\'') {
1966 state = SCE_HBA_COMMENTLINE;
1967 } else if (IsAWordStart(ch)) {
1968 state = SCE_HBA_WORD;
1969 } else if (IsOperator(ch)) {
1970 styler.ColourTo(i, SCE_HBA_DEFAULT);
1972 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
1973 if (ch == '/' && chNext == '*') {
1974 if (styler.SafeGetCharAt(i + 2) == '*')
1975 state = SCE_HJ_COMMENTDOC;
1976 else
1977 state = SCE_HJ_COMMENT;
1978 } else if (ch == '/' && chNext == '/') {
1979 state = SCE_HJ_COMMENTLINE;
1980 } else if ((ch == '\"') && (nonEmptySegment)) {
1981 state = SCE_HJ_DOUBLESTRING;
1982 } else if ((ch == '\'') && (nonEmptySegment)) {
1983 state = SCE_HJ_SINGLESTRING;
1984 } else if (IsAWordStart(ch)) {
1985 state = SCE_HJ_WORD;
1986 } else if (IsOperator(ch)) {
1987 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1992 switch (state) {
1993 case SCE_HJ_WORD:
1994 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
1995 break;
1996 case SCE_HB_WORD:
1997 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
1998 break;
1999 case SCE_HP_WORD:
2000 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType);
2001 break;
2002 case SCE_HPHP_WORD:
2003 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2004 break;
2005 default:
2006 StateToPrint = statePrintForState(state, inScriptType);
2007 styler.ColourTo(lengthDoc - 1, StateToPrint);
2008 break;
2011 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2012 if (fold) {
2013 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2014 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2018 static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2019 Accessor &styler) {
2020 // Passing in true because we're lexing XML
2021 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, true);
2024 static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2025 Accessor &styler) {
2026 // Passing in false because we're notlexing XML
2027 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists, styler, false);
2030 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2031 Accessor &styler) {
2032 if (startPos == 0)
2033 initStyle = SCE_HPHP_DEFAULT;
2034 ColouriseHTMLDoc(startPos, length, initStyle, keywordlists, styler);
// Human-readable descriptions of the keyword lists used by the hypertext lexer,
// one string per list. This table is handed to the lmHTML and lmXML LexerModule
// definitions below.
// NOTE(review): the order presumably matches the keywordlists[] slots read by
// ColouriseHyperTextDoc (HTML, JavaScript, VBScript, Python, PHP, SGML/DTD) — confirm.
2037 static const char * const htmlWordListDesc[] = {
2038 "HTML elements and attributes",
2039 "JavaScript keywords",
2040 "VBScript keywords",
2041 "Python keywords",
2042 "PHP keywords",
2043 "SGML and DTD keywords",
// Keyword list descriptions for the PHP script lexer (lmPHPSCRIPT).
// Only the fifth entry is described; the others are empty strings marked unused,
// which keeps "PHP keywords" at the same position it has in htmlWordListDesc.
2047 static const char * const phpscriptWordListDesc[] = {
2048 "", //Unused
2049 "", //Unused
2050 "", //Unused
2051 "", //Unused
2052 "PHP keywords",
2053 "", //Unused
// Lexer module definitions for the three languages handled by this file.
// Each one pairs a SCLEX_* identifier with its colourising entry point and a
// language name: lmHTML and lmXML share htmlWordListDesc, while lmPHPSCRIPT
// uses phpscriptWordListDesc.
// NOTE(review): the fourth argument (0) and the last (8) are presumably the
// folder function and the number of style bits — confirm against the
// LexerModule constructor.
2057 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
2058 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2059 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);