Merge pull request #2212 from TwlyY29/bibtex-parser
[geany-mirror.git] / scintilla / lexers / LexHTML.cxx
blob650112220e280c2e519ffbdc8864f9680ff1097e
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
14 #include <string>
15 #include <map>
16 #include <set>
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 #include "StringCopy.h"
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 #include "OptionSet.h"
29 #include "DefaultLexer.h"
31 using namespace Scintilla;
33 namespace {
// Distance between each script language's first style and the first style of
// its "A" variant (SCE_HJA_*, SCE_HBA_*, SCE_HPA_*). Added to or subtracted
// from a style by statePrintForState() / stateForPrintState().
#define SCE_HA_JS     (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS    (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
// Kind of embedded content the lexer is currently in; StateForScript() maps
// each value to the style a section of that kind starts in.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};
// Context a piece of text is being lexed in. statePrintForState() leaves
// script styles untouched for eNonHtmlScript and shifts them to their "A"
// variants for every other mode.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
// Returns true when ch may appear inside a word: an ASCII letter or digit,
// '.' or '_'.
// Callers such as GetNextWord() pass a plain char, which is negative for bytes
// >= 0x80 where char is signed. Those values are rejected up front because
// isalnum() is undefined for negative arguments other than EOF.
inline bool IsAWordChar(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
// Returns true when ch may begin a word: an ASCII letter or digit, or '_'.
// Negative values (a sign-extended char holding a byte >= 0x80) are rejected
// before reaching isalnum(), which is undefined for such arguments.
inline bool IsAWordStart(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
50 inline bool IsOperator(int ch) {
51 if (IsASCII(ch) && isalnum(ch))
52 return false;
53 // '.' left out as it is used to make up numbers
54 if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
55 ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
56 ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
57 ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
58 ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
59 ch == '?' || ch == '!' || ch == '.' || ch == '~')
60 return true;
61 return false;
64 void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
65 Sci_PositionU i = 0;
66 for (; (i < end - start + 1) && (i < len-1); i++) {
67 s[i] = MakeLowerCase(styler[start + i]);
69 s[i] = '\0';
72 std::string GetStringSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
73 std::string s;
74 Sci_PositionU i = 0;
75 for (; (i < end - start + 1); i++) {
76 s.push_back(MakeLowerCase(styler[start + i]));
78 return s;
81 std::string GetNextWord(Accessor &styler, Sci_PositionU start) {
82 std::string ret;
83 Sci_PositionU i = 0;
84 for (; i < 200; i++) { // Put an upper limit to bound time taken for unexpected text.
85 const char ch = styler.SafeGetCharAt(start + i);
86 if ((i == 0) && !IsAWordStart(ch))
87 break;
88 if ((i > 0) && !IsAWordChar(ch))
89 break;
90 ret.push_back(ch);
92 return ret;
95 script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
96 char s[100];
97 GetTextSegment(styler, start, end, s, sizeof(s));
98 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
99 if (strstr(s, "src")) // External script
100 return eScriptNone;
101 if (strstr(s, "vbs"))
102 return eScriptVBS;
103 if (strstr(s, "pyth"))
104 return eScriptPython;
105 if (strstr(s, "javas"))
106 return eScriptJS;
107 if (strstr(s, "jscr"))
108 return eScriptJS;
109 if (strstr(s, "php"))
110 return eScriptPHP;
111 if (strstr(s, "xml")) {
112 const char *xml = strstr(s, "xml");
113 for (const char *t=s; t<xml; t++) {
114 if (!IsASpace(*t)) {
115 return prevValue;
118 return eScriptXML;
121 return prevValue;
124 int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
125 int iResult = 0;
126 std::string s = GetStringSegment(styler, start, end);
127 if (0 == strncmp(s.c_str(), "php", 3)) {
128 iResult = 3;
130 return iResult;
133 script_type ScriptOfState(int state) {
134 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
135 return eScriptPython;
136 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
137 return eScriptVBS;
138 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
139 return eScriptJS;
140 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
141 return eScriptPHP;
142 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
143 return eScriptSGML;
144 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
145 return eScriptSGMLblock;
146 } else {
147 return eScriptNone;
151 int statePrintForState(int state, script_mode inScriptType) {
152 int StateToPrint = state;
154 if (state >= SCE_HJ_START) {
155 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
156 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
157 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
158 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
159 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
160 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
164 return StateToPrint;
167 int stateForPrintState(int StateToPrint) {
168 int state;
170 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
171 state = StateToPrint - SCE_HA_PYTHON;
172 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
173 state = StateToPrint - SCE_HA_VBS;
174 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
175 state = StateToPrint - SCE_HA_JS;
176 } else {
177 state = StateToPrint;
180 return state;
183 inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
184 return IsADigit(styler[start]) || (styler[start] == '.') ||
185 (styler[start] == '-') || (styler[start] == '#');
188 inline bool isStringState(int state) {
189 bool bResult;
191 switch (state) {
192 case SCE_HJ_DOUBLESTRING:
193 case SCE_HJ_SINGLESTRING:
194 case SCE_HJA_DOUBLESTRING:
195 case SCE_HJA_SINGLESTRING:
196 case SCE_HB_STRING:
197 case SCE_HBA_STRING:
198 case SCE_HP_STRING:
199 case SCE_HP_CHARACTER:
200 case SCE_HP_TRIPLE:
201 case SCE_HP_TRIPLEDOUBLE:
202 case SCE_HPA_STRING:
203 case SCE_HPA_CHARACTER:
204 case SCE_HPA_TRIPLE:
205 case SCE_HPA_TRIPLEDOUBLE:
206 case SCE_HPHP_HSTRING:
207 case SCE_HPHP_SIMPLESTRING:
208 case SCE_HPHP_HSTRING_VARIABLE:
209 case SCE_HPHP_COMPLEX_VARIABLE:
210 bResult = true;
211 break;
212 default :
213 bResult = false;
214 break;
216 return bResult;
219 inline bool stateAllowsTermination(int state) {
220 bool allowTermination = !isStringState(state);
221 if (allowTermination) {
222 switch (state) {
223 case SCE_HB_COMMENTLINE:
224 case SCE_HPHP_COMMENT:
225 case SCE_HP_COMMENTLINE:
226 case SCE_HPA_COMMENTLINE:
227 allowTermination = false;
230 return allowTermination;
233 // not really well done, since it's only comments that should lex the %> and <%
234 inline bool isCommentASPState(int state) {
235 bool bResult;
237 switch (state) {
238 case SCE_HJ_COMMENT:
239 case SCE_HJ_COMMENTLINE:
240 case SCE_HJ_COMMENTDOC:
241 case SCE_HB_COMMENTLINE:
242 case SCE_HP_COMMENTLINE:
243 case SCE_HPHP_COMMENT:
244 case SCE_HPHP_COMMENTLINE:
245 bResult = true;
246 break;
247 default :
248 bResult = false;
249 break;
251 return bResult;
254 void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
255 const bool wordIsNumber = IsNumber(start, styler);
256 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
257 if (wordIsNumber) {
258 chAttr = SCE_H_NUMBER;
259 } else {
260 std::string s = GetStringSegment(styler, start, end);
261 if (keywords.InList(s.c_str()))
262 chAttr = SCE_H_ATTRIBUTE;
264 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
265 // No keywords -> all are known
266 chAttr = SCE_H_ATTRIBUTE;
267 styler.ColourTo(end, chAttr);
270 int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
271 const WordList &keywords, Accessor &styler, bool &tagDontFold,
272 bool caseSensitive, bool isXml, bool allowScripts,
273 const std::set<std::string> &nonFoldingTags) {
274 std::string tag;
275 // Copy after the '<'
276 for (Sci_PositionU cPos = start; cPos <= end; cPos++) {
277 const char ch = styler[cPos];
278 if ((ch != '<') && (ch != '/')) {
279 tag.push_back(caseSensitive ? ch : MakeLowerCase(ch));
282 // if the current language is XML, I can fold any tag
283 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
284 //...to find it in the list of no-container-tags
285 tagDontFold = (!isXml) && (nonFoldingTags.count(tag) > 0);
286 // No keywords -> all are known
287 char chAttr = SCE_H_TAGUNKNOWN;
288 if (!tag.empty() && (tag[0] == '!')) {
289 chAttr = SCE_H_SGML_DEFAULT;
290 } else if (!keywords || keywords.InList(tag.c_str())) {
291 chAttr = SCE_H_TAG;
293 styler.ColourTo(end, chAttr);
294 if (chAttr == SCE_H_TAG) {
295 if (allowScripts && (tag == "script")) {
296 // check to see if this is a self-closing tag by sniffing ahead
297 bool isSelfClose = false;
298 for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
299 const char ch = styler.SafeGetCharAt(cPos, '\0');
300 if (ch == '\0' || ch == '>')
301 break;
302 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
303 isSelfClose = true;
304 break;
308 // do not enter a script state if the tag self-closed
309 if (!isSelfClose)
310 chAttr = SCE_H_SCRIPT;
311 } else if (!isXml && (tag == "comment")) {
312 chAttr = SCE_H_COMMENT;
315 return chAttr;
318 void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
319 const WordList &keywords, Accessor &styler, script_mode inScriptType) {
320 char s[30 + 1];
321 Sci_PositionU i = 0;
322 for (; i < end - start + 1 && i < 30; i++) {
323 s[i] = styler[start + i];
325 s[i] = '\0';
327 char chAttr = SCE_HJ_WORD;
328 const bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
329 if (wordIsNumber) {
330 chAttr = SCE_HJ_NUMBER;
331 } else if (keywords.InList(s)) {
332 chAttr = SCE_HJ_KEYWORD;
334 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
337 int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, script_mode inScriptType) {
338 char chAttr = SCE_HB_IDENTIFIER;
339 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
340 if (wordIsNumber) {
341 chAttr = SCE_HB_NUMBER;
342 } else {
343 std::string s = GetStringSegment(styler, start, end);
344 if (keywords.InList(s.c_str())) {
345 chAttr = SCE_HB_WORD;
346 if (s == "rem")
347 chAttr = SCE_HB_COMMENTLINE;
350 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
351 if (chAttr == SCE_HB_COMMENTLINE)
352 return SCE_HB_COMMENTLINE;
353 else
354 return SCE_HB_DEFAULT;
357 void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, std::string &prevWord, script_mode inScriptType, bool isMako) {
358 const bool wordIsNumber = IsADigit(styler[start]);
359 std::string s;
360 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
361 s.push_back(styler[start + i]);
363 char chAttr = SCE_HP_IDENTIFIER;
364 if (prevWord == "class")
365 chAttr = SCE_HP_CLASSNAME;
366 else if (prevWord == "def")
367 chAttr = SCE_HP_DEFNAME;
368 else if (wordIsNumber)
369 chAttr = SCE_HP_NUMBER;
370 else if (keywords.InList(s.c_str()))
371 chAttr = SCE_HP_WORD;
372 else if (isMako && (s == "block"))
373 chAttr = SCE_HP_WORD;
374 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
375 prevWord = s;
378 // Update the word colour to default or keyword
379 // Called when in a PHP word
380 void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
381 char chAttr = SCE_HPHP_DEFAULT;
382 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
383 if (wordIsNumber) {
384 chAttr = SCE_HPHP_NUMBER;
385 } else {
386 std::string s = GetStringSegment(styler, start, end);
387 if (keywords.InList(s.c_str()))
388 chAttr = SCE_HPHP_WORD;
390 styler.ColourTo(end, chAttr);
393 bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
394 std::string s;
395 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
396 s.push_back(styler[start + i]);
398 return keywords.InList(s.c_str());
401 bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
402 std::string s;
403 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
404 s.push_back(styler[start + i]);
406 return s == "[CDATA[";
409 // Return the first state to reach when entering a scripting language
410 int StateForScript(script_type scriptLanguage) {
411 int Result;
412 switch (scriptLanguage) {
413 case eScriptVBS:
414 Result = SCE_HB_START;
415 break;
416 case eScriptPython:
417 Result = SCE_HP_START;
418 break;
419 case eScriptPHP:
420 Result = SCE_HPHP_DEFAULT;
421 break;
422 case eScriptXML:
423 Result = SCE_H_TAGUNKNOWN;
424 break;
425 case eScriptSGML:
426 Result = SCE_H_SGML_DEFAULT;
427 break;
428 case eScriptComment:
429 Result = SCE_H_COMMENT;
430 break;
431 default :
432 Result = SCE_HJ_START;
433 break;
435 return Result;
438 inline bool issgmlwordchar(int ch) {
439 return !IsASCII(ch) ||
440 (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
443 inline bool IsPhpWordStart(int ch) {
444 return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
447 inline bool IsPhpWordChar(int ch) {
448 return IsADigit(ch) || IsPhpWordStart(ch);
451 bool InTagState(int state) {
452 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
453 state == SCE_H_SCRIPT ||
454 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
455 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
456 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
459 bool IsCommentState(const int state) {
460 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
463 bool IsScriptCommentState(const int state) {
464 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
465 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
// Returns true when ch is a carriage return or a line feed.
bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
472 bool isMakoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
473 if (blockType.empty()) {
474 return ((ch == '%') && (chNext == '>'));
475 } else if ((blockType == "inherit") ||
476 (blockType == "namespace") ||
477 (blockType == "include") ||
478 (blockType == "page")) {
479 return ((ch == '/') && (chNext == '>'));
480 } else if (blockType == "%") {
481 if (ch == '/' && isLineEnd(chNext))
482 return true;
483 else
484 return isLineEnd(ch);
485 } else if (blockType == "{") {
486 return ch == '}';
487 } else {
488 return (ch == '>');
// Returns true when the characters ch, chNext terminate a Django block of the
// given type: "%}" closes a "%" block and "}}" closes a "{" block. Any other
// block type, including the empty string, never ends.
bool isDjangoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
	// Both terminators finish with '}'.
	if (chNext != '}') {
		return false;
	}
	if (blockType == "%") {
		return ch == '%';
	}
	if (blockType == "{") {
		return ch == '}';
	}
	return false;
}
504 bool isPHPStringState(int state) {
505 return
506 (state == SCE_HPHP_HSTRING) ||
507 (state == SCE_HPHP_SIMPLESTRING) ||
508 (state == SCE_HPHP_HSTRING_VARIABLE) ||
509 (state == SCE_HPHP_COMPLEX_VARIABLE);
512 Sci_Position FindPhpStringDelimiter(std::string &phpStringDelimiter, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
513 Sci_Position j;
514 const Sci_Position beginning = i - 1;
515 bool isValidSimpleString = false;
517 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
518 i++;
519 char ch = styler.SafeGetCharAt(i);
520 const char chNext = styler.SafeGetCharAt(i + 1);
521 phpStringDelimiter.clear();
522 if (!IsPhpWordStart(ch)) {
523 if (ch == '\'' && IsPhpWordStart(chNext)) {
524 i++;
525 ch = chNext;
526 isSimpleString = true;
527 } else {
528 return beginning;
531 phpStringDelimiter.push_back(ch);
532 i++;
533 for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
534 if (!IsPhpWordChar(styler[j])) {
535 if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
536 isValidSimpleString = true;
537 j++;
538 break;
539 } else {
540 phpStringDelimiter.clear();
541 return beginning;
544 phpStringDelimiter.push_back(styler[j]);
546 if (isSimpleString && !isValidSimpleString) {
547 phpStringDelimiter.clear();
548 return beginning;
550 return j - 1;
553 // Options used for LexerHTML
554 struct OptionsHTML {
555 int aspDefaultLanguage = eScriptJS;
556 bool caseSensitive = false;
557 bool allowScripts = true;
558 bool isMako = false;
559 bool isDjango = false;
560 bool fold = false;
561 bool foldHTML = false;
562 bool foldHTMLPreprocessor = true;
563 bool foldCompact = true;
564 bool foldComment = false;
565 bool foldHeredoc = false;
566 OptionsHTML() noexcept {
// Descriptions of the word lists used by the HTML and XML lexers; the index
// is the word list number handled by LexerHTML::WordListSet().
// The array is passed to DefineWordListSets() without a length, so it is
// closed by an explicit null entry.
// NOTE(review): presumably DefineWordListSets() reads until that null entry -
// confirm against OptionSet.h.
const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	nullptr,
};
// Descriptions of the word lists used by the PHP-script lexer. Only list 4
// (PHP keywords) is used; the other slots are empty placeholders so the
// indices stay the same as in htmlWordListDesc.
// The array is passed to DefineWordListSets() without a length, so it is
// closed by an explicit null entry.
// NOTE(review): presumably DefineWordListSets() reads until that null entry -
// confirm against OptionSet.h.
const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	nullptr,
};
590 struct OptionSetHTML : public OptionSet<OptionsHTML> {
591 OptionSetHTML(bool isPHPScript_) {
593 DefineProperty("asp.default.language", &OptionsHTML::aspDefaultLanguage,
594 "Script in ASP code is initially assumed to be in JavaScript. "
595 "To change this to VBScript set asp.default.language to 2. Python is 3.");
597 DefineProperty("html.tags.case.sensitive", &OptionsHTML::caseSensitive,
598 "For XML and HTML, setting this property to 1 will make tags match in a case "
599 "sensitive way which is the expected behaviour for XML and XHTML.");
601 DefineProperty("lexer.xml.allow.scripts", &OptionsHTML::allowScripts,
602 "Set to 0 to disable scripts in XML.");
604 DefineProperty("lexer.html.mako", &OptionsHTML::isMako,
605 "Set to 1 to enable the mako template language.");
607 DefineProperty("lexer.html.django", &OptionsHTML::isDjango,
608 "Set to 1 to enable the django template language.");
610 DefineProperty("fold", &OptionsHTML::fold);
612 DefineProperty("fold.html", &OptionsHTML::foldHTML,
613 "Folding is turned on or off for HTML and XML files with this option. "
614 "The fold option must also be on for folding to occur.");
616 DefineProperty("fold.html.preprocessor", &OptionsHTML::foldHTMLPreprocessor,
617 "Folding is turned on or off for scripts embedded in HTML files with this option. "
618 "The default is on.");
620 DefineProperty("fold.compact", &OptionsHTML::foldCompact);
622 DefineProperty("fold.hypertext.comment", &OptionsHTML::foldComment,
623 "Allow folding for comments in scripts embedded in HTML. "
624 "The default is off.");
626 DefineProperty("fold.hypertext.heredoc", &OptionsHTML::foldHeredoc,
627 "Allow folding for heredocs in scripts embedded in HTML. "
628 "The default is off.");
630 DefineWordListSets(isPHPScript_ ? phpscriptWordListDesc : htmlWordListDesc);
634 LexicalClass lexicalClassesHTML[] = {
635 // Lexer HTML SCLEX_HTML SCE_H_ SCE_HJ_ SCE_HJA_ SCE_HB_ SCE_HBA_ SCE_HP_ SCE_HPHP_ SCE_HPA_:
636 0, "SCE_H_DEFAULT", "default", "Text",
637 1, "SCE_H_TAG", "tag", "Tags",
638 2, "SCE_H_ERRORTAGUNKNOWN", "error tag", "Unknown Tags",
639 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
640 4, "SCE_H_ATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
641 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
642 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
643 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
644 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
645 9, "SCE_H_COMMENT", "comment", "Comment",
646 10, "SCE_H_ENTITY", "literal", "Entities",
647 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
648 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
649 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
650 14, "SCE_H_SCRIPT", "error", "Internal state which should never be visible",
651 15, "SCE_H_ASP", "preprocessor", "ASP <% ... %>",
652 16, "SCE_H_ASPAT", "preprocessor", "ASP <% ... %>",
653 17, "SCE_H_CDATA", "literal", "CDATA",
654 18, "SCE_H_QUESTION", "preprocessor", "PHP",
655 19, "SCE_H_VALUE", "literal string", "Unquoted values",
656 20, "SCE_H_XCCOMMENT", "comment", "JSP Comment <%-- ... --%>",
657 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
658 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
659 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
660 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
661 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
662 26, "SCE_H_SGML_ERROR", "error", "SGML error",
663 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
664 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
665 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
666 30, "SCE_H_SGML_1ST_PARAM_COMMENT", "error comment", "SGML first parameter - lexer internal. It is an error if any text is in this style.",
667 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
668 32, "", "predefined", "",
669 33, "", "predefined", "",
670 34, "", "predefined", "",
671 35, "", "predefined", "",
672 36, "", "predefined", "",
673 37, "", "predefined", "",
674 38, "", "predefined", "",
675 39, "", "predefined", "",
676 40, "SCE_HJ_START", "client javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
677 41, "SCE_HJ_DEFAULT", "client javascript default", "JS Default",
678 42, "SCE_HJ_COMMENT", "client javascript comment", "JS Comment",
679 43, "SCE_HJ_COMMENTLINE", "client javascript comment line", "JS Line Comment",
680 44, "SCE_HJ_COMMENTDOC", "client javascript comment documentation", "JS Doc comment",
681 45, "SCE_HJ_NUMBER", "client javascript literal numeric", "JS Number",
682 46, "SCE_HJ_WORD", "client javascript identifier", "JS Word",
683 47, "SCE_HJ_KEYWORD", "client javascript keyword", "JS Keyword",
684 48, "SCE_HJ_DOUBLESTRING", "client javascript literal string", "JS Double quoted string",
685 49, "SCE_HJ_SINGLESTRING", "client javascript literal string", "JS Single quoted string",
686 50, "SCE_HJ_SYMBOLS", "client javascript operator", "JS Symbols",
687 51, "SCE_HJ_STRINGEOL", "client javascript error literal string", "JavaScript EOL",
688 52, "SCE_HJ_REGEX", "client javascript literal regex", "JavaScript RegEx",
689 53, "", "unused", "",
690 54, "", "unused", "",
691 55, "SCE_HJA_START", "server javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
692 56, "SCE_HJA_DEFAULT", "server javascript default", "JS Default",
693 57, "SCE_HJA_COMMENT", "server javascript comment", "JS Comment",
694 58, "SCE_HJA_COMMENTLINE", "server javascript comment line", "JS Line Comment",
695 59, "SCE_HJA_COMMENTDOC", "server javascript comment documentation", "JS Doc comment",
696 60, "SCE_HJA_NUMBER", "server javascript literal numeric", "JS Number",
697 61, "SCE_HJA_WORD", "server javascript identifier", "JS Word",
698 62, "SCE_HJA_KEYWORD", "server javascript keyword", "JS Keyword",
699 63, "SCE_HJA_DOUBLESTRING", "server javascript literal string", "JS Double quoted string",
700 64, "SCE_HJA_SINGLESTRING", "server javascript literal string", "JS Single quoted string",
701 65, "SCE_HJA_SYMBOLS", "server javascript operator", "JS Symbols",
702 66, "SCE_HJA_STRINGEOL", "server javascript error literal string", "JavaScript EOL",
703 67, "SCE_HJA_REGEX", "server javascript literal regex", "JavaScript RegEx",
704 68, "", "unused", "",
705 69, "", "unused", "",
706 70, "SCE_HB_START", "client basic default", "Start",
707 71, "SCE_HB_DEFAULT", "client basic default", "Default",
708 72, "SCE_HB_COMMENTLINE", "client basic comment line", "Comment",
709 73, "SCE_HB_NUMBER", "client basic literal numeric", "Number",
710 74, "SCE_HB_WORD", "client basic keyword", "KeyWord",
711 75, "SCE_HB_STRING", "client basic literal string", "String",
712 76, "SCE_HB_IDENTIFIER", "client basic identifier", "Identifier",
713 77, "SCE_HB_STRINGEOL", "client basic literal string", "Unterminated string",
714 78, "", "unused", "",
715 79, "", "unused", "",
716 80, "SCE_HBA_START", "server basic default", "Start",
717 81, "SCE_HBA_DEFAULT", "server basic default", "Default",
718 82, "SCE_HBA_COMMENTLINE", "server basic comment line", "Comment",
719 83, "SCE_HBA_NUMBER", "server basic literal numeric", "Number",
720 84, "SCE_HBA_WORD", "server basic keyword", "KeyWord",
721 85, "SCE_HBA_STRING", "server basic literal string", "String",
722 86, "SCE_HBA_IDENTIFIER", "server basic identifier", "Identifier",
723 87, "SCE_HBA_STRINGEOL", "server basic literal string", "Unterminated string",
724 88, "", "unused", "",
725 89, "", "unused", "",
726 90, "SCE_HP_START", "client python default", "Embedded Python",
727 91, "SCE_HP_DEFAULT", "client python default", "Embedded Python",
728 92, "SCE_HP_COMMENTLINE", "client python comment line", "Comment",
729 93, "SCE_HP_NUMBER", "client python literal numeric", "Number",
730 94, "SCE_HP_STRING", "client python literal string", "String",
731 95, "SCE_HP_CHARACTER", "client python literal string character", "Single quoted string",
732 96, "SCE_HP_WORD", "client python keyword", "Keyword",
733 97, "SCE_HP_TRIPLE", "client python literal string", "Triple quotes",
734 98, "SCE_HP_TRIPLEDOUBLE", "client python literal string", "Triple double quotes",
735 99, "SCE_HP_CLASSNAME", "client python identifier", "Class name definition",
736 100, "SCE_HP_DEFNAME", "client python identifier", "Function or method name definition",
737 101, "SCE_HP_OPERATOR", "client python operator", "Operators",
738 102, "SCE_HP_IDENTIFIER", "client python identifier", "Identifiers",
739 103, "", "unused", "",
740 104, "SCE_HPHP_COMPLEX_VARIABLE", "server php identifier", "PHP complex variable",
741 105, "SCE_HPA_START", "server python default", "ASP Python",
742 106, "SCE_HPA_DEFAULT", "server python default", "ASP Python",
743 107, "SCE_HPA_COMMENTLINE", "server python comment line", "Comment",
744 108, "SCE_HPA_NUMBER", "server python literal numeric", "Number",
745 109, "SCE_HPA_STRING", "server python literal string", "String",
746 110, "SCE_HPA_CHARACTER", "server python literal string character", "Single quoted string",
747 111, "SCE_HPA_WORD", "server python keyword", "Keyword",
748 112, "SCE_HPA_TRIPLE", "server python literal string", "Triple quotes",
749 113, "SCE_HPA_TRIPLEDOUBLE", "server python literal string", "Triple double quotes",
750 114, "SCE_HPA_CLASSNAME", "server python identifier", "Class name definition",
751 115, "SCE_HPA_DEFNAME", "server python identifier", "Function or method name definition",
752 116, "SCE_HPA_OPERATOR", "server python operator", "Operators",
753 117, "SCE_HPA_IDENTIFIER", "server python identifier", "Identifiers",
754 118, "SCE_HPHP_DEFAULT", "server php default", "Default",
755 119, "SCE_HPHP_HSTRING", "server php literal string", "Double quoted String",
756 120, "SCE_HPHP_SIMPLESTRING", "server php literal string", "Single quoted string",
757 121, "SCE_HPHP_WORD", "server php keyword", "Keyword",
758 122, "SCE_HPHP_NUMBER", "server php literal numeric", "Number",
759 123, "SCE_HPHP_VARIABLE", "server php identifier", "Variable",
760 124, "SCE_HPHP_COMMENT", "server php comment", "Comment",
761 125, "SCE_HPHP_COMMENTLINE", "server php comment line", "One line comment",
762 126, "SCE_HPHP_HSTRING_VARIABLE", "server php literal string identifier", "PHP variable in double quoted string",
763 127, "SCE_HPHP_OPERATOR", "server php operator", "PHP operator",
766 LexicalClass lexicalClassesXML[] = {
767 // Lexer.Secondary XML SCLEX_XML SCE_H_:
768 0, "SCE_H_DEFAULT", "default", "Default",
769 1, "SCE_H_TAG", "tag", "Tags",
770 2, "SCE_H_TAGUNKNOWN", "error tag", "Unknown Tags",
771 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
772 4, "SCE_H_ERRORATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
773 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
774 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
775 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
776 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
777 9, "SCE_H_COMMENT", "comment", "Comment",
778 10, "SCE_H_ENTITY", "literal", "Entities",
779 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
780 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
781 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
782 14, "", "unused", "",
783 15, "", "unused", "",
784 16, "", "unused", "",
785 17, "SCE_H_CDATA", "literal", "CDATA",
786 18, "SCE_H_QUESTION", "preprocessor", "Question",
787 19, "SCE_H_VALUE", "literal string", "Unquoted Value",
788 20, "", "unused", "",
789 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
790 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
791 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
792 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
793 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
794 26, "SCE_H_SGML_ERROR", "error", "SGML error",
795 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
796 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
797 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
798 30, "", "unused", "",
799 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
// HTML elements that never create a fold level; LexerHTML copies this list
// into its nonFoldingTags set.
const char *tagsThatDoNotFold[] = {
	"area", "base", "basefont", "br",
	"col", "command", "embed", "frame",
	"hr", "img", "input", "isindex",
	"keygen", "link", "meta", "param",
	"source", "track", "wbr"
};
825 class LexerHTML : public DefaultLexer {
826 bool isXml;
827 bool isPHPScript;
828 WordList keywords;
829 WordList keywords2;
830 WordList keywords3;
831 WordList keywords4;
832 WordList keywords5;
833 WordList keywords6; // SGML (DTD) keywords
834 OptionsHTML options;
835 OptionSetHTML osHTML;
836 std::set<std::string> nonFoldingTags;
837 public:
838 explicit LexerHTML(bool isXml_, bool isPHPScript_) :
839 DefaultLexer(isXml_ ? lexicalClassesHTML : lexicalClassesXML,
840 isXml_ ? ELEMENTS(lexicalClassesHTML) : ELEMENTS(lexicalClassesXML)),
841 isXml(isXml_),
842 isPHPScript(isPHPScript_),
843 osHTML(isPHPScript_),
844 nonFoldingTags(std::begin(tagsThatDoNotFold), std::end(tagsThatDoNotFold)) {
846 ~LexerHTML() override {
848 void SCI_METHOD Release() override {
849 delete this;
851 const char *SCI_METHOD PropertyNames() override {
852 return osHTML.PropertyNames();
854 int SCI_METHOD PropertyType(const char *name) override {
855 return osHTML.PropertyType(name);
857 const char *SCI_METHOD DescribeProperty(const char *name) override {
858 return osHTML.DescribeProperty(name);
860 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
861 const char *SCI_METHOD DescribeWordListSets() override {
862 return osHTML.DescribeWordListSets();
864 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
865 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
866 // No Fold as all folding performs in Lex.
868 static ILexer *LexerFactoryHTML() {
869 return new LexerHTML(false, false);
871 static ILexer *LexerFactoryXML() {
872 return new LexerHTML(true, false);
874 static ILexer *LexerFactoryPHPScript() {
875 return new LexerHTML(false, true);
879 Sci_Position SCI_METHOD LexerHTML::PropertySet(const char *key, const char *val) {
880 if (osHTML.PropertySet(&options, key, val)) {
881 return 0;
883 return -1;
886 Sci_Position SCI_METHOD LexerHTML::WordListSet(int n, const char *wl) {
887 WordList *wordListN = 0;
888 switch (n) {
889 case 0:
890 wordListN = &keywords;
891 break;
892 case 1:
893 wordListN = &keywords2;
894 break;
895 case 2:
896 wordListN = &keywords3;
897 break;
898 case 3:
899 wordListN = &keywords4;
900 break;
901 case 4:
902 wordListN = &keywords5;
903 break;
904 case 5:
905 wordListN = &keywords6;
906 break;
908 Sci_Position firstModification = -1;
909 if (wordListN) {
910 WordList wlNew;
911 wlNew.Set(wl);
912 if (*wordListN != wlNew) {
913 wordListN->Set(wl);
914 firstModification = 0;
917 return firstModification;
920 void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
921 Accessor styler(pAccess, nullptr);
922 if (isPHPScript && (startPos == 0)) {
923 initStyle = SCE_HPHP_DEFAULT;
925 styler.StartAt(startPos);
926 std::string prevWord;
927 std::string phpStringDelimiter;
928 int StateToPrint = initStyle;
929 int state = stateForPrintState(StateToPrint);
930 std::string makoBlockType;
931 int makoComment = 0;
932 std::string djangoBlockType;
933 // If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
934 if (InTagState(state)) {
935 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
936 const Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
937 length += startPos - backLineStart;
938 startPos = backLineStart;
940 state = SCE_H_DEFAULT;
942 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
943 if (isPHPStringState(state)) {
944 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
945 startPos--;
946 length++;
947 state = styler.StyleAt(startPos);
949 if (startPos == 0)
950 state = SCE_H_DEFAULT;
952 styler.StartAt(startPos);
954 /* Nothing handles getting out of these, so we need not start in any of them.
955 * As we're at line start and they can't span lines, we'll re-detect them anyway */
956 switch (state) {
957 case SCE_H_QUESTION:
958 case SCE_H_XMLSTART:
959 case SCE_H_XMLEND:
960 case SCE_H_ASP:
961 state = SCE_H_DEFAULT;
962 break;
965 Sci_Position lineCurrent = styler.GetLine(startPos);
966 int lineState;
967 if (lineCurrent > 0) {
968 lineState = styler.GetLineState(lineCurrent-1);
969 } else {
970 // Default client and ASP scripting language is JavaScript
971 lineState = eScriptJS << 8;
972 lineState |= options.aspDefaultLanguage << 4;
974 script_mode inScriptType = static_cast<script_mode>((lineState >> 0) & 0x03); // 2 bits of scripting mode
976 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
977 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
978 bool tagDontFold = false; //some HTML tags should not be folded
979 script_type aspScript = static_cast<script_type>((lineState >> 4) & 0x0F); // 4 bits of script name
980 script_type clientScript = static_cast<script_type>((lineState >> 8) & 0x0F); // 4 bits of script name
981 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
983 script_type scriptLanguage = ScriptOfState(state);
984 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
985 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
986 scriptLanguage = eScriptComment;
988 script_type beforeLanguage = ScriptOfState(beforePreProc);
989 const bool foldHTML = options.foldHTML;
990 const bool fold = foldHTML && options.fold;
991 const bool foldHTMLPreprocessor = foldHTML && options.foldHTMLPreprocessor;
992 const bool foldCompact = options.foldCompact;
993 const bool foldComment = fold && options.foldComment;
994 const bool foldHeredoc = fold && options.foldHeredoc;
995 const bool caseSensitive = options.caseSensitive;
996 const bool allowScripts = options.allowScripts;
997 const bool isMako = options.isMako;
998 const bool isDjango = options.isDjango;
999 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
1000 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
1001 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
1002 // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
1003 const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
1005 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1006 int levelCurrent = levelPrev;
1007 int visibleChars = 0;
1008 int lineStartVisibleChars = 0;
1010 int chPrev = ' ';
1011 int ch = ' ';
1012 int chPrevNonWhite = ' ';
1013 // look back to set chPrevNonWhite properly for better regex colouring
1014 if (scriptLanguage == eScriptJS && startPos > 0) {
1015 Sci_Position back = startPos;
1016 int style = 0;
1017 while (--back) {
1018 style = styler.StyleAt(back);
1019 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
1020 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
1021 break;
1023 if (style == SCE_HJ_SYMBOLS) {
1024 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
1028 styler.StartSegment(startPos);
1029 const Sci_Position lengthDoc = startPos + length;
1030 for (Sci_Position i = startPos; i < lengthDoc; i++) {
1031 const int chPrev2 = chPrev;
1032 chPrev = ch;
1033 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
1034 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
1035 chPrevNonWhite = ch;
1036 ch = static_cast<unsigned char>(styler[i]);
1037 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1038 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
1040 // Handle DBCS codepages
1041 if (styler.IsLeadByte(static_cast<char>(ch))) {
1042 chPrev = ' ';
1043 i += 1;
1044 continue;
1047 if ((!IsASpace(ch) || !foldCompact) && fold)
1048 visibleChars++;
1049 if (!IsASpace(ch))
1050 lineStartVisibleChars++;
1052 // decide what is the current state to print (depending of the script tag)
1053 StateToPrint = statePrintForState(state, inScriptType);
1055 // handle script folding
1056 if (fold) {
1057 switch (scriptLanguage) {
1058 case eScriptJS:
1059 case eScriptPHP:
1060 //not currently supported case eScriptVBS:
1062 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
1063 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
1064 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
1065 if (ch == '#') {
1066 Sci_Position j = i + 1;
1067 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1068 j++;
1070 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1071 levelCurrent++;
1072 } else if (styler.Match(j, "end")) {
1073 levelCurrent--;
1075 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
1076 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
1078 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
1079 levelCurrent--;
1081 break;
1082 case eScriptPython:
1083 if (state != SCE_HP_COMMENTLINE && !isMako) {
1084 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
1085 levelCurrent++;
1086 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
1087 // check if the number of tabs is lower than the level
1088 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
1089 for (Sci_Position j = 0; Findlevel > 0; j++) {
1090 const char chTmp = styler.SafeGetCharAt(i + j + 1);
1091 if (chTmp == '\t') {
1092 Findlevel -= 8;
1093 } else if (chTmp == ' ') {
1094 Findlevel--;
1095 } else {
1096 break;
1100 if (Findlevel > 0) {
1101 levelCurrent -= Findlevel / 8;
1102 if (Findlevel % 8)
1103 levelCurrent--;
1107 break;
1108 default:
1109 break;
1113 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
1114 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
1115 // Avoid triggering two times on Dos/Win
1116 // New line -> record any line state onto /next/ line
1117 if (fold) {
1118 int lev = levelPrev;
1119 if (visibleChars == 0)
1120 lev |= SC_FOLDLEVELWHITEFLAG;
1121 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1122 lev |= SC_FOLDLEVELHEADERFLAG;
1124 styler.SetLevel(lineCurrent, lev);
1125 visibleChars = 0;
1126 levelPrev = levelCurrent;
1128 styler.SetLineState(lineCurrent,
1129 ((inScriptType & 0x03) << 0) |
1130 ((tagOpened ? 1 : 0) << 2) |
1131 ((tagClosing ? 1 : 0) << 3) |
1132 ((aspScript & 0x0F) << 4) |
1133 ((clientScript & 0x0F) << 8) |
1134 ((beforePreProc & 0xFF) << 12));
1135 lineCurrent++;
1136 lineStartVisibleChars = 0;
1139 // handle start of Mako comment line
1140 if (isMako && ch == '#' && chNext == '#') {
1141 makoComment = 1;
1142 state = SCE_HP_COMMENTLINE;
1145 // handle end of Mako comment line
1146 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
1147 makoComment = 0;
1148 styler.ColourTo(i - 1, StateToPrint);
1149 if (scriptLanguage == eScriptPython) {
1150 state = SCE_HP_DEFAULT;
1151 } else {
1152 state = SCE_H_DEFAULT;
1155 // Allow falling through to mako handling code if newline is going to end a block
1156 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
1157 (!isMako || (makoBlockType != "%"))) {
1159 // Ignore everything in mako comment until the line ends
1160 else if (isMako && makoComment) {
1163 // generic end of script processing
1164 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
1165 // Check if it's the end of the script tag (or any other HTML tag)
1166 switch (state) {
1167 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
1168 case SCE_H_DOUBLESTRING:
1169 case SCE_H_SINGLESTRING:
1170 case SCE_HJ_COMMENT:
1171 case SCE_HJ_COMMENTDOC:
1172 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
1173 // the end of script marker from some JS interpreters.
1174 case SCE_HB_COMMENTLINE:
1175 case SCE_HBA_COMMENTLINE:
1176 case SCE_HJ_DOUBLESTRING:
1177 case SCE_HJ_SINGLESTRING:
1178 case SCE_HJ_REGEX:
1179 case SCE_HB_STRING:
1180 case SCE_HBA_STRING:
1181 case SCE_HP_STRING:
1182 case SCE_HP_TRIPLE:
1183 case SCE_HP_TRIPLEDOUBLE:
1184 case SCE_HPHP_HSTRING:
1185 case SCE_HPHP_SIMPLESTRING:
1186 case SCE_HPHP_COMMENT:
1187 case SCE_HPHP_COMMENTLINE:
1188 break;
1189 default :
1190 // check if the closing tag is a script tag
1191 if (const char *tag =
1192 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
1193 state == SCE_H_COMMENT ? "comment" : 0) {
1194 Sci_Position j = i + 2;
1195 int chr;
1196 do {
1197 chr = static_cast<int>(*tag++);
1198 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
1199 if (chr != 0) break;
1201 // closing tag of the script (it's a closing HTML tag anyway)
1202 styler.ColourTo(i - 1, StateToPrint);
1203 state = SCE_H_TAGUNKNOWN;
1204 inScriptType = eHtml;
1205 scriptLanguage = eScriptNone;
1206 clientScript = eScriptJS;
1207 i += 2;
1208 visibleChars += 2;
1209 tagClosing = true;
1210 continue;
1214 /////////////////////////////////////
1215 // handle the start of PHP pre-processor = Non-HTML
1216 else if ((state != SCE_H_ASPAT) &&
1217 !isStringState(state) &&
1218 (state != SCE_HPHP_COMMENT) &&
1219 (state != SCE_HPHP_COMMENTLINE) &&
1220 (ch == '<') &&
1221 (chNext == '?') &&
1222 !IsScriptCommentState(state)) {
1223 beforeLanguage = scriptLanguage;
1224 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
1225 if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
1226 styler.ColourTo(i - 1, StateToPrint);
1227 beforePreProc = state;
1228 i++;
1229 visibleChars++;
1230 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
1231 if (scriptLanguage == eScriptXML)
1232 styler.ColourTo(i, SCE_H_XMLSTART);
1233 else
1234 styler.ColourTo(i, SCE_H_QUESTION);
1235 state = StateForScript(scriptLanguage);
1236 if (inScriptType == eNonHtmlScript)
1237 inScriptType = eNonHtmlScriptPreProc;
1238 else
1239 inScriptType = eNonHtmlPreProc;
1240 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
1241 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1242 levelCurrent++;
1244 // should be better
1245 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1246 continue;
1249 // handle the start Mako template Python code
1250 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
1251 (lineStartVisibleChars == 1 && ch == '%') ||
1252 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
1253 (ch == '$' && chNext == '{') ||
1254 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
1255 if (ch == '%' || ch == '/')
1256 makoBlockType = "%";
1257 else if (ch == '$')
1258 makoBlockType = "{";
1259 else if (chNext == '/')
1260 makoBlockType = GetNextWord(styler, i+3);
1261 else
1262 makoBlockType = GetNextWord(styler, i+2);
1263 styler.ColourTo(i - 1, StateToPrint);
1264 beforePreProc = state;
1265 if (inScriptType == eNonHtmlScript)
1266 inScriptType = eNonHtmlScriptPreProc;
1267 else
1268 inScriptType = eNonHtmlPreProc;
1270 if (chNext == '/') {
1271 i += 2;
1272 visibleChars += 2;
1273 } else if (ch != '%') {
1274 i++;
1275 visibleChars++;
1277 state = SCE_HP_START;
1278 scriptLanguage = eScriptPython;
1279 styler.ColourTo(i, SCE_H_ASP);
1280 if (ch != '%' && ch != '$' && ch != '/') {
1281 i += makoBlockType.length();
1282 visibleChars += static_cast<int>(makoBlockType.length());
1283 if (keywords4.InList(makoBlockType.c_str()))
1284 styler.ColourTo(i, SCE_HP_WORD);
1285 else
1286 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1289 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1290 continue;
1293 // handle the start/end of Django comment
1294 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
1295 styler.ColourTo(i - 1, StateToPrint);
1296 beforePreProc = state;
1297 beforeLanguage = scriptLanguage;
1298 if (inScriptType == eNonHtmlScript)
1299 inScriptType = eNonHtmlScriptPreProc;
1300 else
1301 inScriptType = eNonHtmlPreProc;
1302 i += 1;
1303 visibleChars += 1;
1304 scriptLanguage = eScriptComment;
1305 state = SCE_H_COMMENT;
1306 styler.ColourTo(i, SCE_H_ASP);
1307 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1308 continue;
1309 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
1310 styler.ColourTo(i - 1, StateToPrint);
1311 i += 1;
1312 visibleChars += 1;
1313 styler.ColourTo(i, SCE_H_ASP);
1314 state = beforePreProc;
1315 if (inScriptType == eNonHtmlScriptPreProc)
1316 inScriptType = eNonHtmlScript;
1317 else
1318 inScriptType = eHtml;
1319 scriptLanguage = beforeLanguage;
1320 continue;
1323 // handle the start Django template code
1324 else if (isDjango && scriptLanguage != eScriptPython && scriptLanguage != eScriptComment && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1325 if (chNext == '%')
1326 djangoBlockType = "%";
1327 else
1328 djangoBlockType = "{";
1329 styler.ColourTo(i - 1, StateToPrint);
1330 beforePreProc = state;
1331 if (inScriptType == eNonHtmlScript)
1332 inScriptType = eNonHtmlScriptPreProc;
1333 else
1334 inScriptType = eNonHtmlPreProc;
1336 i += 1;
1337 visibleChars += 1;
1338 state = SCE_HP_START;
1339 beforeLanguage = scriptLanguage;
1340 scriptLanguage = eScriptPython;
1341 styler.ColourTo(i, SCE_H_ASP);
1343 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1344 continue;
1347 // handle the start of ASP pre-processor = Non-HTML
1348 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1349 styler.ColourTo(i - 1, StateToPrint);
1350 beforePreProc = state;
1351 if (inScriptType == eNonHtmlScript)
1352 inScriptType = eNonHtmlScriptPreProc;
1353 else
1354 inScriptType = eNonHtmlPreProc;
1356 if (chNext2 == '@') {
1357 i += 2; // place as if it was the second next char treated
1358 visibleChars += 2;
1359 state = SCE_H_ASPAT;
1360 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1361 styler.ColourTo(i + 3, SCE_H_ASP);
1362 state = SCE_H_XCCOMMENT;
1363 scriptLanguage = eScriptVBS;
1364 continue;
1365 } else {
1366 if (chNext2 == '=') {
1367 i += 2; // place as if it was the second next char treated
1368 visibleChars += 2;
1369 } else {
1370 i++; // place as if it was the next char treated
1371 visibleChars++;
1374 state = StateForScript(aspScript);
1376 scriptLanguage = eScriptVBS;
1377 styler.ColourTo(i, SCE_H_ASP);
1378 // fold whole script
1379 if (foldHTMLPreprocessor)
1380 levelCurrent++;
1381 // should be better
1382 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1383 continue;
1386 /////////////////////////////////////
1387 // handle the start of SGML language (DTD)
1388 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1389 (chPrev == '<') &&
1390 (ch == '!') &&
1391 (StateToPrint != SCE_H_CDATA) &&
1392 (!IsCommentState(StateToPrint)) &&
1393 (!IsScriptCommentState(StateToPrint))) {
1394 beforePreProc = state;
1395 styler.ColourTo(i - 2, StateToPrint);
1396 if ((chNext == '-') && (chNext2 == '-')) {
1397 state = SCE_H_COMMENT; // wait for a pending command
1398 styler.ColourTo(i + 2, SCE_H_COMMENT);
1399 i += 2; // follow styling after the --
1400 } else if (isWordCdata(i + 1, i + 7, styler)) {
1401 state = SCE_H_CDATA;
1402 } else {
1403 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1404 scriptLanguage = eScriptSGML;
1405 state = SCE_H_SGML_COMMAND; // wait for a pending command
1407 // fold whole tag (-- when closing the tag)
1408 if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1409 levelCurrent++;
1410 continue;
1413 // handle the end of Mako Python code
1414 else if (isMako &&
1415 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1416 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1417 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1418 if (state == SCE_H_ASPAT) {
1419 aspScript = segIsScriptingIndicator(styler,
1420 styler.GetStartSegment(), i - 1, aspScript);
1422 if (state == SCE_HP_WORD) {
1423 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1424 } else {
1425 styler.ColourTo(i - 1, StateToPrint);
1427 if ((makoBlockType != "%") && (makoBlockType != "{") && ch != '>') {
1428 i++;
1429 visibleChars++;
1431 else if ((makoBlockType == "%") && ch == '/') {
1432 i++;
1433 visibleChars++;
1435 if ((makoBlockType != "%") || ch == '/') {
1436 styler.ColourTo(i, SCE_H_ASP);
1438 state = beforePreProc;
1439 if (inScriptType == eNonHtmlScriptPreProc)
1440 inScriptType = eNonHtmlScript;
1441 else
1442 inScriptType = eHtml;
1443 scriptLanguage = eScriptNone;
1444 continue;
1447 // handle the end of Django template code
1448 else if (isDjango &&
1449 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1450 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1451 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1452 if (state == SCE_H_ASPAT) {
1453 aspScript = segIsScriptingIndicator(styler,
1454 styler.GetStartSegment(), i - 1, aspScript);
1456 if (state == SCE_HP_WORD) {
1457 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1458 } else {
1459 styler.ColourTo(i - 1, StateToPrint);
1461 i += 1;
1462 visibleChars += 1;
1463 styler.ColourTo(i, SCE_H_ASP);
1464 state = beforePreProc;
1465 if (inScriptType == eNonHtmlScriptPreProc)
1466 inScriptType = eNonHtmlScript;
1467 else
1468 inScriptType = eHtml;
1469 scriptLanguage = beforeLanguage;
1470 continue;
1473 // handle the end of a pre-processor = Non-HTML
1474 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1475 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1476 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1477 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1478 if (state == SCE_H_ASPAT) {
1479 aspScript = segIsScriptingIndicator(styler,
1480 styler.GetStartSegment(), i - 1, aspScript);
1482 // Bounce out of any ASP mode
1483 switch (state) {
1484 case SCE_HJ_WORD:
1485 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1486 break;
1487 case SCE_HB_WORD:
1488 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1489 break;
1490 case SCE_HP_WORD:
1491 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1492 break;
1493 case SCE_HPHP_WORD:
1494 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1495 break;
1496 case SCE_H_XCCOMMENT:
1497 styler.ColourTo(i - 1, state);
1498 break;
1499 default :
1500 styler.ColourTo(i - 1, StateToPrint);
1501 break;
1503 if (scriptLanguage != eScriptSGML) {
1504 i++;
1505 visibleChars++;
1507 if (ch == '%')
1508 styler.ColourTo(i, SCE_H_ASP);
1509 else if (scriptLanguage == eScriptXML)
1510 styler.ColourTo(i, SCE_H_XMLEND);
1511 else if (scriptLanguage == eScriptSGML)
1512 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1513 else
1514 styler.ColourTo(i, SCE_H_QUESTION);
1515 state = beforePreProc;
1516 if (inScriptType == eNonHtmlScriptPreProc)
1517 inScriptType = eNonHtmlScript;
1518 else
1519 inScriptType = eHtml;
1520 // Unfold all scripting languages, except for XML tag
1521 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1522 levelCurrent--;
1524 scriptLanguage = beforeLanguage;
1525 continue;
1527 /////////////////////////////////////
1529 switch (state) {
1530 case SCE_H_DEFAULT:
1531 if (ch == '<') {
1532 // in HTML, fold on tag open and unfold on tag close
1533 tagOpened = true;
1534 tagClosing = (chNext == '/');
1535 styler.ColourTo(i - 1, StateToPrint);
1536 if (chNext != '!')
1537 state = SCE_H_TAGUNKNOWN;
1538 } else if (ch == '&') {
1539 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1540 state = SCE_H_ENTITY;
1542 break;
1543 case SCE_H_SGML_DEFAULT:
1544 case SCE_H_SGML_BLOCK_DEFAULT:
1545 // if (scriptLanguage == eScriptSGMLblock)
1546 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1548 if (ch == '\"') {
1549 styler.ColourTo(i - 1, StateToPrint);
1550 state = SCE_H_SGML_DOUBLESTRING;
1551 } else if (ch == '\'') {
1552 styler.ColourTo(i - 1, StateToPrint);
1553 state = SCE_H_SGML_SIMPLESTRING;
1554 } else if ((ch == '-') && (chPrev == '-')) {
1555 if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1556 styler.ColourTo(i - 2, StateToPrint);
1558 state = SCE_H_SGML_COMMENT;
1559 } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1560 styler.ColourTo(i - 2, StateToPrint);
1561 state = SCE_H_SGML_ENTITY;
1562 } else if (ch == '#') {
1563 styler.ColourTo(i - 1, StateToPrint);
1564 state = SCE_H_SGML_SPECIAL;
1565 } else if (ch == '[') {
1566 styler.ColourTo(i - 1, StateToPrint);
1567 scriptLanguage = eScriptSGMLblock;
1568 state = SCE_H_SGML_BLOCK_DEFAULT;
1569 } else if (ch == ']') {
1570 if (scriptLanguage == eScriptSGMLblock) {
1571 styler.ColourTo(i, StateToPrint);
1572 scriptLanguage = eScriptSGML;
1573 } else {
1574 styler.ColourTo(i - 1, StateToPrint);
1575 styler.ColourTo(i, SCE_H_SGML_ERROR);
1577 state = SCE_H_SGML_DEFAULT;
1578 } else if (scriptLanguage == eScriptSGMLblock) {
1579 if ((ch == '!') && (chPrev == '<')) {
1580 styler.ColourTo(i - 2, StateToPrint);
1581 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1582 state = SCE_H_SGML_COMMAND;
1583 } else if (ch == '>') {
1584 styler.ColourTo(i - 1, StateToPrint);
1585 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1588 break;
1589 case SCE_H_SGML_COMMAND:
1590 if ((ch == '-') && (chPrev == '-')) {
1591 styler.ColourTo(i - 2, StateToPrint);
1592 state = SCE_H_SGML_COMMENT;
1593 } else if (!issgmlwordchar(ch)) {
1594 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1595 styler.ColourTo(i - 1, StateToPrint);
1596 state = SCE_H_SGML_1ST_PARAM;
1597 } else {
1598 state = SCE_H_SGML_ERROR;
1601 break;
1602 case SCE_H_SGML_1ST_PARAM:
1603 // wait for the beginning of the word
1604 if ((ch == '-') && (chPrev == '-')) {
1605 if (scriptLanguage == eScriptSGMLblock) {
1606 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1607 } else {
1608 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1610 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1611 } else if (issgmlwordchar(ch)) {
1612 if (scriptLanguage == eScriptSGMLblock) {
1613 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1614 } else {
1615 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1617 // find the length of the word
1618 int size = 1;
1619 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1620 size++;
1621 styler.ColourTo(i + size - 1, StateToPrint);
1622 i += size - 1;
1623 visibleChars += size - 1;
1624 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1625 if (scriptLanguage == eScriptSGMLblock) {
1626 state = SCE_H_SGML_BLOCK_DEFAULT;
1627 } else {
1628 state = SCE_H_SGML_DEFAULT;
1630 continue;
1632 break;
1633 case SCE_H_SGML_ERROR:
1634 if ((ch == '-') && (chPrev == '-')) {
1635 styler.ColourTo(i - 2, StateToPrint);
1636 state = SCE_H_SGML_COMMENT;
1638 break;
1639 case SCE_H_SGML_DOUBLESTRING:
1640 if (ch == '\"') {
1641 styler.ColourTo(i, StateToPrint);
1642 state = SCE_H_SGML_DEFAULT;
1644 break;
1645 case SCE_H_SGML_SIMPLESTRING:
1646 if (ch == '\'') {
1647 styler.ColourTo(i, StateToPrint);
1648 state = SCE_H_SGML_DEFAULT;
1650 break;
1651 case SCE_H_SGML_COMMENT:
1652 if ((ch == '-') && (chPrev == '-')) {
1653 styler.ColourTo(i, StateToPrint);
1654 state = SCE_H_SGML_DEFAULT;
1656 break;
1657 case SCE_H_CDATA:
1658 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1659 styler.ColourTo(i, StateToPrint);
1660 state = SCE_H_DEFAULT;
1661 levelCurrent--;
1663 break;
1664 case SCE_H_COMMENT:
1665 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1666 styler.ColourTo(i, StateToPrint);
1667 state = SCE_H_DEFAULT;
1668 levelCurrent--;
1670 break;
1671 case SCE_H_SGML_1ST_PARAM_COMMENT:
1672 if ((ch == '-') && (chPrev == '-')) {
1673 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1674 state = SCE_H_SGML_1ST_PARAM;
1676 break;
1677 case SCE_H_SGML_SPECIAL:
1678 if (!(IsASCII(ch) && isupper(ch))) {
1679 styler.ColourTo(i - 1, StateToPrint);
1680 if (isalnum(ch)) {
1681 state = SCE_H_SGML_ERROR;
1682 } else {
1683 state = SCE_H_SGML_DEFAULT;
1686 break;
1687 case SCE_H_SGML_ENTITY:
1688 if (ch == ';') {
1689 styler.ColourTo(i, StateToPrint);
1690 state = SCE_H_SGML_DEFAULT;
1691 } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1692 styler.ColourTo(i, SCE_H_SGML_ERROR);
1693 state = SCE_H_SGML_DEFAULT;
1695 break;
1696 case SCE_H_ENTITY:
1697 if (ch == ';') {
1698 styler.ColourTo(i, StateToPrint);
1699 state = SCE_H_DEFAULT;
1701 if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1702 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1703 if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1704 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1705 else
1706 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1707 state = SCE_H_DEFAULT;
1709 break;
1710 case SCE_H_TAGUNKNOWN:
1711 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1712 int eClass = classifyTagHTML(styler.GetStartSegment(),
1713 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts, nonFoldingTags);
1714 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1715 if (!tagClosing) {
1716 inScriptType = eNonHtmlScript;
1717 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1718 } else {
1719 scriptLanguage = eScriptNone;
1721 eClass = SCE_H_TAG;
1723 if (ch == '>') {
1724 styler.ColourTo(i, eClass);
1725 if (inScriptType == eNonHtmlScript) {
1726 state = StateForScript(scriptLanguage);
1727 } else {
1728 state = SCE_H_DEFAULT;
1730 tagOpened = false;
1731 if (!tagDontFold) {
1732 if (tagClosing) {
1733 levelCurrent--;
1734 } else {
1735 levelCurrent++;
1738 tagClosing = false;
1739 } else if (ch == '/' && chNext == '>') {
1740 if (eClass == SCE_H_TAGUNKNOWN) {
1741 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1742 } else {
1743 styler.ColourTo(i - 1, StateToPrint);
1744 styler.ColourTo(i + 1, SCE_H_TAGEND);
1746 i++;
1747 ch = chNext;
1748 state = SCE_H_DEFAULT;
1749 tagOpened = false;
1750 } else {
1751 if (eClass != SCE_H_TAGUNKNOWN) {
1752 if (eClass == SCE_H_SGML_DEFAULT) {
1753 state = SCE_H_SGML_DEFAULT;
1754 } else {
1755 state = SCE_H_OTHER;
1760 break;
1761 case SCE_H_ATTRIBUTE:
1762 if (!setAttributeContinue.Contains(ch)) {
1763 if (inScriptType == eNonHtmlScript) {
1764 const int scriptLanguagePrev = scriptLanguage;
1765 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1766 scriptLanguage = clientScript;
1767 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1768 inScriptType = eHtml;
1770 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1771 if (ch == '>') {
1772 styler.ColourTo(i, SCE_H_TAG);
1773 if (inScriptType == eNonHtmlScript) {
1774 state = StateForScript(scriptLanguage);
1775 } else {
1776 state = SCE_H_DEFAULT;
1778 tagOpened = false;
1779 if (!tagDontFold) {
1780 if (tagClosing) {
1781 levelCurrent--;
1782 } else {
1783 levelCurrent++;
1786 tagClosing = false;
1787 } else if (ch == '=') {
1788 styler.ColourTo(i, SCE_H_OTHER);
1789 state = SCE_H_VALUE;
1790 } else {
1791 state = SCE_H_OTHER;
1794 break;
1795 case SCE_H_OTHER:
1796 if (ch == '>') {
1797 styler.ColourTo(i - 1, StateToPrint);
1798 styler.ColourTo(i, SCE_H_TAG);
1799 if (inScriptType == eNonHtmlScript) {
1800 state = StateForScript(scriptLanguage);
1801 } else {
1802 state = SCE_H_DEFAULT;
1804 tagOpened = false;
1805 if (!tagDontFold) {
1806 if (tagClosing) {
1807 levelCurrent--;
1808 } else {
1809 levelCurrent++;
1812 tagClosing = false;
1813 } else if (ch == '\"') {
1814 styler.ColourTo(i - 1, StateToPrint);
1815 state = SCE_H_DOUBLESTRING;
1816 } else if (ch == '\'') {
1817 styler.ColourTo(i - 1, StateToPrint);
1818 state = SCE_H_SINGLESTRING;
1819 } else if (ch == '=') {
1820 styler.ColourTo(i, StateToPrint);
1821 state = SCE_H_VALUE;
1822 } else if (ch == '/' && chNext == '>') {
1823 styler.ColourTo(i - 1, StateToPrint);
1824 styler.ColourTo(i + 1, SCE_H_TAGEND);
1825 i++;
1826 ch = chNext;
1827 state = SCE_H_DEFAULT;
1828 tagOpened = false;
1829 } else if (ch == '?' && chNext == '>') {
1830 styler.ColourTo(i - 1, StateToPrint);
1831 styler.ColourTo(i + 1, SCE_H_XMLEND);
1832 i++;
1833 ch = chNext;
1834 state = SCE_H_DEFAULT;
1835 } else if (setHTMLWord.Contains(ch)) {
1836 styler.ColourTo(i - 1, StateToPrint);
1837 state = SCE_H_ATTRIBUTE;
1839 break;
1840 case SCE_H_DOUBLESTRING:
1841 if (ch == '\"') {
1842 if (inScriptType == eNonHtmlScript) {
1843 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1845 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1846 state = SCE_H_OTHER;
1848 break;
1849 case SCE_H_SINGLESTRING:
1850 if (ch == '\'') {
1851 if (inScriptType == eNonHtmlScript) {
1852 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1854 styler.ColourTo(i, SCE_H_SINGLESTRING);
1855 state = SCE_H_OTHER;
1857 break;
1858 case SCE_H_VALUE:
1859 if (!setHTMLWord.Contains(ch)) {
1860 if (ch == '\"' && chPrev == '=') {
1861 // Should really test for being first character
1862 state = SCE_H_DOUBLESTRING;
1863 } else if (ch == '\'' && chPrev == '=') {
1864 state = SCE_H_SINGLESTRING;
1865 } else {
1866 if (IsNumber(styler.GetStartSegment(), styler)) {
1867 styler.ColourTo(i - 1, SCE_H_NUMBER);
1868 } else {
1869 styler.ColourTo(i - 1, StateToPrint);
1871 if (ch == '>') {
1872 styler.ColourTo(i, SCE_H_TAG);
1873 if (inScriptType == eNonHtmlScript) {
1874 state = StateForScript(scriptLanguage);
1875 } else {
1876 state = SCE_H_DEFAULT;
1878 tagOpened = false;
1879 if (!tagDontFold) {
1880 if (tagClosing) {
1881 levelCurrent--;
1882 } else {
1883 levelCurrent++;
1886 tagClosing = false;
1887 } else {
1888 state = SCE_H_OTHER;
1892 break;
1893 case SCE_HJ_DEFAULT:
1894 case SCE_HJ_START:
1895 case SCE_HJ_SYMBOLS:
1896 if (IsAWordStart(ch)) {
1897 styler.ColourTo(i - 1, StateToPrint);
1898 state = SCE_HJ_WORD;
1899 } else if (ch == '/' && chNext == '*') {
1900 styler.ColourTo(i - 1, StateToPrint);
1901 if (chNext2 == '*')
1902 state = SCE_HJ_COMMENTDOC;
1903 else
1904 state = SCE_HJ_COMMENT;
1905 if (chNext2 == '/') {
1906 // Eat the * so it isn't used for the end of the comment
1907 i++;
1909 } else if (ch == '/' && chNext == '/') {
1910 styler.ColourTo(i - 1, StateToPrint);
1911 state = SCE_HJ_COMMENTLINE;
1912 } else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
1913 styler.ColourTo(i - 1, StateToPrint);
1914 state = SCE_HJ_REGEX;
1915 } else if (ch == '\"') {
1916 styler.ColourTo(i - 1, StateToPrint);
1917 state = SCE_HJ_DOUBLESTRING;
1918 } else if (ch == '\'') {
1919 styler.ColourTo(i - 1, StateToPrint);
1920 state = SCE_HJ_SINGLESTRING;
1921 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1922 styler.SafeGetCharAt(i + 3) == '-') {
1923 styler.ColourTo(i - 1, StateToPrint);
1924 state = SCE_HJ_COMMENTLINE;
1925 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1926 styler.ColourTo(i - 1, StateToPrint);
1927 state = SCE_HJ_COMMENTLINE;
1928 i += 2;
1929 } else if (IsOperator(ch)) {
1930 styler.ColourTo(i - 1, StateToPrint);
1931 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1932 state = SCE_HJ_DEFAULT;
1933 } else if ((ch == ' ') || (ch == '\t')) {
1934 if (state == SCE_HJ_START) {
1935 styler.ColourTo(i - 1, StateToPrint);
1936 state = SCE_HJ_DEFAULT;
1939 break;
1940 case SCE_HJ_WORD:
1941 if (!IsAWordChar(ch)) {
1942 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1943 //styler.ColourTo(i - 1, eHTJSKeyword);
1944 state = SCE_HJ_DEFAULT;
1945 if (ch == '/' && chNext == '*') {
1946 if (chNext2 == '*')
1947 state = SCE_HJ_COMMENTDOC;
1948 else
1949 state = SCE_HJ_COMMENT;
1950 } else if (ch == '/' && chNext == '/') {
1951 state = SCE_HJ_COMMENTLINE;
1952 } else if (ch == '\"') {
1953 state = SCE_HJ_DOUBLESTRING;
1954 } else if (ch == '\'') {
1955 state = SCE_HJ_SINGLESTRING;
1956 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1957 styler.ColourTo(i - 1, StateToPrint);
1958 state = SCE_HJ_COMMENTLINE;
1959 i += 2;
1960 } else if (IsOperator(ch)) {
1961 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1962 state = SCE_HJ_DEFAULT;
1965 break;
1966 case SCE_HJ_COMMENT:
1967 case SCE_HJ_COMMENTDOC:
1968 if (ch == '/' && chPrev == '*') {
1969 styler.ColourTo(i, StateToPrint);
1970 state = SCE_HJ_DEFAULT;
1971 ch = ' ';
1973 break;
1974 case SCE_HJ_COMMENTLINE:
1975 if (ch == '\r' || ch == '\n') {
1976 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1977 state = SCE_HJ_DEFAULT;
1978 ch = ' ';
1980 break;
1981 case SCE_HJ_DOUBLESTRING:
1982 if (ch == '\\') {
1983 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1984 i++;
1986 } else if (ch == '\"') {
1987 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1988 state = SCE_HJ_DEFAULT;
1989 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1990 styler.ColourTo(i - 1, StateToPrint);
1991 state = SCE_HJ_COMMENTLINE;
1992 i += 2;
1993 } else if (isLineEnd(ch)) {
1994 styler.ColourTo(i - 1, StateToPrint);
1995 state = SCE_HJ_STRINGEOL;
1997 break;
1998 case SCE_HJ_SINGLESTRING:
1999 if (ch == '\\') {
2000 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2001 i++;
2003 } else if (ch == '\'') {
2004 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
2005 state = SCE_HJ_DEFAULT;
2006 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2007 styler.ColourTo(i - 1, StateToPrint);
2008 state = SCE_HJ_COMMENTLINE;
2009 i += 2;
2010 } else if (isLineEnd(ch)) {
2011 styler.ColourTo(i - 1, StateToPrint);
2012 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
2013 state = SCE_HJ_STRINGEOL;
2016 break;
2017 case SCE_HJ_STRINGEOL:
2018 if (!isLineEnd(ch)) {
2019 styler.ColourTo(i - 1, StateToPrint);
2020 state = SCE_HJ_DEFAULT;
2021 } else if (!isLineEnd(chNext)) {
2022 styler.ColourTo(i, StateToPrint);
2023 state = SCE_HJ_DEFAULT;
2025 break;
2026 case SCE_HJ_REGEX:
2027 if (ch == '\r' || ch == '\n' || ch == '/') {
2028 if (ch == '/') {
2029 while (IsASCII(chNext) && islower(chNext)) { // gobble regex flags
2030 i++;
2031 ch = chNext;
2032 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2035 styler.ColourTo(i, StateToPrint);
2036 state = SCE_HJ_DEFAULT;
2037 } else if (ch == '\\') {
2038 // Gobble up the quoted character
2039 if (chNext == '\\' || chNext == '/') {
2040 i++;
2041 ch = chNext;
2042 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2045 break;
2046 case SCE_HB_DEFAULT:
2047 case SCE_HB_START:
2048 if (IsAWordStart(ch)) {
2049 styler.ColourTo(i - 1, StateToPrint);
2050 state = SCE_HB_WORD;
2051 } else if (ch == '\'') {
2052 styler.ColourTo(i - 1, StateToPrint);
2053 state = SCE_HB_COMMENTLINE;
2054 } else if (ch == '\"') {
2055 styler.ColourTo(i - 1, StateToPrint);
2056 state = SCE_HB_STRING;
2057 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2058 styler.SafeGetCharAt(i + 3) == '-') {
2059 styler.ColourTo(i - 1, StateToPrint);
2060 state = SCE_HB_COMMENTLINE;
2061 } else if (IsOperator(ch)) {
2062 styler.ColourTo(i - 1, StateToPrint);
2063 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2064 state = SCE_HB_DEFAULT;
2065 } else if ((ch == ' ') || (ch == '\t')) {
2066 if (state == SCE_HB_START) {
2067 styler.ColourTo(i - 1, StateToPrint);
2068 state = SCE_HB_DEFAULT;
2071 break;
2072 case SCE_HB_WORD:
2073 if (!IsAWordChar(ch)) {
2074 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
2075 if (state == SCE_HB_DEFAULT) {
2076 if (ch == '\"') {
2077 state = SCE_HB_STRING;
2078 } else if (ch == '\'') {
2079 state = SCE_HB_COMMENTLINE;
2080 } else if (IsOperator(ch)) {
2081 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2082 state = SCE_HB_DEFAULT;
2086 break;
2087 case SCE_HB_STRING:
2088 if (ch == '\"') {
2089 styler.ColourTo(i, StateToPrint);
2090 state = SCE_HB_DEFAULT;
2091 } else if (ch == '\r' || ch == '\n') {
2092 styler.ColourTo(i - 1, StateToPrint);
2093 state = SCE_HB_STRINGEOL;
2095 break;
2096 case SCE_HB_COMMENTLINE:
2097 if (ch == '\r' || ch == '\n') {
2098 styler.ColourTo(i - 1, StateToPrint);
2099 state = SCE_HB_DEFAULT;
2101 break;
2102 case SCE_HB_STRINGEOL:
2103 if (!isLineEnd(ch)) {
2104 styler.ColourTo(i - 1, StateToPrint);
2105 state = SCE_HB_DEFAULT;
2106 } else if (!isLineEnd(chNext)) {
2107 styler.ColourTo(i, StateToPrint);
2108 state = SCE_HB_DEFAULT;
2110 break;
2111 case SCE_HP_DEFAULT:
2112 case SCE_HP_START:
2113 if (IsAWordStart(ch)) {
2114 styler.ColourTo(i - 1, StateToPrint);
2115 state = SCE_HP_WORD;
2116 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2117 styler.SafeGetCharAt(i + 3) == '-') {
2118 styler.ColourTo(i - 1, StateToPrint);
2119 state = SCE_HP_COMMENTLINE;
2120 } else if (ch == '#') {
2121 styler.ColourTo(i - 1, StateToPrint);
2122 state = SCE_HP_COMMENTLINE;
2123 } else if (ch == '\"') {
2124 styler.ColourTo(i - 1, StateToPrint);
2125 if (chNext == '\"' && chNext2 == '\"') {
2126 i += 2;
2127 state = SCE_HP_TRIPLEDOUBLE;
2128 ch = ' ';
2129 chPrev = ' ';
2130 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2131 } else {
2132 // state = statePrintForState(SCE_HP_STRING,inScriptType);
2133 state = SCE_HP_STRING;
2135 } else if (ch == '\'') {
2136 styler.ColourTo(i - 1, StateToPrint);
2137 if (chNext == '\'' && chNext2 == '\'') {
2138 i += 2;
2139 state = SCE_HP_TRIPLE;
2140 ch = ' ';
2141 chPrev = ' ';
2142 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2143 } else {
2144 state = SCE_HP_CHARACTER;
2146 } else if (IsOperator(ch)) {
2147 styler.ColourTo(i - 1, StateToPrint);
2148 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2149 } else if ((ch == ' ') || (ch == '\t')) {
2150 if (state == SCE_HP_START) {
2151 styler.ColourTo(i - 1, StateToPrint);
2152 state = SCE_HP_DEFAULT;
2155 break;
2156 case SCE_HP_WORD:
2157 if (!IsAWordChar(ch)) {
2158 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
2159 state = SCE_HP_DEFAULT;
2160 if (ch == '#') {
2161 state = SCE_HP_COMMENTLINE;
2162 } else if (ch == '\"') {
2163 if (chNext == '\"' && chNext2 == '\"') {
2164 i += 2;
2165 state = SCE_HP_TRIPLEDOUBLE;
2166 ch = ' ';
2167 chPrev = ' ';
2168 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2169 } else {
2170 state = SCE_HP_STRING;
2172 } else if (ch == '\'') {
2173 if (chNext == '\'' && chNext2 == '\'') {
2174 i += 2;
2175 state = SCE_HP_TRIPLE;
2176 ch = ' ';
2177 chPrev = ' ';
2178 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2179 } else {
2180 state = SCE_HP_CHARACTER;
2182 } else if (IsOperator(ch)) {
2183 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2186 break;
2187 case SCE_HP_COMMENTLINE:
2188 if (ch == '\r' || ch == '\n') {
2189 styler.ColourTo(i - 1, StateToPrint);
2190 state = SCE_HP_DEFAULT;
2192 break;
2193 case SCE_HP_STRING:
2194 if (ch == '\\') {
2195 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2196 i++;
2197 ch = chNext;
2198 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2200 } else if (ch == '\"') {
2201 styler.ColourTo(i, StateToPrint);
2202 state = SCE_HP_DEFAULT;
2204 break;
2205 case SCE_HP_CHARACTER:
2206 if (ch == '\\') {
2207 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2208 i++;
2209 ch = chNext;
2210 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2212 } else if (ch == '\'') {
2213 styler.ColourTo(i, StateToPrint);
2214 state = SCE_HP_DEFAULT;
2216 break;
2217 case SCE_HP_TRIPLE:
2218 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
2219 styler.ColourTo(i, StateToPrint);
2220 state = SCE_HP_DEFAULT;
2222 break;
2223 case SCE_HP_TRIPLEDOUBLE:
2224 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
2225 styler.ColourTo(i, StateToPrint);
2226 state = SCE_HP_DEFAULT;
2228 break;
2229 ///////////// start - PHP state handling
2230 case SCE_HPHP_WORD:
2231 if (!IsAWordChar(ch)) {
2232 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
2233 if (ch == '/' && chNext == '*') {
2234 i++;
2235 state = SCE_HPHP_COMMENT;
2236 } else if (ch == '/' && chNext == '/') {
2237 i++;
2238 state = SCE_HPHP_COMMENTLINE;
2239 } else if (ch == '#') {
2240 state = SCE_HPHP_COMMENTLINE;
2241 } else if (ch == '\"') {
2242 state = SCE_HPHP_HSTRING;
2243 phpStringDelimiter = "\"";
2244 } else if (styler.Match(i, "<<<")) {
2245 bool isSimpleString = false;
2246 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2247 if (!phpStringDelimiter.empty()) {
2248 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2249 if (foldHeredoc) levelCurrent++;
2251 } else if (ch == '\'') {
2252 state = SCE_HPHP_SIMPLESTRING;
2253 phpStringDelimiter = "\'";
2254 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2255 state = SCE_HPHP_VARIABLE;
2256 } else if (IsOperator(ch)) {
2257 state = SCE_HPHP_OPERATOR;
2258 } else {
2259 state = SCE_HPHP_DEFAULT;
2262 break;
2263 case SCE_HPHP_NUMBER:
2264 // recognize bases 8,10 or 16 integers OR floating-point numbers
2265 if (!IsADigit(ch)
2266 && strchr(".xXabcdefABCDEF", ch) == NULL
2267 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
2268 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
2269 if (IsOperator(ch))
2270 state = SCE_HPHP_OPERATOR;
2271 else
2272 state = SCE_HPHP_DEFAULT;
2274 break;
2275 case SCE_HPHP_VARIABLE:
2276 if (!IsPhpWordChar(chNext)) {
2277 styler.ColourTo(i, SCE_HPHP_VARIABLE);
2278 state = SCE_HPHP_DEFAULT;
2280 break;
2281 case SCE_HPHP_COMMENT:
2282 if (ch == '/' && chPrev == '*') {
2283 styler.ColourTo(i, StateToPrint);
2284 state = SCE_HPHP_DEFAULT;
2286 break;
2287 case SCE_HPHP_COMMENTLINE:
2288 if (ch == '\r' || ch == '\n') {
2289 styler.ColourTo(i - 1, StateToPrint);
2290 state = SCE_HPHP_DEFAULT;
2292 break;
2293 case SCE_HPHP_HSTRING:
2294 if (ch == '\\' && ((phpStringDelimiter == "\"") || chNext == '$' || chNext == '{')) {
2295 // skip the next char
2296 i++;
2297 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
2298 && IsPhpWordStart(chNext2)) {
2299 styler.ColourTo(i - 1, StateToPrint);
2300 state = SCE_HPHP_COMPLEX_VARIABLE;
2301 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2302 styler.ColourTo(i - 1, StateToPrint);
2303 state = SCE_HPHP_HSTRING_VARIABLE;
2304 } else if (styler.Match(i, phpStringDelimiter.c_str())) {
2305 if (phpStringDelimiter == "\"") {
2306 styler.ColourTo(i, StateToPrint);
2307 state = SCE_HPHP_DEFAULT;
2308 } else if (isLineEnd(chPrev)) {
2309 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2310 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2311 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2312 if (isLineEnd(chAfterPsd) ||
2313 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2314 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2315 styler.ColourTo(i, StateToPrint);
2316 state = SCE_HPHP_DEFAULT;
2317 if (foldHeredoc) levelCurrent--;
2321 break;
2322 case SCE_HPHP_SIMPLESTRING:
2323 if (phpStringDelimiter == "\'") {
2324 if (ch == '\\') {
2325 // skip the next char
2326 i++;
2327 } else if (ch == '\'') {
2328 styler.ColourTo(i, StateToPrint);
2329 state = SCE_HPHP_DEFAULT;
2331 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter.c_str())) {
2332 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2333 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2334 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2335 if (isLineEnd(chAfterPsd) ||
2336 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2337 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2338 styler.ColourTo(i, StateToPrint);
2339 state = SCE_HPHP_DEFAULT;
2340 if (foldHeredoc) levelCurrent--;
2343 break;
2344 case SCE_HPHP_HSTRING_VARIABLE:
2345 if (!IsPhpWordChar(chNext)) {
2346 styler.ColourTo(i, StateToPrint);
2347 state = SCE_HPHP_HSTRING;
2349 break;
2350 case SCE_HPHP_COMPLEX_VARIABLE:
2351 if (ch == '}') {
2352 styler.ColourTo(i, StateToPrint);
2353 state = SCE_HPHP_HSTRING;
2355 break;
2356 case SCE_HPHP_OPERATOR:
2357 case SCE_HPHP_DEFAULT:
2358 styler.ColourTo(i - 1, StateToPrint);
2359 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2360 state = SCE_HPHP_NUMBER;
2361 } else if (IsAWordStart(ch)) {
2362 state = SCE_HPHP_WORD;
2363 } else if (ch == '/' && chNext == '*') {
2364 i++;
2365 state = SCE_HPHP_COMMENT;
2366 } else if (ch == '/' && chNext == '/') {
2367 i++;
2368 state = SCE_HPHP_COMMENTLINE;
2369 } else if (ch == '#') {
2370 state = SCE_HPHP_COMMENTLINE;
2371 } else if (ch == '\"') {
2372 state = SCE_HPHP_HSTRING;
2373 phpStringDelimiter = "\"";
2374 } else if (styler.Match(i, "<<<")) {
2375 bool isSimpleString = false;
2376 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2377 if (!phpStringDelimiter.empty()) {
2378 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2379 if (foldHeredoc) levelCurrent++;
2381 } else if (ch == '\'') {
2382 state = SCE_HPHP_SIMPLESTRING;
2383 phpStringDelimiter = "\'";
2384 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2385 state = SCE_HPHP_VARIABLE;
2386 } else if (IsOperator(ch)) {
2387 state = SCE_HPHP_OPERATOR;
2388 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2389 state = SCE_HPHP_DEFAULT;
2391 break;
2392 ///////////// end - PHP state handling
2395 // Some of the above terminated their lexeme but since the same character starts
2396 // the same class again, only reenter if non empty segment.
2398 const bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2399 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2400 if ((ch == '\"') && (nonEmptySegment)) {
2401 state = SCE_HB_STRING;
2402 } else if (ch == '\'') {
2403 state = SCE_HB_COMMENTLINE;
2404 } else if (IsAWordStart(ch)) {
2405 state = SCE_HB_WORD;
2406 } else if (IsOperator(ch)) {
2407 styler.ColourTo(i, SCE_HB_DEFAULT);
2409 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2410 if ((ch == '\"') && (nonEmptySegment)) {
2411 state = SCE_HBA_STRING;
2412 } else if (ch == '\'') {
2413 state = SCE_HBA_COMMENTLINE;
2414 } else if (IsAWordStart(ch)) {
2415 state = SCE_HBA_WORD;
2416 } else if (IsOperator(ch)) {
2417 styler.ColourTo(i, SCE_HBA_DEFAULT);
2419 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2420 if (ch == '/' && chNext == '*') {
2421 if (styler.SafeGetCharAt(i + 2) == '*')
2422 state = SCE_HJ_COMMENTDOC;
2423 else
2424 state = SCE_HJ_COMMENT;
2425 } else if (ch == '/' && chNext == '/') {
2426 state = SCE_HJ_COMMENTLINE;
2427 } else if ((ch == '\"') && (nonEmptySegment)) {
2428 state = SCE_HJ_DOUBLESTRING;
2429 } else if ((ch == '\'') && (nonEmptySegment)) {
2430 state = SCE_HJ_SINGLESTRING;
2431 } else if (IsAWordStart(ch)) {
2432 state = SCE_HJ_WORD;
2433 } else if (IsOperator(ch)) {
2434 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2439 switch (state) {
2440 case SCE_HJ_WORD:
2441 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2442 break;
2443 case SCE_HB_WORD:
2444 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2445 break;
2446 case SCE_HP_WORD:
2447 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2448 break;
2449 case SCE_HPHP_WORD:
2450 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2451 break;
2452 default:
2453 StateToPrint = statePrintForState(state, inScriptType);
2454 if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2455 styler.ColourTo(lengthDoc - 1, StateToPrint);
2456 break;
2459 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2460 if (fold) {
2461 const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2462 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2464 styler.Flush();
// Lexer module definitions backed by LexerHTML. Each one pairs a SCLEX_*
// lexer identifier with a LexerHTML factory function, a language name string
// and a word list description.
// HTML: named "hypertext", created through LexerHTML::LexerFactoryHTML.
2467 LexerModule lmHTML(SCLEX_HTML, LexerHTML::LexerFactoryHTML, "hypertext", htmlWordListDesc);
// XML: named "xml", created through LexerHTML::LexerFactoryXML; it uses the
// same word list description as the HTML module.
2468 LexerModule lmXML(SCLEX_XML, LexerHTML::LexerFactoryXML, "xml", htmlWordListDesc);
// PHP script: named "phpscript", created through LexerHTML::LexerFactoryPHPScript;
// it has its own word list description (phpscriptWordListDesc).
2469 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, LexerHTML::LexerFactoryPHPScript, "phpscript", phpscriptWordListDesc);