// Scintilla source code edit control
/**
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by The Black Horus
 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
20 #pragma warning(disable: 4786)
28 #include "Scintilla.h"
32 #include "LexAccessor.h"
33 #include "StyleContext.h"
34 #include "CharacterSet.h"
35 #include "LexerModule.h"
36 #include "OptionSet.h"
39 using namespace Scintilla
;
/**
 * Test whether ch may appear inside an assembler word.
 * Accepts ASCII letters and digits plus '.', '_' and '?'.
 * @param ch character code to classify; anything outside 0..0x7F is rejected
 * @return true when ch is a word character
 */
static inline bool IsAWordChar(const int ch) {
	// Range-check before isalnum(): it is only defined for unsigned char values and EOF.
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' ||
		ch == '_' || ch == '?');
}
/**
 * Test whether ch may begin an assembler word.
 * Accepts ASCII letters and digits plus '_', '.', '%', '@', '$' and '?'.
 * @param ch character code to classify; anything outside 0..0x7F is rejected
 * @return true when ch can start a word
 */
static inline bool IsAWordStart(const int ch) {
	// Range-check before isalnum(): it is only defined for unsigned char values and EOF.
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?');
}
/**
 * Test whether ch is one of the assembler operator characters.
 * @param ch character code to classify
 * @return true for * / - + ( ) = ^ [ ] < & > , | ~ % and :, false otherwise
 */
static inline bool IsAsmOperator(const int ch) {
	// Letters and digits are never operators. The range check keeps isalnum()
	// away from values it is not defined for.
	if ((ch >= 0) && (ch < 0x80) && (isalnum(ch)))
		return false;
	// '.' left out as it is used to make up numbers
	if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
		ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
		ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
		ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
		ch == '%' || ch == ':')
		return true;
	return false;
}
65 static bool IsStreamCommentStyle(int style
) {
66 return style
== SCE_ASM_COMMENTDIRECTIVE
|| style
== SCE_ASM_COMMENTBLOCK
;
/**
 * Convert an ASCII upper-case letter to lower case.
 * @param c character code
 * @return the lower-case letter for 'A'..'Z'; any other value unchanged
 */
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return c - 'A' + 'a';
	return c;
}
75 // An individual named option for use in an OptionSet
77 // Options used for LexerAsm
// NOTE(review): the struct header, some members and the constructor header
// are not visible in this listing; only the lines below survive.
// Bound to "lexer.asm.comment.delimiter"; Lex() uses '~' when this is empty.
79 std::string delimiter
;
// Bound to "fold.asm.comment.multiline".
82 bool foldCommentMultiline
;
// Bound to "fold.asm.comment.explicit".
83 bool foldCommentExplicit
;
// Bound to "fold.asm.explicit.start".
84 std::string foldExplicitStart
;
// Bound to "fold.asm.explicit.end".
85 std::string foldExplicitEnd
;
// Bound to "fold.asm.explicit.anywhere".
86 bool foldExplicitAnywhere
;
// Default values assigned to the options.
91 foldSyntaxBased
= true;
92 foldCommentMultiline
= false;
93 foldCommentExplicit
= false;
94 foldExplicitStart
= "";
96 foldExplicitAnywhere
= false;
// Description strings for the lexer's word lists. The array is handed to
// DefineWordListSets() and to the LexerModule constructor.
// NOTE(review): the leading entries and the array terminator are not visible
// in this listing.
101 static const char * const asmWordListDesc
[] = {
106 "Directive operands",
107 "Extended instructions",
108 "Directives4Foldstart",
109 "Directives4Foldend",
// Option set for the assembler lexer: maps each property name to a member of
// OptionsAsm (most with a description string) and registers the word list
// descriptions.
113 struct OptionSetAsm
: public OptionSet
<OptionsAsm
> {
115 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter
,
116 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
// "fold" is defined without a description string.
118 DefineProperty("fold", &OptionsAsm::fold
);
120 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased
,
121 "Set this property to 0 to disable syntax based folding.");
123 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline
,
124 "Set this property to 1 to enable folding multi-line comments.");
126 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit
,
127 "This option enables folding explicit fold points when using the Asm lexer. "
128 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
129 "at the end of a section that should fold.");
131 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart
,
132 "The string to use for explicit fold start points, replacing the standard ;{.");
134 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd
,
135 "The string to use for explicit fold end points, replacing the standard ;}.");
137 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere
,
138 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
// "fold.compact" is defined without a description string.
140 DefineProperty("fold.compact", &OptionsAsm::foldCompact
);
142 DefineWordListSets(asmWordListDesc
);
// Lexer object for assembler source, implementing ILexer.
// NOTE(review): several members used elsewhere in the file (registers,
// directive, options, osAsm), the constructor/destructor and the bodies of
// Release/Version/PrivateCall are not visible in this listing.
146 class LexerAsm
: public ILexer
{
// Keyword lists; WordListSet() points at one of them, Lex() and Fold() query them with InList().
147 WordList cpuInstruction
;
148 WordList mathInstruction
;
151 WordList directiveOperand
;
152 WordList extInstruction
;
153 WordList directives4foldstart
;
154 WordList directives4foldend
;
162 void SCI_METHOD
Release() {
165 int SCI_METHOD
Version() const {
// Property and word-list description queries are forwarded to osAsm.
168 const char * SCI_METHOD
PropertyNames() {
169 return osAsm
.PropertyNames();
171 int SCI_METHOD
PropertyType(const char *name
) {
172 return osAsm
.PropertyType(name
);
174 const char * SCI_METHOD
DescribeProperty(const char *name
) {
175 return osAsm
.DescribeProperty(name
);
// Declared here, defined out of line further down.
177 int SCI_METHOD
PropertySet(const char *key
, const char *val
);
178 const char * SCI_METHOD
DescribeWordListSets() {
179 return osAsm
.DescribeWordListSets();
181 int SCI_METHOD
WordListSet(int n
, const char *wl
);
182 void SCI_METHOD
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
183 void SCI_METHOD
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
185 void * SCI_METHOD
PrivateCall(int, void *) {
// Factory passed to the LexerModule definition at the end of the file;
// returns a newly allocated LexerAsm as an ILexer pointer.
189 static ILexer
*LexerFactoryAsm() {
190 return new LexerAsm();
// Applies the property key/val to `options` through the option set osAsm.
// NOTE(review): the statements run when osAsm.PropertySet() succeeds, and the
// return values, are not visible in this listing.
194 int SCI_METHOD
LexerAsm::PropertySet(const char *key
, const char *val
) {
195 if (osAsm
.PropertySet(&options
, key
, val
)) {
// Replaces one of the lexer's keyword lists.
// wordListN is pointed at one of the eight lists and compared with wlNew.
// Returns firstModification: -1 initially, set to 0 when the lists differ.
// NOTE(review): the construct that selects a list from n, the declaration of
// wlNew and the update of the chosen list are not visible in this listing.
201 int SCI_METHOD
LexerAsm::WordListSet(int n
, const char *wl
) {
202 WordList
*wordListN
= 0;
205 wordListN
= &cpuInstruction
;
208 wordListN
= &mathInstruction
;
211 wordListN
= ®isters
;
214 wordListN
= &directive
;
217 wordListN
= &directiveOperand
;
220 wordListN
= &extInstruction
;
223 wordListN
= &directives4foldstart
;
226 wordListN
= &directives4foldend
;
229 int firstModification
= -1;
233 if (*wordListN
!= wlNew
) {
235 firstModification
= 0;
238 return firstModification
;
// Styles `length` characters starting at startPos, beginning in initStyle,
// by walking a StyleContext built over pAccess. For each character it first
// decides whether the current state ends, then whether a new state starts
// from SCE_ASM_DEFAULT.
// NOTE(review): a number of original lines (braces, the line-continuation
// test, the comment-start test, the end of the function) are not visible in
// this listing.
241 void SCI_METHOD
LexerAsm::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
242 LexAccessor
styler(pAccess
);
244 // Do not leak onto next line
245 if (initStyle
== SCE_ASM_STRINGEOL
)
246 initStyle
= SCE_ASM_DEFAULT
;
248 StyleContext
sc(startPos
, length
, initStyle
, styler
);
250 for (; sc
.More(); sc
.Forward())
253 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
254 if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_STRING
)) {
255 sc
.SetState(SCE_ASM_STRING
);
256 } else if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_CHARACTER
)) {
257 sc
.SetState(SCE_ASM_CHARACTER
);
260 // Handle line continuation generically.
262 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
264 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
271 // Determine if the current state should terminate.
272 if (sc
.state
== SCE_ASM_OPERATOR
) {
273 if (!IsAsmOperator(sc
.ch
)) {
274 sc
.SetState(SCE_ASM_DEFAULT
);
276 } else if (sc
.state
== SCE_ASM_NUMBER
) {
277 if (!IsAWordChar(sc
.ch
)) {
278 sc
.SetState(SCE_ASM_DEFAULT
);
280 } else if (sc
.state
== SCE_ASM_IDENTIFIER
) {
281 if (!IsAWordChar(sc
.ch
) ) {
// The identifier has ended: fetch it lower-cased into s and restyle it
// according to the first keyword list that contains it.
283 sc
.GetCurrentLowered(s
, sizeof(s
));
284 bool IsDirective
= false;
286 if (cpuInstruction
.InList(s
)) {
287 sc
.ChangeState(SCE_ASM_CPUINSTRUCTION
);
288 } else if (mathInstruction
.InList(s
)) {
289 sc
.ChangeState(SCE_ASM_MATHINSTRUCTION
);
290 } else if (registers
.InList(s
)) {
291 sc
.ChangeState(SCE_ASM_REGISTER
);
292 } else if (directive
.InList(s
)) {
293 sc
.ChangeState(SCE_ASM_DIRECTIVE
);
295 } else if (directiveOperand
.InList(s
)) {
296 sc
.ChangeState(SCE_ASM_DIRECTIVEOPERAND
);
297 } else if (extInstruction
.InList(s
)) {
298 sc
.ChangeState(SCE_ASM_EXTINSTRUCTION
);
300 sc
.SetState(SCE_ASM_DEFAULT
);
// "comment" directive: skip spaces/tabs on the same line; a delimiter
// character then starts a SCE_ASM_COMMENTDIRECTIVE run. The delimiter is the
// first character of options.delimiter, or '~' when that string is empty.
// NOTE(review): the line that sets IsDirective to true is not visible here.
301 if (IsDirective
&& !strcmp(s
, "comment")) {
302 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
303 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
) {
304 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
306 if (sc
.ch
== delimiter
) {
307 sc
.SetState(SCE_ASM_COMMENTDIRECTIVE
);
311 } else if (sc
.state
== SCE_ASM_COMMENTDIRECTIVE
) {
// Same delimiter selection as for the opening delimiter.
312 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
313 if (sc
.ch
== delimiter
) {
314 while (!sc
.atLineEnd
) {
317 sc
.SetState(SCE_ASM_DEFAULT
);
319 } else if (sc
.state
== SCE_ASM_COMMENT
) {
321 sc
.SetState(SCE_ASM_DEFAULT
);
323 } else if (sc
.state
== SCE_ASM_STRING
) {
325 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
328 } else if (sc
.ch
== '\"') {
329 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
// String still open at end of line: restyle it as SCE_ASM_STRINGEOL.
330 } else if (sc
.atLineEnd
) {
331 sc
.ChangeState(SCE_ASM_STRINGEOL
);
332 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
334 } else if (sc
.state
== SCE_ASM_CHARACTER
) {
336 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
339 } else if (sc
.ch
== '\'') {
340 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
// Character literal still open at end of line: also restyled as SCE_ASM_STRINGEOL.
341 } else if (sc
.atLineEnd
) {
342 sc
.ChangeState(SCE_ASM_STRINGEOL
);
343 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
347 // Determine if a new state should be entered.
348 if (sc
.state
== SCE_ASM_DEFAULT
) {
350 sc
.SetState(SCE_ASM_COMMENT
);
// A number starts with an ASCII digit, or with '.' followed by an ASCII digit.
351 } else if (isascii(sc
.ch
) && (isdigit(sc
.ch
) || (sc
.ch
== '.' && isascii(sc
.chNext
) && isdigit(sc
.chNext
)))) {
352 sc
.SetState(SCE_ASM_NUMBER
);
353 } else if (IsAWordStart(sc
.ch
)) {
354 sc
.SetState(SCE_ASM_IDENTIFIER
);
355 } else if (sc
.ch
== '\"') {
356 sc
.SetState(SCE_ASM_STRING
);
357 } else if (sc
.ch
== '\'') {
358 sc
.SetState(SCE_ASM_CHARACTER
);
359 } else if (IsAsmOperator(sc
.ch
)) {
360 sc
.SetState(SCE_ASM_OPERATOR
);
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "else".
// Computes fold levels for the characters in [startPos, startPos + length).
// Fold points come from multi-line comment styles, explicit fold markers and
// the directives4foldstart / directives4foldend word lists, each gated by the
// matching option.
// NOTE(review): the statements that adjust levelNext, several declarations
// (ch, word, wordlen) and various braces are not visible in this listing.
372 void SCI_METHOD
LexerAsm::Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
377 LexAccessor
styler(pAccess
);
379 unsigned int endPos
= startPos
+ length
;
380 int visibleChars
= 0;
381 int lineCurrent
= styler
.GetLine(startPos
);
382 int levelCurrent
= SC_FOLDLEVELBASE
;
// The upper 16 bits of a stored level hold the following line's level (see
// `lev = levelUse | levelNext << 16` below), so the previous line's entry
// gives this line's starting level.
384 levelCurrent
= styler
.LevelAt(lineCurrent
-1) >> 16;
385 int levelNext
= levelCurrent
;
386 char chNext
= styler
[startPos
];
387 int styleNext
= styler
.StyleAt(startPos
);
388 int style
= initStyle
;
// User-defined markers are used only when both the start and end strings are non-empty.
391 const bool userDefinedFoldMarkers
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty();
392 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
394 chNext
= styler
.SafeGetCharAt(i
+ 1);
395 int stylePrev
= style
;
397 styleNext
= styler
.StyleAt(i
+ 1);
// End of line: a '\r' not followed by '\n', or a '\n'.
398 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Multi-line comment folding: test for entering and leaving a stream comment style.
399 if (options
.foldCommentMultiline
&& IsStreamCommentStyle(style
)) {
400 if (!IsStreamCommentStyle(stylePrev
)) {
402 } else if (!IsStreamCommentStyle(styleNext
) && !atEOL
) {
403 // Comments don't end at end of line and the next character may be unstyled.
// Explicit fold points: in line comments, or anywhere when foldExplicitAnywhere is set.
407 if (options
.foldCommentExplicit
&& ((style
== SCE_ASM_COMMENT
) || options
.foldExplicitAnywhere
)) {
408 if (userDefinedFoldMarkers
) {
409 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) {
411 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) {
418 } else if (chNext
== '}') {
// Syntax-based folding: collect the lower-cased text of a directive into
// word[], then look it up in the fold start / fold end lists once the
// directive style ends.
424 if (options
.foldSyntaxBased
&& (style
== SCE_ASM_DIRECTIVE
)) {
425 word
[wordlen
++] = static_cast<char>(LowerCase(ch
));
426 if (wordlen
== 100) { // prevent overflow
430 if (styleNext
!= SCE_ASM_DIRECTIVE
) { // reading directive ready
431 word
[wordlen
] = '\0';
433 if (directives4foldstart
.InList(word
)) {
435 } else if (directives4foldend
.InList(word
)){
// At a line end, or at the last character of the range, store the line's level.
442 if (atEOL
|| (i
== endPos
-1)) {
443 int levelUse
= levelCurrent
;
444 int lev
= levelUse
| levelNext
<< 16;
445 if (visibleChars
== 0 && options
.foldCompact
)
446 lev
|= SC_FOLDLEVELWHITEFLAG
;
447 if (levelUse
< levelNext
)
448 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Only write the level when it differs from the stored one.
449 if (lev
!= styler
.LevelAt(lineCurrent
)) {
450 styler
.SetLevel(lineCurrent
, lev
);
453 levelCurrent
= levelNext
;
454 if (atEOL
&& (i
== static_cast<unsigned int>(styler
.Length()-1))) {
455 // There is an empty line at end of file so give it same level and empty
456 styler
.SetLevel(lineCurrent
, (levelCurrent
| levelCurrent
<< 16) | SC_FOLDLEVELWHITEFLAG
);
463 LexerModule
lmAsm(SCLEX_ASM
, LexerAsm::LexerFactoryAsm
, "asm", asmWordListDesc
);