// Scintilla source code edit control
/**
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by The Black Horus
 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
24 #include "Scintilla.h"
28 #include "LexAccessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 #include "OptionSet.h"
35 using namespace Scintilla
;
// Tell whether ch may continue an identifier or number token.
// Accepted characters are the ASCII letters and digits plus '.', '_' and '?'.
// Any code outside 0..0x7F is rejected.
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for unsigned char values and EOF, so keep
	// negative and non-ASCII codes away from it.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
// Tell whether ch may begin an identifier.
// Accepted characters are the ASCII letters and digits plus
// '_', '.', '%', '@', '$' and '?'. Any code outside 0..0x7F is rejected.
static inline bool IsAWordStart(const int ch) {
	// isalnum() is only defined for unsigned char values and EOF, so keep
	// negative and non-ASCII codes away from it.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
// Tell whether ch is one of the single-character operators that get
// operator styling. Letters and digits are never operators.
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	// '.' left out as it is used to make up numbers
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		// Covers alphanumerics, every other punctuation character and any
		// code outside the ASCII range.
		return false;
	}
}
61 static bool IsStreamCommentStyle(int style
) {
62 return style
== SCE_ASM_COMMENTDIRECTIVE
|| style
== SCE_ASM_COMMENTBLOCK
;
// Map the ASCII upper case letters 'A'..'Z' to 'a'..'z'.
// Every other value is returned unchanged; no locale is consulted.
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}
// An individual named option for use in an OptionSet

// Options used for LexerAsm
// Each member is bound to a property name by OptionSetAsm; the constructor
// supplies the value used until the container sets that property.
struct OptionsAsm {
	std::string delimiter;          // "lexer.asm.comment.delimiter"; empty means use '~'
	bool fold;                      // "fold"
	bool foldSyntaxBased;           // "fold.asm.syntax.based"
	bool foldCommentMultiline;      // "fold.asm.comment.multiline"
	bool foldCommentExplicit;       // "fold.asm.comment.explicit"
	std::string foldExplicitStart;  // "fold.asm.explicit.start"; empty means use ";{"
	std::string foldExplicitEnd;    // "fold.asm.explicit.end"; empty means use ";}"
	bool foldExplicitAnywhere;      // "fold.asm.explicit.anywhere"
	bool foldCompact;               // "fold.compact"
	OptionsAsm() :
		delimiter(),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
// Descriptions of the keyword lists, in the order of the index that
// LexerAsm::WordListSet() switches on. The array is terminated by a null
// pointer.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	0
};
109 struct OptionSetAsm
: public OptionSet
<OptionsAsm
> {
111 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter
,
112 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
114 DefineProperty("fold", &OptionsAsm::fold
);
116 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased
,
117 "Set this property to 0 to disable syntax based folding.");
119 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline
,
120 "Set this property to 1 to enable folding multi-line comments.");
122 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit
,
123 "This option enables folding explicit fold points when using the Asm lexer. "
124 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
125 "at the end of a section that should fold.");
127 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart
,
128 "The string to use for explicit fold start points, replacing the standard ;{.");
130 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd
,
131 "The string to use for explicit fold end points, replacing the standard ;}.");
133 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere
,
134 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
136 DefineProperty("fold.compact", &OptionsAsm::foldCompact
);
138 DefineWordListSets(asmWordListDesc
);
142 class LexerAsm
: public ILexer
{
143 WordList cpuInstruction
;
144 WordList mathInstruction
;
147 WordList directiveOperand
;
148 WordList extInstruction
;
149 WordList directives4foldstart
;
150 WordList directives4foldend
;
155 LexerAsm(int commentChar_
) {
156 commentChar
= commentChar_
;
158 virtual ~LexerAsm() {
160 void SCI_METHOD
Release() {
163 int SCI_METHOD
Version() const {
166 const char * SCI_METHOD
PropertyNames() {
167 return osAsm
.PropertyNames();
169 int SCI_METHOD
PropertyType(const char *name
) {
170 return osAsm
.PropertyType(name
);
172 const char * SCI_METHOD
DescribeProperty(const char *name
) {
173 return osAsm
.DescribeProperty(name
);
175 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
);
176 const char * SCI_METHOD
DescribeWordListSets() {
177 return osAsm
.DescribeWordListSets();
179 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
);
180 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
181 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
);
183 void * SCI_METHOD
PrivateCall(int, void *) {
187 static ILexer
*LexerFactoryAsm() {
188 return new LexerAsm(';');
191 static ILexer
*LexerFactoryAs() {
192 return new LexerAsm('#');
196 Sci_Position SCI_METHOD
LexerAsm::PropertySet(const char *key
, const char *val
) {
197 if (osAsm
.PropertySet(&options
, key
, val
)) {
203 Sci_Position SCI_METHOD
LexerAsm::WordListSet(int n
, const char *wl
) {
204 WordList
*wordListN
= 0;
207 wordListN
= &cpuInstruction
;
210 wordListN
= &mathInstruction
;
213 wordListN
= ®isters
;
216 wordListN
= &directive
;
219 wordListN
= &directiveOperand
;
222 wordListN
= &extInstruction
;
225 wordListN
= &directives4foldstart
;
228 wordListN
= &directives4foldend
;
231 Sci_Position firstModification
= -1;
235 if (*wordListN
!= wlNew
) {
237 firstModification
= 0;
240 return firstModification
;
243 void SCI_METHOD
LexerAsm::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
244 LexAccessor
styler(pAccess
);
246 // Do not leak onto next line
247 if (initStyle
== SCE_ASM_STRINGEOL
)
248 initStyle
= SCE_ASM_DEFAULT
;
250 StyleContext
sc(startPos
, length
, initStyle
, styler
);
252 for (; sc
.More(); sc
.Forward())
255 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
256 if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_STRING
)) {
257 sc
.SetState(SCE_ASM_STRING
);
258 } else if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_CHARACTER
)) {
259 sc
.SetState(SCE_ASM_CHARACTER
);
262 // Handle line continuation generically.
264 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
266 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
273 // Determine if the current state should terminate.
274 if (sc
.state
== SCE_ASM_OPERATOR
) {
275 if (!IsAsmOperator(sc
.ch
)) {
276 sc
.SetState(SCE_ASM_DEFAULT
);
278 } else if (sc
.state
== SCE_ASM_NUMBER
) {
279 if (!IsAWordChar(sc
.ch
)) {
280 sc
.SetState(SCE_ASM_DEFAULT
);
282 } else if (sc
.state
== SCE_ASM_IDENTIFIER
) {
283 if (!IsAWordChar(sc
.ch
) ) {
285 sc
.GetCurrentLowered(s
, sizeof(s
));
286 bool IsDirective
= false;
288 if (cpuInstruction
.InList(s
)) {
289 sc
.ChangeState(SCE_ASM_CPUINSTRUCTION
);
290 } else if (mathInstruction
.InList(s
)) {
291 sc
.ChangeState(SCE_ASM_MATHINSTRUCTION
);
292 } else if (registers
.InList(s
)) {
293 sc
.ChangeState(SCE_ASM_REGISTER
);
294 } else if (directive
.InList(s
)) {
295 sc
.ChangeState(SCE_ASM_DIRECTIVE
);
297 } else if (directiveOperand
.InList(s
)) {
298 sc
.ChangeState(SCE_ASM_DIRECTIVEOPERAND
);
299 } else if (extInstruction
.InList(s
)) {
300 sc
.ChangeState(SCE_ASM_EXTINSTRUCTION
);
302 sc
.SetState(SCE_ASM_DEFAULT
);
303 if (IsDirective
&& !strcmp(s
, "comment")) {
304 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
305 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
) {
306 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
308 if (sc
.ch
== delimiter
) {
309 sc
.SetState(SCE_ASM_COMMENTDIRECTIVE
);
313 } else if (sc
.state
== SCE_ASM_COMMENTDIRECTIVE
) {
314 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
315 if (sc
.ch
== delimiter
) {
316 while (!sc
.atLineEnd
) {
319 sc
.SetState(SCE_ASM_DEFAULT
);
321 } else if (sc
.state
== SCE_ASM_COMMENT
) {
323 sc
.SetState(SCE_ASM_DEFAULT
);
325 } else if (sc
.state
== SCE_ASM_STRING
) {
327 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
330 } else if (sc
.ch
== '\"') {
331 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
332 } else if (sc
.atLineEnd
) {
333 sc
.ChangeState(SCE_ASM_STRINGEOL
);
334 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
336 } else if (sc
.state
== SCE_ASM_CHARACTER
) {
338 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
341 } else if (sc
.ch
== '\'') {
342 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
343 } else if (sc
.atLineEnd
) {
344 sc
.ChangeState(SCE_ASM_STRINGEOL
);
345 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
349 // Determine if a new state should be entered.
350 if (sc
.state
== SCE_ASM_DEFAULT
) {
351 if (sc
.ch
== commentChar
){
352 sc
.SetState(SCE_ASM_COMMENT
);
353 } else if (IsASCII(sc
.ch
) && (isdigit(sc
.ch
) || (sc
.ch
== '.' && IsASCII(sc
.chNext
) && isdigit(sc
.chNext
)))) {
354 sc
.SetState(SCE_ASM_NUMBER
);
355 } else if (IsAWordStart(sc
.ch
)) {
356 sc
.SetState(SCE_ASM_IDENTIFIER
);
357 } else if (sc
.ch
== '\"') {
358 sc
.SetState(SCE_ASM_STRING
);
359 } else if (sc
.ch
== '\'') {
360 sc
.SetState(SCE_ASM_CHARACTER
);
361 } else if (IsAsmOperator(sc
.ch
)) {
362 sc
.SetState(SCE_ASM_OPERATOR
);
370 // Store both the current line's fold level and the next lines in the
371 // level store to make it easy to pick up with each increment
372 // and to make it possible to fiddle the current level for "else".
374 void SCI_METHOD
LexerAsm::Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
379 LexAccessor
styler(pAccess
);
381 Sci_PositionU endPos
= startPos
+ length
;
382 int visibleChars
= 0;
383 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
384 int levelCurrent
= SC_FOLDLEVELBASE
;
386 levelCurrent
= styler
.LevelAt(lineCurrent
-1) >> 16;
387 int levelNext
= levelCurrent
;
388 char chNext
= styler
[startPos
];
389 int styleNext
= styler
.StyleAt(startPos
);
390 int style
= initStyle
;
393 const bool userDefinedFoldMarkers
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty();
394 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
396 chNext
= styler
.SafeGetCharAt(i
+ 1);
397 int stylePrev
= style
;
399 styleNext
= styler
.StyleAt(i
+ 1);
400 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
401 if (options
.foldCommentMultiline
&& IsStreamCommentStyle(style
)) {
402 if (!IsStreamCommentStyle(stylePrev
)) {
404 } else if (!IsStreamCommentStyle(styleNext
) && !atEOL
) {
405 // Comments don't end at end of line and the next character may be unstyled.
409 if (options
.foldCommentExplicit
&& ((style
== SCE_ASM_COMMENT
) || options
.foldExplicitAnywhere
)) {
410 if (userDefinedFoldMarkers
) {
411 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) {
413 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) {
420 } else if (chNext
== '}') {
426 if (options
.foldSyntaxBased
&& (style
== SCE_ASM_DIRECTIVE
)) {
427 word
[wordlen
++] = static_cast<char>(LowerCase(ch
));
428 if (wordlen
== 100) { // prevent overflow
432 if (styleNext
!= SCE_ASM_DIRECTIVE
) { // reading directive ready
433 word
[wordlen
] = '\0';
435 if (directives4foldstart
.InList(word
)) {
437 } else if (directives4foldend
.InList(word
)){
444 if (atEOL
|| (i
== endPos
-1)) {
445 int levelUse
= levelCurrent
;
446 int lev
= levelUse
| levelNext
<< 16;
447 if (visibleChars
== 0 && options
.foldCompact
)
448 lev
|= SC_FOLDLEVELWHITEFLAG
;
449 if (levelUse
< levelNext
)
450 lev
|= SC_FOLDLEVELHEADERFLAG
;
451 if (lev
!= styler
.LevelAt(lineCurrent
)) {
452 styler
.SetLevel(lineCurrent
, lev
);
455 levelCurrent
= levelNext
;
456 if (atEOL
&& (i
== static_cast<Sci_PositionU
>(styler
.Length() - 1))) {
457 // There is an empty line at end of file so give it same level and empty
458 styler
.SetLevel(lineCurrent
, (levelCurrent
| levelCurrent
<< 16) | SC_FOLDLEVELWHITEFLAG
);
465 LexerModule
lmAsm(SCLEX_ASM
, LexerAsm::LexerFactoryAsm
, "asm", asmWordListDesc
);
466 LexerModule
lmAs(SCLEX_AS
, LexerAsm::LexerFactoryAs
, "as", asmWordListDesc
);