// Scintilla source code edit control
/**
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by Martial Demolins AKA Folco
 **/
// Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
// The License.txt file describes the conditions under which this software
// may be distributed.
19 #include "Scintilla.h"
23 #include "LexAccessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
29 using namespace Scintilla
;
// Return values for GetOperatorType
//
// NO_OPERATOR is used by GetOperatorType and ColouriseA68kDoc. The #ifndef
// guard avoids clashing with a definition made elsewhere.
// NOTE(review): the value 0 for "not an operator" is assumed - confirm.
#ifndef NO_OPERATOR
#define NO_OPERATOR     0
#endif
#define OPERATOR_1CHAR  1
#define OPERATOR_2CHAR  2
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or '\'.
 *
 *  @param ch  Character code to classify. Any int is accepted; values outside
 *             the 7-bit ASCII range (negative or >= 0x80) are never letters.
 */
static inline bool IsIdentifierStart (const int ch)
{
    // The <ctype.h> classifiers are undefined for arguments that are neither
    // EOF nor representable as unsigned char, so only hand them 7-bit values.
    const bool isAscii = (ch >= 0) && (ch < 0x80);
    return (isAscii && isalpha(ch)) || (ch == '_') || (ch == '\\');
}
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  @param ch  Character code to classify. Any int is accepted; values outside
 *             the 7-bit ASCII range (negative or >= 0x80) are rejected.
 */
static inline bool IsIdentifierChar (const int ch)
{
    // The <ctype.h> classifiers are undefined for arguments that are neither
    // EOF nor representable as unsigned char, so only hand them 7-bit values.
    const bool isAscii = (ch >= 0) && (ch < 0x80);
    return (isAscii && isalnum(ch)) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.');
}
66 * NO_OPERATOR if char is not an operator
67 * OPERATOR_1CHAR if the operator is one char long
68 * OPERATOR_2CHAR if the operator is two chars long
71 static inline int GetOperatorType (const int ch1
, const int ch2
)
73 int OpType
= NO_OPERATOR
;
75 if ((ch1
== '+') || (ch1
== '-') || (ch1
== '*') || (ch1
== '/') || (ch1
== '#') ||
76 (ch1
== '(') || (ch1
== ')') || (ch1
== '~') || (ch1
== '&') || (ch1
== '|') || (ch1
== ','))
77 OpType
= OPERATOR_1CHAR
;
79 else if ((ch1
== ch2
) && (ch1
== '<' || ch1
== '>'))
80 OpType
= OPERATOR_2CHAR
;
/**
 *  IsBin
 *
 *  Return true if the given char is 0 or 1
 *
 *  @param ch  Character code to test against the digits '0' and '1'.
 */
static inline bool IsBin (const int ch)
{
    return (ch == '0') || (ch == '1');
}
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  @param ch  Character code to classify. Any int is accepted; values outside
 *             the 7-bit ASCII range (negative or >= 0x80) are rejected.
 */
static inline bool IsDoxygenChar (const int ch)
{
    // The <ctype.h> classifiers are undefined for arguments that are neither
    // EOF nor representable as unsigned char, so only hand them 7-bit values.
    const bool isAscii = (ch >= 0) && (ch < 0x80);
    return (isAscii && isalpha(ch)) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
/** Main function, which colourises a 68k source */
// ColouriseA68kDoc
//
// Lexing routine for 68k assembly source. It is the function passed to the
// lmA68k LexerModule defined at the end of this file.
//
// Parameters:
//   startPos, length, initStyle, styler
//       Passed unchanged to the StyleContext constructor below (presumably the
//       range to style, the style in effect at its start, and the document
//       accessor - confirm against StyleContext.h).
//   keywordlists
//       Indices 0..5 are read and bound, in order, to cpuInstruction,
//       registers, directive, extInstruction, alert and doxygenKeyword.
//
// Outline of the visible logic: walk the range with sc.More()/sc.Forward();
// reset to SCE_A68K_DEFAULT at each line start; while in a non-default state,
// test whether the current char extends it; otherwise finish the state
// (re-tagging identifiers, labels, Doxygen keywords and alerts via the keyword
// lists) and then decide which new state the current char opens.
//
// NOTE(review): this copy of the function appears to have lost lines. The
// leading numbers on each line skip values, no braces are present, and
// `Buffer` and `OpType` are used below without any visible declaration.
// Restore the missing lines from the upstream file before building.
116 static void ColouriseA68kDoc (Sci_PositionU startPos
, Sci_Position length
, int initStyle
, WordList
*keywordlists
[], Accessor
&styler
)
118 // Used to buffer a string, to be able to compare it using built-in functions
122 // Used to know the length of an operator
126 // Get references to keywords lists
127 WordList
&cpuInstruction
= *keywordlists
[0];
// NOTE(review): `®isters` below looks like a mis-decoded `&registers`
// (the "&reg" prefix rendered as an HTML entity); later code uses `registers`.
128 WordList
®isters
= *keywordlists
[1];
129 WordList
&directive
= *keywordlists
[2];
130 WordList
&extInstruction
= *keywordlists
[3];
131 WordList
&alert
= *keywordlists
[4];
132 WordList
&doxygenKeyword
= *keywordlists
[5];
135 // Instantiate a context for our source
136 StyleContext
sc(startPos
, length
, initStyle
, styler
);
139 /************************************************************
143 ************************************************************/
// One iteration per character position of the range being styled.
145 for ( ; sc
.More(); sc
.Forward())
147 /************************************************************
149 * A style always terminates at the end of a line, even for
150 * comments (no multi-lines comments)
152 ************************************************************/
153 if (sc
.atLineStart
) {
154 sc
.SetState(SCE_A68K_DEFAULT
);
158 /************************************************************
160 * If we are not in "default style", check if the style continues
161 * In this case, we just have to loop
163 ************************************************************/
165 if (sc
.state
!= SCE_A68K_DEFAULT
)
// Each clause pairs a state with the test for a char that extends it.
// NOTE(review): the statement guarded by this condition is not visible in this
// copy; per the comment above it should move on to the next character.
// NOTE(review): isdigit/isxdigit/isalpha and the Is*Char helpers receive sc.ch
// here without the `sc.ch < 0x80` guard used further down - confirm that
// non-ASCII values are safe for them.
167 if ( ((sc
.state
== SCE_A68K_NUMBER_DEC
) && isdigit(sc
.ch
)) // Decimal number
168 || ((sc
.state
== SCE_A68K_NUMBER_BIN
) && IsBin(sc
.ch
)) // Binary number
169 || ((sc
.state
== SCE_A68K_NUMBER_HEX
) && isxdigit(sc
.ch
)) // Hexa number
170 || ((sc
.state
== SCE_A68K_MACRO_ARG
) && isdigit(sc
.ch
)) // Macro argument
171 || ((sc
.state
== SCE_A68K_STRING1
) && (sc
.ch
!= '\'')) // String single-quoted
172 || ((sc
.state
== SCE_A68K_STRING2
) && (sc
.ch
!= '\"')) // String double-quoted
173 || ((sc
.state
== SCE_A68K_MACRO_DECLARATION
) && IsIdentifierChar(sc
.ch
)) // Macro declaration (or global label, we don't know at this point)
174 || ((sc
.state
== SCE_A68K_IDENTIFIER
) && IsIdentifierChar(sc
.ch
)) // Identifier
175 || ((sc
.state
== SCE_A68K_LABEL
) && IsIdentifierChar(sc
.ch
)) // Label (local)
176 || ((sc
.state
== SCE_A68K_COMMENT_DOXYGEN
) && IsDoxygenChar(sc
.ch
)) // Doxygen keyword
177 || ((sc
.state
== SCE_A68K_COMMENT_SPECIAL
) && isalpha(sc
.ch
)) // Alert
178 || ((sc
.state
== SCE_A68K_COMMENT
) && !isalpha(sc
.ch
) && (sc
.ch
!= '\\'))) // Normal comment
183 /************************************************************
185 * Check if current state terminates
187 ************************************************************/
189 // Strings: include terminal ' or " in the current string by skipping it
// NOTE(review): the body of this branch (the skip itself) is not visible here.
190 if ((sc
.state
== SCE_A68K_STRING1
) || (sc
.state
== SCE_A68K_STRING2
)) {
195 // If a macro declaration was terminated with ':', it was a label
196 else if ((sc
.state
== SCE_A68K_MACRO_DECLARATION
) && (sc
.chPrev
== ':')) {
197 sc
.ChangeState(SCE_A68K_LABEL
);
201 // If it wasn't a Doxygen keyword, change it to normal comment
202 else if (sc
.state
== SCE_A68K_COMMENT_DOXYGEN
) {
203 sc
.GetCurrent(Buffer
, sizeof(Buffer
));
204 if (!doxygenKeyword
.InList(Buffer
)) {
205 sc
.ChangeState(SCE_A68K_COMMENT
);
207 sc
.SetState(SCE_A68K_COMMENT
);
212 // If it wasn't an Alert, change it to normal comment
213 else if (sc
.state
== SCE_A68K_COMMENT_SPECIAL
) {
214 sc
.GetCurrent(Buffer
, sizeof(Buffer
));
215 if (!alert
.InList(Buffer
)) {
216 sc
.ChangeState(SCE_A68K_COMMENT
);
218 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
// NOTE(review): the condition choosing between the two SetState calls below is
// not visible in this copy.
220 sc
.SetState(SCE_A68K_COMMENT_DOXYGEN
);
223 sc
.SetState(SCE_A68K_COMMENT
);
229 // If we are in a comment, it's a Doxygen keyword or an Alert
// NOTE(review): the condition choosing between the two SetState calls below is
// not visible in this copy.
230 else if (sc
.state
== SCE_A68K_COMMENT
) {
232 sc
.SetState(SCE_A68K_COMMENT_DOXYGEN
);
235 sc
.SetState(SCE_A68K_COMMENT_SPECIAL
);
241 // Check if we are at the end of an identifier
242 // In this case, colourise it if was a keyword.
// Lookup order is: CPU instructions, extended instructions, registers,
// directives; the text is lower-cased before being compared.
243 else if ((sc
.state
== SCE_A68K_IDENTIFIER
) && !IsIdentifierChar(sc
.ch
)) {
244 sc
.GetCurrentLowered(Buffer
, sizeof(Buffer
)); // Buffer the string of the current context
245 if (cpuInstruction
.InList(Buffer
)) { // And check if it belongs to a keyword list
246 sc
.ChangeState(SCE_A68K_CPUINSTRUCTION
);
248 else if (extInstruction
.InList(Buffer
)) {
249 sc
.ChangeState(SCE_A68K_EXTINSTRUCTION
);
251 else if (registers
.InList(Buffer
)) {
252 sc
.ChangeState(SCE_A68K_REGISTER
);
254 else if (directive
.InList(Buffer
)) {
255 sc
.ChangeState(SCE_A68K_DIRECTIVE
);
259 // All special contexts are now handled. Come back to default style
260 sc
.SetState(SCE_A68K_DEFAULT
);
264 /************************************************************
266 * Check if we must enter a new state
268 ************************************************************/
270 // Something which begins at the beginning of a line, and with
271 // - '\' + an identifier start char, or
272 // - '\\@' + an identifier start char
273 // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
274 if (sc
.atLineStart
&& (sc
.ch
< 0x80) && IsIdentifierStart(sc
.chNext
) && (sc
.ch
== '\\')) {
275 sc
.SetState(SCE_A68K_LABEL
);
// NOTE(review): a statement between the next two tests is not visible in this
// copy; the inner test looks at the char expected after the two backslashes.
278 if (sc
.atLineStart
&& (sc
.ch
< 0x80) && (sc
.ch
== '\\') && (sc
.chNext
== '\\')) {
280 if ((sc
.ch
== '@') && IsIdentifierStart(sc
.chNext
)) {
281 sc
.ChangeState(SCE_A68K_LABEL
);
282 sc
.SetState(SCE_A68K_LABEL
);
286 // Label and macro identifiers start at the beginning of a line
287 // We set both as a macro id, but if it wasn't one (':' at the end),
288 // it will be changed as a label.
289 if (sc
.atLineStart
&& (sc
.ch
< 0x80) && IsIdentifierStart(sc
.ch
)) {
290 sc
.SetState(SCE_A68K_MACRO_DECLARATION
);
292 else if ((sc
.ch
< 0x80) && (sc
.ch
== ';')) { // Default: alert in a comment. If it doesn't match
293 sc
.SetState(SCE_A68K_COMMENT
); // with an alert, it will be toggled to a normal comment
295 else if ((sc
.ch
< 0x80) && isdigit(sc
.ch
)) { // Decimal numbers have no prefix
296 sc
.SetState(SCE_A68K_NUMBER_DEC
);
298 else if ((sc
.ch
< 0x80) && (sc
.ch
== '%')) { // Binary numbers are prefixed with '%'
299 sc
.SetState(SCE_A68K_NUMBER_BIN
);
301 else if ((sc
.ch
< 0x80) && (sc
.ch
== '$')) { // Hexadecimal numbers are prefixed with '$'
302 sc
.SetState(SCE_A68K_NUMBER_HEX
);
304 else if ((sc
.ch
< 0x80) && (sc
.ch
== '\'')) { // String (single-quoted)
305 sc
.SetState(SCE_A68K_STRING1
);
307 else if ((sc
.ch
< 0x80) && (sc
.ch
== '\"')) { // String (double-quoted)
308 sc
.SetState(SCE_A68K_STRING2
);
310 else if ((sc
.ch
< 0x80) && (sc
.ch
== '\\') && (isdigit(sc
.chNext
))) { // Replacement symbols in macro are prefixed with '\'
311 sc
.SetState(SCE_A68K_MACRO_ARG
);
313 else if ((sc
.ch
< 0x80) && IsIdentifierStart(sc
.ch
)) { // An identifier: constant, label, etc...
314 sc
.SetState(SCE_A68K_IDENTIFIER
);
// Operators: a 2-char operator takes one extra ForwardSetState so that both
// of its characters receive SCE_A68K_OPERATOR.
318 OpType
= GetOperatorType(sc
.ch
, sc
.chNext
); // Check if current char is an operator
319 if (OpType
!= NO_OPERATOR
) {
320 sc
.SetState(SCE_A68K_OPERATOR
);
321 if (OpType
== OPERATOR_2CHAR
) { // Check if the operator is 2 bytes long
322 sc
.ForwardSetState(SCE_A68K_OPERATOR
); // (>> or <<)
332 // Names of the keyword lists
// These descriptions are handed to the LexerModule constructor below.
// NOTE(review): only two entries are visible in this copy, and the initializer
// braces are missing. ColouriseA68kDoc reads keyword lists 0..5 (CPU
// instructions, registers, directives, extended instructions, alert words,
// Doxygen keywords), so the remaining entries appear to have been lost -
// TODO restore them, and any terminator the LexerModule expects, from upstream.
334 static const char * const a68kWordListDesc
[] =
339 "Extended instructions",
340 "Comment special words",
345 LexerModule
lmA68k(SCLEX_A68K
, ColouriseA68kDoc
, "a68k", 0, a68kWordListDesc
);