scintilla: Update scintilla with changeset 3662:1d1c06df8a2f using gtk+3
[anjuta-extras.git] / plugins / scintilla / scintilla / LexA68k.cxx
blob970e429c64a806c9189689e6ad6f18f37d66e44e
1 // Scintilla source code edit control
2 /** @file LexA68k.cxx
3 ** Lexer for 68k assembler sources
4 ** Written by Martial Demolins AKA Folco
5 **/
6 // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
7 // The License.txt file describes the conditions under which this software
8 // may be distributed.
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
// Return values for GetOperatorType.
// Kept as plain integer constants so they convert implicitly to the
// 'int' returned by GetOperatorType.
enum {
    NO_OPERATOR    = 0,    // The char is not an operator
    OPERATOR_1CHAR = 1,    // The operator is one char long
    OPERATOR_2CHAR = 2     // The operator is two chars long
};
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, an underscore or a backslash.
 *
 *  Values outside the ASCII range are rejected before isalpha() is
 *  called: the <ctype.h> functions are only defined for values that
 *  fit in an unsigned char (or EOF).
 */
static inline bool IsIdentifierStart (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
        return false;

    return (isalpha(ch) || (ch == '_') || (ch == '\\'));
}
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, or one of '_', '@', ':' and '.'.
 *
 *  Values outside the ASCII range are rejected before isalnum() is
 *  called: the <ctype.h> functions are only defined for values that
 *  fit in an unsigned char (or EOF).
 */
static inline bool IsIdentifierChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
        return false;

    return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
}
64 /**
65 * GetOperatorType
67 * Return:
68 * NO_OPERATOR if char is not an operator
69 * OPERATOR_1CHAR if the operator is one char long
70 * OPERATOR_2CHAR if the operator is two chars long
73 static inline int GetOperatorType (const int ch1, const int ch2)
75 int OpType = NO_OPERATOR;
77 if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
78 (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
79 OpType = OPERATOR_1CHAR;
81 else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
82 OpType = OPERATOR_2CHAR;
84 return OpType;
/**
 *  IsBin
 *
 *  Return true if the given char is a binary digit, i.e. '0' or '1'
 */
static inline bool IsBin (const int ch)
{
    return (ch == '0') || (ch == '1');
}
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, or one of '$', '[', ']', '{' and '}'.
 *
 *  Values outside the ASCII range are rejected before isalpha() is
 *  called: the <ctype.h> functions are only defined for values that
 *  fit in an unsigned char (or EOF).
 */
static inline bool IsDoxygenChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
        return false;

    return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
113 * ColouriseA68kDoc
115 * Main function, which colourises a 68k source
118 static void ColouriseA68kDoc (unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler)
121 // Get references to keywords lists
122 WordList &cpuInstruction = *keywordlists[0];
123 WordList &registers = *keywordlists[1];
124 WordList &directive = *keywordlists[2];
125 WordList &extInstruction = *keywordlists[3];
126 WordList &commentSpecial = *keywordlists[4];
127 WordList &doxygenKeyword = *keywordlists[5];
130 // Instanciate a context for our source
131 StyleContext sc(startPos, length, initStyle, styler);
134 /************************************************************
136 * Parse the text
138 ************************************************************/
140 for ( ; sc.More(); sc.Forward())
142 char Buffer[100];
143 int OpType;
145 // Reset style at beginning of line
146 if (sc.atLineStart)
147 sc.SetState(SCE_A68K_DEFAULT);
150 /************************************************************
152 * Handle current state if we are not in the "default style"
154 ************************************************************/
156 if (sc.state != SCE_A68K_DEFAULT)
158 // Check if current style continue.
159 // If this case, we loop because there is nothing else to do
160 if (((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
161 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
162 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
163 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Arg of macro
164 || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
165 || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
166 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
167 // Label. ' ' and '\t' are needed to handle macro declarations
168 || ((sc.state == SCE_A68K_LABEL) && (sc.ch != ':') && (sc.ch != ' ') && (sc.ch != '\t'))
169 || ((sc.state == SCE_A68K_IDENTIFIER) && (sc.ch < 0x80) && IsIdentifierChar(sc.ch)) // Identifier
170 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && IsDoxygenChar(sc.ch)) // Doxygen keyword
171 || ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && isalpha(sc.ch))) // Comment current word
173 continue;
176 // Check if some states terminate at the current char:
177 // we must include this char in the current style context
178 else if (((sc.state == SCE_A68K_STRING1) && (sc.ch < 0x80) && (sc.ch == '\'')) // String single-quoted
179 || ((sc.state == SCE_A68K_STRING2) && (sc.ch < 0x80) && (sc.ch == '\"')) // String double-quoted
180 || ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && (sc.ch == ':'))) // Label
182 sc.ForwardSetState(SCE_A68K_DEFAULT);
185 // Check for special words or Doxygen keywords in comments
186 else if (sc.state == SCE_A68K_COMMENT)
188 if (sc.ch == '\\') {
189 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
191 else if ((sc.ch < 0x80) && isalpha(sc.ch)) {
192 sc.SetState(SCE_A68K_COMMENT_WORD);
194 continue;
197 // Check for special words in comment
198 else if ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && !isalpha(sc.ch))
200 sc.GetCurrent(Buffer, sizeof(Buffer));
201 if (commentSpecial.InList(Buffer)) {
202 sc.ChangeState(SCE_A68K_COMMENT_SPECIAL);
204 else {
205 sc.ChangeState(SCE_A68K_COMMENT);
207 sc.SetState(SCE_A68K_COMMENT);
208 continue;
211 // Check for Doxygen keywords
212 else if ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && !IsDoxygenChar(sc.ch))
214 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
215 if (!doxygenKeyword.InList(Buffer)) {
216 sc.ChangeState(SCE_A68K_COMMENT);
218 sc.SetState(SCE_A68K_COMMENT);
219 continue;
222 // Check if we are in the case of a label which terminates without ':'
223 // It should be a macro declaration, not a label
224 else if ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && ((sc.ch == ' ') || (sc.ch == '\t')))
226 sc.ChangeState(SCE_A68K_MACRO_DECLARATION);
229 // Check if we are at the end of an identifier
230 // In this case, colourise it if was a keyword.
231 else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch))
233 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
234 if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
235 sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
237 else if (extInstruction.InList(Buffer)) {
238 sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
240 else if (registers.InList(Buffer)) {
241 sc.ChangeState(SCE_A68K_REGISTER);
243 else if (directive.InList(Buffer)) {
244 sc.ChangeState(SCE_A68K_DIRECTIVE);
248 // All special contexts are now handled.Come back to default style
249 sc.SetState(SCE_A68K_DEFAULT);
253 /************************************************************
255 * Check if we must enter a new state
257 ************************************************************/
259 // Label and macro identifiers start at the beginning of a line
260 // We set both as a label, but if it wasn't one (no ':' at the end),
261 // it will be changed as a macro identifier.
262 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
263 sc.SetState(SCE_A68K_LABEL);
265 else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Comment
266 sc.SetState(SCE_A68K_COMMENT);
268 else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
269 sc.SetState(SCE_A68K_NUMBER_DEC);
271 else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
272 sc.SetState(SCE_A68K_NUMBER_BIN);
274 else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
275 sc.SetState(SCE_A68K_NUMBER_HEX);
277 else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
278 sc.SetState(SCE_A68K_STRING1);
280 else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
281 sc.SetState(SCE_A68K_STRING2);
283 else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro
284 sc.SetState(SCE_A68K_MACRO_ARG);
286 else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
287 sc.SetState(SCE_A68K_IDENTIFIER);
289 else {
290 if (sc.ch < 0x80) {
291 OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
292 if (OpType != NO_OPERATOR) {
293 sc.SetState(SCE_A68K_OPERATOR);
294 if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
295 sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
300 } // End of for()
301 sc.Complete();
// Names of the keyword lists, in the order ColouriseA68kDoc reads them
// from its keywordlists argument (indices 0 to 5).
static const char * const a68kWordListDesc[] =
{
    "CPU instructions",
    "Registers",
    "Directives",
    "Extended instructions",
    "Comment special words",
    "Doxygen keywords",
    0                                   // End-of-list marker
};
318 LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);