travis: Add a Travis CI settings file
[geany-mirror.git] / scintilla / lexers / LexAsm.cxx
blob37a4efe3578df23f1f5cf67ce6f1ed575cfda66e
// Scintilla source code edit control
/** @file LexAsm.cxx
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by The Black Horus
 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
19 #include <string>
20 #include <map>
21 #include <set>
23 #include "ILexer.h"
24 #include "Scintilla.h"
25 #include "SciLexer.h"
27 #include "WordList.h"
28 #include "LexAccessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 #include "OptionSet.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
/**
 * Tell whether @a ch may appear inside an identifier or number token.
 *
 * Accepts ASCII letters and digits plus '.', '_' and '?'.
 * Values outside 0..0x7F are rejected up front: isalnum() is only defined
 * for arguments representable as unsigned char (or EOF), so a negative
 * character value must never reach it.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
/**
 * Tell whether @a ch may begin an identifier.
 *
 * Accepts ASCII letters and digits plus '_', '.', '%', '@', '$' and '?'.
 * Values outside 0..0x7F are rejected up front: isalnum() is only defined
 * for arguments representable as unsigned char (or EOF), so a negative
 * character value must never reach it.
 */
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
/**
 * Tell whether @a ch is one of the characters styled as an operator.
 *
 * '.' is deliberately left out as it is used to make up numbers.
 * No alphanumeric character is in the set, so no ctype lookup is needed
 * and any int value (including negative ones) is handled safely.
 */
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
61 static bool IsStreamCommentStyle(int style) {
62 return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
/**
 * Map an ASCII upper-case letter to its lower-case counterpart.
 * Every other value is returned unchanged.
 */
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}
71 // An individual named option for use in an OptionSet
// Options used for LexerAsm
struct OptionsAsm {
	// First character is the COMMENT directive delimiter; empty means '~'.
	std::string delimiter;
	// Master switch: Fold() returns immediately when false.
	bool fold;
	// Fold on the directives found in the fold start/end word lists.
	bool foldSyntaxBased;
	// Fold multi-line (stream) comments.
	bool foldCommentMultiline;
	// Fold on explicit fold markers.
	bool foldCommentExplicit;
	// User supplied fold markers; both must be non-empty to replace ;{ and ;}.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	// Recognise explicit fold markers outside line comments too.
	bool foldExplicitAnywhere;
	// Flag lines without visible characters as white space for folding.
	bool foldCompact;

	// Defaults: folding off, syntax based and compact folding on, all
	// strings empty so the built-in markers and delimiter apply.
	OptionsAsm() :
		delimiter(""),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
// Descriptions of the keyword lists, in the index order used by
// LexerAsm::WordListSet. The array must end with a null pointer so that
// consumers can find its end.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	0
};
109 struct OptionSetAsm : public OptionSet<OptionsAsm> {
110 OptionSetAsm() {
111 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
112 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
114 DefineProperty("fold", &OptionsAsm::fold);
116 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
117 "Set this property to 0 to disable syntax based folding.");
119 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
120 "Set this property to 1 to enable folding multi-line comments.");
122 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
123 "This option enables folding explicit fold points when using the Asm lexer. "
124 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
125 "at the end of a section that should fold.");
127 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
128 "The string to use for explicit fold start points, replacing the standard ;{.");
130 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
131 "The string to use for explicit fold end points, replacing the standard ;}.");
133 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
134 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
136 DefineProperty("fold.compact", &OptionsAsm::foldCompact);
138 DefineWordListSets(asmWordListDesc);
142 class LexerAsm : public ILexer {
143 WordList cpuInstruction;
144 WordList mathInstruction;
145 WordList registers;
146 WordList directive;
147 WordList directiveOperand;
148 WordList extInstruction;
149 WordList directives4foldstart;
150 WordList directives4foldend;
151 OptionsAsm options;
152 OptionSetAsm osAsm;
153 int commentChar;
154 public:
155 LexerAsm(int commentChar_) {
156 commentChar = commentChar_;
158 virtual ~LexerAsm() {
160 void SCI_METHOD Release() {
161 delete this;
163 int SCI_METHOD Version() const {
164 return lvOriginal;
166 const char * SCI_METHOD PropertyNames() {
167 return osAsm.PropertyNames();
169 int SCI_METHOD PropertyType(const char *name) {
170 return osAsm.PropertyType(name);
172 const char * SCI_METHOD DescribeProperty(const char *name) {
173 return osAsm.DescribeProperty(name);
175 int SCI_METHOD PropertySet(const char *key, const char *val);
176 const char * SCI_METHOD DescribeWordListSets() {
177 return osAsm.DescribeWordListSets();
179 int SCI_METHOD WordListSet(int n, const char *wl);
180 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
181 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
183 void * SCI_METHOD PrivateCall(int, void *) {
184 return 0;
187 static ILexer *LexerFactoryAsm() {
188 return new LexerAsm(';');
191 static ILexer *LexerFactoryAs() {
192 return new LexerAsm('#');
196 int SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
197 if (osAsm.PropertySet(&options, key, val)) {
198 return 0;
200 return -1;
203 int SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
204 WordList *wordListN = 0;
205 switch (n) {
206 case 0:
207 wordListN = &cpuInstruction;
208 break;
209 case 1:
210 wordListN = &mathInstruction;
211 break;
212 case 2:
213 wordListN = &registers;
214 break;
215 case 3:
216 wordListN = &directive;
217 break;
218 case 4:
219 wordListN = &directiveOperand;
220 break;
221 case 5:
222 wordListN = &extInstruction;
223 break;
224 case 6:
225 wordListN = &directives4foldstart;
226 break;
227 case 7:
228 wordListN = &directives4foldend;
229 break;
231 int firstModification = -1;
232 if (wordListN) {
233 WordList wlNew;
234 wlNew.Set(wl);
235 if (*wordListN != wlNew) {
236 wordListN->Set(wl);
237 firstModification = 0;
240 return firstModification;
243 void SCI_METHOD LexerAsm::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
244 LexAccessor styler(pAccess);
246 // Do not leak onto next line
247 if (initStyle == SCE_ASM_STRINGEOL)
248 initStyle = SCE_ASM_DEFAULT;
250 StyleContext sc(startPos, length, initStyle, styler);
252 for (; sc.More(); sc.Forward())
255 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
256 if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
257 sc.SetState(SCE_ASM_STRING);
258 } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
259 sc.SetState(SCE_ASM_CHARACTER);
262 // Handle line continuation generically.
263 if (sc.ch == '\\') {
264 if (sc.chNext == '\n' || sc.chNext == '\r') {
265 sc.Forward();
266 if (sc.ch == '\r' && sc.chNext == '\n') {
267 sc.Forward();
269 continue;
273 // Determine if the current state should terminate.
274 if (sc.state == SCE_ASM_OPERATOR) {
275 if (!IsAsmOperator(sc.ch)) {
276 sc.SetState(SCE_ASM_DEFAULT);
278 } else if (sc.state == SCE_ASM_NUMBER) {
279 if (!IsAWordChar(sc.ch)) {
280 sc.SetState(SCE_ASM_DEFAULT);
282 } else if (sc.state == SCE_ASM_IDENTIFIER) {
283 if (!IsAWordChar(sc.ch) ) {
284 char s[100];
285 sc.GetCurrentLowered(s, sizeof(s));
286 bool IsDirective = false;
288 if (cpuInstruction.InList(s)) {
289 sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
290 } else if (mathInstruction.InList(s)) {
291 sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
292 } else if (registers.InList(s)) {
293 sc.ChangeState(SCE_ASM_REGISTER);
294 } else if (directive.InList(s)) {
295 sc.ChangeState(SCE_ASM_DIRECTIVE);
296 IsDirective = true;
297 } else if (directiveOperand.InList(s)) {
298 sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
299 } else if (extInstruction.InList(s)) {
300 sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
302 sc.SetState(SCE_ASM_DEFAULT);
303 if (IsDirective && !strcmp(s, "comment")) {
304 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
305 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
306 sc.ForwardSetState(SCE_ASM_DEFAULT);
308 if (sc.ch == delimiter) {
309 sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
313 } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
314 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
315 if (sc.ch == delimiter) {
316 while (!sc.atLineEnd) {
317 sc.Forward();
319 sc.SetState(SCE_ASM_DEFAULT);
321 } else if (sc.state == SCE_ASM_COMMENT ) {
322 if (sc.atLineEnd) {
323 sc.SetState(SCE_ASM_DEFAULT);
325 } else if (sc.state == SCE_ASM_STRING) {
326 if (sc.ch == '\\') {
327 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
328 sc.Forward();
330 } else if (sc.ch == '\"') {
331 sc.ForwardSetState(SCE_ASM_DEFAULT);
332 } else if (sc.atLineEnd) {
333 sc.ChangeState(SCE_ASM_STRINGEOL);
334 sc.ForwardSetState(SCE_ASM_DEFAULT);
336 } else if (sc.state == SCE_ASM_CHARACTER) {
337 if (sc.ch == '\\') {
338 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
339 sc.Forward();
341 } else if (sc.ch == '\'') {
342 sc.ForwardSetState(SCE_ASM_DEFAULT);
343 } else if (sc.atLineEnd) {
344 sc.ChangeState(SCE_ASM_STRINGEOL);
345 sc.ForwardSetState(SCE_ASM_DEFAULT);
349 // Determine if a new state should be entered.
350 if (sc.state == SCE_ASM_DEFAULT) {
351 if (sc.ch == commentChar){
352 sc.SetState(SCE_ASM_COMMENT);
353 } else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
354 sc.SetState(SCE_ASM_NUMBER);
355 } else if (IsAWordStart(sc.ch)) {
356 sc.SetState(SCE_ASM_IDENTIFIER);
357 } else if (sc.ch == '\"') {
358 sc.SetState(SCE_ASM_STRING);
359 } else if (sc.ch == '\'') {
360 sc.SetState(SCE_ASM_CHARACTER);
361 } else if (IsAsmOperator(sc.ch)) {
362 sc.SetState(SCE_ASM_OPERATOR);
367 sc.Complete();
370 // Store both the current line's fold level and the next lines in the
371 // level store to make it easy to pick up with each increment
372 // and to make it possible to fiddle the current level for "else".
374 void SCI_METHOD LexerAsm::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
376 if (!options.fold)
377 return;
379 LexAccessor styler(pAccess);
381 unsigned int endPos = startPos + length;
382 int visibleChars = 0;
383 int lineCurrent = styler.GetLine(startPos);
384 int levelCurrent = SC_FOLDLEVELBASE;
385 if (lineCurrent > 0)
386 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
387 int levelNext = levelCurrent;
388 char chNext = styler[startPos];
389 int styleNext = styler.StyleAt(startPos);
390 int style = initStyle;
391 char word[100];
392 int wordlen = 0;
393 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
394 for (unsigned int i = startPos; i < endPos; i++) {
395 char ch = chNext;
396 chNext = styler.SafeGetCharAt(i + 1);
397 int stylePrev = style;
398 style = styleNext;
399 styleNext = styler.StyleAt(i + 1);
400 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
401 if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
402 if (!IsStreamCommentStyle(stylePrev)) {
403 levelNext++;
404 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
405 // Comments don't end at end of line and the next character may be unstyled.
406 levelNext--;
409 if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
410 if (userDefinedFoldMarkers) {
411 if (styler.Match(i, options.foldExplicitStart.c_str())) {
412 levelNext++;
413 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
414 levelNext--;
416 } else {
417 if (ch == ';') {
418 if (chNext == '{') {
419 levelNext++;
420 } else if (chNext == '}') {
421 levelNext--;
426 if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
427 word[wordlen++] = static_cast<char>(LowerCase(ch));
428 if (wordlen == 100) { // prevent overflow
429 word[0] = '\0';
430 wordlen = 1;
432 if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
433 word[wordlen] = '\0';
434 wordlen = 0;
435 if (directives4foldstart.InList(word)) {
436 levelNext++;
437 } else if (directives4foldend.InList(word)){
438 levelNext--;
442 if (!IsASpace(ch))
443 visibleChars++;
444 if (atEOL || (i == endPos-1)) {
445 int levelUse = levelCurrent;
446 int lev = levelUse | levelNext << 16;
447 if (visibleChars == 0 && options.foldCompact)
448 lev |= SC_FOLDLEVELWHITEFLAG;
449 if (levelUse < levelNext)
450 lev |= SC_FOLDLEVELHEADERFLAG;
451 if (lev != styler.LevelAt(lineCurrent)) {
452 styler.SetLevel(lineCurrent, lev);
454 lineCurrent++;
455 levelCurrent = levelNext;
456 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
457 // There is an empty line at end of file so give it same level and empty
458 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
460 visibleChars = 0;
465 LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
466 LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);