updated Scintilla to 2.29
[TortoiseGit.git] / ext / scintilla / lexers / LexAsm.cxx
blobc28a8e9273d154a399e214899948b2fbd512fc96
1 // Scintilla source code edit control
2 /** @file LexAsm.cxx
3 ** Lexer for Assembler, just for the MASM syntax
4 ** Written by The Black Horus
5 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
8 **/
9 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10 // The License.txt file describes the conditions under which this software may be distributed.
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
19 #ifdef _MSC_VER
20 #pragma warning(disable: 4786)
21 #endif
23 #include <string>
24 #include <map>
25 #include <set>
27 #include "ILexer.h"
28 #include "Scintilla.h"
29 #include "SciLexer.h"
31 #include "WordList.h"
32 #include "LexAccessor.h"
33 #include "StyleContext.h"
34 #include "CharacterSet.h"
35 #include "LexerModule.h"
36 #include "OptionSet.h"
38 #ifdef SCI_NAMESPACE
39 using namespace Scintilla;
40 #endif
/**
 * Tell whether @a ch can continue an identifier or number token.
 * Accepted characters are ASCII letters and digits plus '.', '_' and '?'.
 * Anything outside the range 0..0x7F is rejected.
 */
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for EOF and values representable as
	// unsigned char, so negative and non-ASCII input is filtered out first.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
/**
 * Tell whether @a ch can begin an identifier.
 * Accepted characters are ASCII letters and digits plus
 * '_', '.', '%', '@', '$' and '?'.
 * Anything outside the range 0..0x7F is rejected.
 */
static inline bool IsAWordStart(const int ch) {
	// isalnum() is only defined for EOF and values representable as
	// unsigned char, so negative and non-ASCII input is filtered out first.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
/**
 * Tell whether @a ch is one of the characters styled as an operator.
 * '.' is deliberately left out as it is used to make up numbers.
 * No alphanumeric character is in the operator set, so no character
 * classification call is needed and every int value is handled safely.
 */
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
65 static bool IsStreamCommentStyle(int style) {
66 return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
/**
 * Map an ASCII upper case letter to its lower case counterpart.
 * Every other value is returned unchanged.
 */
static inline int LowerCase(int c) {
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? ('a' + c - 'A') : c;
}
75 // An individual named option for use in an OptionSet
// Option values consulted by LexerAsm; the constructor supplies the defaults.
struct OptionsAsm {
	std::string delimiter;          // COMMENT directive delimiter; empty selects '~'
	bool fold;                      // master switch checked at the top of Fold()
	bool foldSyntaxBased;           // fold on the fold start/end directive lists
	bool foldCommentMultiline;      // fold stream comments
	bool foldCommentExplicit;       // fold on explicit fold markers
	std::string foldExplicitStart;  // custom explicit fold start marker
	std::string foldExplicitEnd;    // custom explicit fold end marker
	bool foldExplicitAnywhere;      // explicit markers allowed outside line comments
	bool foldCompact;               // flag lines without visible characters as white
	OptionsAsm() :
		delimiter(""),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
// Descriptions of the keyword lists, in the order of the indices handled by
// LexerAsm::WordListSet (0 = CPU instructions ... 7 = Directives4Foldend).
// The final null entry marks the end of the array.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	0
};
113 struct OptionSetAsm : public OptionSet<OptionsAsm> {
114 OptionSetAsm() {
115 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
116 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
118 DefineProperty("fold", &OptionsAsm::fold);
120 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
121 "Set this property to 0 to disable syntax based folding.");
123 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
124 "Set this property to 1 to enable folding multi-line comments.");
126 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
127 "This option enables folding explicit fold points when using the Asm lexer. "
128 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
129 "at the end of a section that should fold.");
131 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
132 "The string to use for explicit fold start points, replacing the standard ;{.");
134 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
135 "The string to use for explicit fold end points, replacing the standard ;}.");
137 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
138 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
140 DefineProperty("fold.compact", &OptionsAsm::foldCompact);
142 DefineWordListSets(asmWordListDesc);
146 class LexerAsm : public ILexer {
147 WordList cpuInstruction;
148 WordList mathInstruction;
149 WordList registers;
150 WordList directive;
151 WordList directiveOperand;
152 WordList extInstruction;
153 WordList directives4foldstart;
154 WordList directives4foldend;
155 OptionsAsm options;
156 OptionSetAsm osAsm;
157 public:
158 LexerAsm() {
160 ~LexerAsm() {
162 void SCI_METHOD Release() {
163 delete this;
165 int SCI_METHOD Version() const {
166 return lvOriginal;
168 const char * SCI_METHOD PropertyNames() {
169 return osAsm.PropertyNames();
171 int SCI_METHOD PropertyType(const char *name) {
172 return osAsm.PropertyType(name);
174 const char * SCI_METHOD DescribeProperty(const char *name) {
175 return osAsm.DescribeProperty(name);
177 int SCI_METHOD PropertySet(const char *key, const char *val);
178 const char * SCI_METHOD DescribeWordListSets() {
179 return osAsm.DescribeWordListSets();
181 int SCI_METHOD WordListSet(int n, const char *wl);
182 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
183 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
185 void * SCI_METHOD PrivateCall(int, void *) {
186 return 0;
189 static ILexer *LexerFactoryAsm() {
190 return new LexerAsm();
194 int SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
195 if (osAsm.PropertySet(&options, key, val)) {
196 return 0;
198 return -1;
201 int SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
202 WordList *wordListN = 0;
203 switch (n) {
204 case 0:
205 wordListN = &cpuInstruction;
206 break;
207 case 1:
208 wordListN = &mathInstruction;
209 break;
210 case 2:
211 wordListN = &registers;
212 break;
213 case 3:
214 wordListN = &directive;
215 break;
216 case 4:
217 wordListN = &directiveOperand;
218 break;
219 case 5:
220 wordListN = &extInstruction;
221 break;
222 case 6:
223 wordListN = &directives4foldstart;
224 break;
225 case 7:
226 wordListN = &directives4foldend;
227 break;
229 int firstModification = -1;
230 if (wordListN) {
231 WordList wlNew;
232 wlNew.Set(wl);
233 if (*wordListN != wlNew) {
234 wordListN->Set(wl);
235 firstModification = 0;
238 return firstModification;
241 void SCI_METHOD LexerAsm::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
242 LexAccessor styler(pAccess);
244 // Do not leak onto next line
245 if (initStyle == SCE_ASM_STRINGEOL)
246 initStyle = SCE_ASM_DEFAULT;
248 StyleContext sc(startPos, length, initStyle, styler);
250 for (; sc.More(); sc.Forward())
253 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
254 if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
255 sc.SetState(SCE_ASM_STRING);
256 } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
257 sc.SetState(SCE_ASM_CHARACTER);
260 // Handle line continuation generically.
261 if (sc.ch == '\\') {
262 if (sc.chNext == '\n' || sc.chNext == '\r') {
263 sc.Forward();
264 if (sc.ch == '\r' && sc.chNext == '\n') {
265 sc.Forward();
267 continue;
271 // Determine if the current state should terminate.
272 if (sc.state == SCE_ASM_OPERATOR) {
273 if (!IsAsmOperator(sc.ch)) {
274 sc.SetState(SCE_ASM_DEFAULT);
276 } else if (sc.state == SCE_ASM_NUMBER) {
277 if (!IsAWordChar(sc.ch)) {
278 sc.SetState(SCE_ASM_DEFAULT);
280 } else if (sc.state == SCE_ASM_IDENTIFIER) {
281 if (!IsAWordChar(sc.ch) ) {
282 char s[100];
283 sc.GetCurrentLowered(s, sizeof(s));
284 bool IsDirective = false;
286 if (cpuInstruction.InList(s)) {
287 sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
288 } else if (mathInstruction.InList(s)) {
289 sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
290 } else if (registers.InList(s)) {
291 sc.ChangeState(SCE_ASM_REGISTER);
292 } else if (directive.InList(s)) {
293 sc.ChangeState(SCE_ASM_DIRECTIVE);
294 IsDirective = true;
295 } else if (directiveOperand.InList(s)) {
296 sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
297 } else if (extInstruction.InList(s)) {
298 sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
300 sc.SetState(SCE_ASM_DEFAULT);
301 if (IsDirective && !strcmp(s, "comment")) {
302 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
303 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
304 sc.ForwardSetState(SCE_ASM_DEFAULT);
306 if (sc.ch == delimiter) {
307 sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
311 } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
312 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
313 if (sc.ch == delimiter) {
314 while (!sc.atLineEnd) {
315 sc.Forward();
317 sc.SetState(SCE_ASM_DEFAULT);
319 } else if (sc.state == SCE_ASM_COMMENT ) {
320 if (sc.atLineEnd) {
321 sc.SetState(SCE_ASM_DEFAULT);
323 } else if (sc.state == SCE_ASM_STRING) {
324 if (sc.ch == '\\') {
325 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
326 sc.Forward();
328 } else if (sc.ch == '\"') {
329 sc.ForwardSetState(SCE_ASM_DEFAULT);
330 } else if (sc.atLineEnd) {
331 sc.ChangeState(SCE_ASM_STRINGEOL);
332 sc.ForwardSetState(SCE_ASM_DEFAULT);
334 } else if (sc.state == SCE_ASM_CHARACTER) {
335 if (sc.ch == '\\') {
336 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
337 sc.Forward();
339 } else if (sc.ch == '\'') {
340 sc.ForwardSetState(SCE_ASM_DEFAULT);
341 } else if (sc.atLineEnd) {
342 sc.ChangeState(SCE_ASM_STRINGEOL);
343 sc.ForwardSetState(SCE_ASM_DEFAULT);
347 // Determine if a new state should be entered.
348 if (sc.state == SCE_ASM_DEFAULT) {
349 if (sc.ch == ';'){
350 sc.SetState(SCE_ASM_COMMENT);
351 } else if (isascii(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && isascii(sc.chNext) && isdigit(sc.chNext)))) {
352 sc.SetState(SCE_ASM_NUMBER);
353 } else if (IsAWordStart(sc.ch)) {
354 sc.SetState(SCE_ASM_IDENTIFIER);
355 } else if (sc.ch == '\"') {
356 sc.SetState(SCE_ASM_STRING);
357 } else if (sc.ch == '\'') {
358 sc.SetState(SCE_ASM_CHARACTER);
359 } else if (IsAsmOperator(sc.ch)) {
360 sc.SetState(SCE_ASM_OPERATOR);
365 sc.Complete();
368 // Store both the current line's fold level and the next lines in the
369 // level store to make it easy to pick up with each increment
370 // and to make it possible to fiddle the current level for "else".
372 void SCI_METHOD LexerAsm::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
374 if (!options.fold)
375 return;
377 LexAccessor styler(pAccess);
379 unsigned int endPos = startPos + length;
380 int visibleChars = 0;
381 int lineCurrent = styler.GetLine(startPos);
382 int levelCurrent = SC_FOLDLEVELBASE;
383 if (lineCurrent > 0)
384 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
385 int levelNext = levelCurrent;
386 char chNext = styler[startPos];
387 int styleNext = styler.StyleAt(startPos);
388 int style = initStyle;
389 char word[100];
390 int wordlen = 0;
391 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
392 for (unsigned int i = startPos; i < endPos; i++) {
393 char ch = chNext;
394 chNext = styler.SafeGetCharAt(i + 1);
395 int stylePrev = style;
396 style = styleNext;
397 styleNext = styler.StyleAt(i + 1);
398 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
399 if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
400 if (!IsStreamCommentStyle(stylePrev)) {
401 levelNext++;
402 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
403 // Comments don't end at end of line and the next character may be unstyled.
404 levelNext--;
407 if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
408 if (userDefinedFoldMarkers) {
409 if (styler.Match(i, options.foldExplicitStart.c_str())) {
410 levelNext++;
411 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
412 levelNext--;
414 } else {
415 if (ch == ';') {
416 if (chNext == '{') {
417 levelNext++;
418 } else if (chNext == '}') {
419 levelNext--;
424 if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
425 word[wordlen++] = static_cast<char>(LowerCase(ch));
426 if (wordlen == 100) { // prevent overflow
427 word[0] = '\0';
428 wordlen = 1;
430 if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
431 word[wordlen] = '\0';
432 wordlen = 0;
433 if (directives4foldstart.InList(word)) {
434 levelNext++;
435 } else if (directives4foldend.InList(word)){
436 levelNext--;
440 if (!IsASpace(ch))
441 visibleChars++;
442 if (atEOL || (i == endPos-1)) {
443 int levelUse = levelCurrent;
444 int lev = levelUse | levelNext << 16;
445 if (visibleChars == 0 && options.foldCompact)
446 lev |= SC_FOLDLEVELWHITEFLAG;
447 if (levelUse < levelNext)
448 lev |= SC_FOLDLEVELHEADERFLAG;
449 if (lev != styler.LevelAt(lineCurrent)) {
450 styler.SetLevel(lineCurrent, lev);
452 lineCurrent++;
453 levelCurrent = levelNext;
454 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
455 // There is an empty line at end of file so give it same level and empty
456 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
458 visibleChars = 0;
// Defines the LexerModule object for SCLEX_ASM. It is constructed with the
// LexerAsm::LexerFactoryAsm factory function, the name "asm" and the
// asmWordListDesc keyword list descriptions.
463 LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);