upgraded to scintilla 3.2.0
[TortoiseGit.git] / ext / scintilla / lexers / LexAsm.cxx
blob244629703dd1833f682d11d80099181c32912aee
1 // Scintilla source code edit control
2 /** @file LexAsm.cxx
3 ** Lexer for Assembler, just for the MASM syntax
4 ** Written by The Black Horus
5 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
8 **/
9 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10 // The License.txt file describes the conditions under which this software may be distributed.
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
19 #include <string>
20 #include <map>
21 #include <set>
23 #include "ILexer.h"
24 #include "Scintilla.h"
25 #include "SciLexer.h"
27 #include "WordList.h"
28 #include "LexAccessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 #include "OptionSet.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
// Returns true when ch may appear inside an identifier or number token:
// an ASCII letter or digit, '.', '_' or '?'.
// Anything outside 7-bit ASCII is rejected before isalnum() is consulted,
// because isalnum() is only defined for unsigned char values and EOF;
// this includes negative values, which the upper-bound test alone let through.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
// Returns true when ch may begin an identifier: an ASCII letter or digit,
// or one of '_', '.', '%', '@', '$', '?'.
// Values outside 7-bit ASCII (negative ones included) are rejected before
// isalnum() is called, since isalnum() is undefined for them.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
// Returns true when ch is one of the characters styled as an operator.
// '.' is deliberately absent because it is used to make up numbers.
// Membership is decided purely by comparison against the operator set, so
// no <ctype.h> call is made and any int value (negative or non-ASCII
// included) is handled safely; letters and digits are simply not in the set.
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
61 static bool IsStreamCommentStyle(int style) {
62 return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
// Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline int LowerCase(int c) {
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? ('a' + c - 'A') : c;
}
71 // An individual named option for use in an OptionSet
73 // Options used for LexerAsm
struct OptionsAsm {
	std::string delimiter;          // COMMENT directive delimiter; empty selects '~' in Lex()
	bool fold;                      // master switch read by Fold()
	bool foldSyntaxBased;           // fold on the fold-start / fold-end directive lists
	bool foldCommentMultiline;      // fold stream comments
	bool foldCommentExplicit;       // fold on explicit marker pairs
	std::string foldExplicitStart;  // custom opening marker; empty means ";{"
	std::string foldExplicitEnd;    // custom closing marker; empty means ";}"
	bool foldExplicitAnywhere;      // accept explicit markers outside line comments
	bool foldCompact;               // flag blank lines with SC_FOLDLEVELWHITEFLAG

	// Defaults: folding off, syntax-based and compact folding on,
	// all strings empty, every other flag off.
	OptionsAsm() :
		delimiter(),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
// Descriptions of the keyword lists, terminated by a null entry.
// The index of each description is the list number accepted by
// LexerAsm::WordListSet().
static const char * const asmWordListDesc[] = {
	"CPU instructions",       // 0
	"FPU instructions",       // 1
	"Registers",              // 2
	"Directives",             // 3
	"Directive operands",     // 4
	"Extended instructions",  // 5
	"Directives4Foldstart",   // 6
	"Directives4Foldend",     // 7
	0
};
109 struct OptionSetAsm : public OptionSet<OptionsAsm> {
110 OptionSetAsm() {
111 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
112 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
114 DefineProperty("fold", &OptionsAsm::fold);
116 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
117 "Set this property to 0 to disable syntax based folding.");
119 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
120 "Set this property to 1 to enable folding multi-line comments.");
122 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
123 "This option enables folding explicit fold points when using the Asm lexer. "
124 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
125 "at the end of a section that should fold.");
127 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
128 "The string to use for explicit fold start points, replacing the standard ;{.");
130 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
131 "The string to use for explicit fold end points, replacing the standard ;}.");
133 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
134 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
136 DefineProperty("fold.compact", &OptionsAsm::foldCompact);
138 DefineWordListSets(asmWordListDesc);
142 class LexerAsm : public ILexer {
143 WordList cpuInstruction;
144 WordList mathInstruction;
145 WordList registers;
146 WordList directive;
147 WordList directiveOperand;
148 WordList extInstruction;
149 WordList directives4foldstart;
150 WordList directives4foldend;
151 OptionsAsm options;
152 OptionSetAsm osAsm;
153 public:
154 LexerAsm() {
156 virtual ~LexerAsm() {
158 void SCI_METHOD Release() {
159 delete this;
161 int SCI_METHOD Version() const {
162 return lvOriginal;
164 const char * SCI_METHOD PropertyNames() {
165 return osAsm.PropertyNames();
167 int SCI_METHOD PropertyType(const char *name) {
168 return osAsm.PropertyType(name);
170 const char * SCI_METHOD DescribeProperty(const char *name) {
171 return osAsm.DescribeProperty(name);
173 int SCI_METHOD PropertySet(const char *key, const char *val);
174 const char * SCI_METHOD DescribeWordListSets() {
175 return osAsm.DescribeWordListSets();
177 int SCI_METHOD WordListSet(int n, const char *wl);
178 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
179 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
181 void * SCI_METHOD PrivateCall(int, void *) {
182 return 0;
185 static ILexer *LexerFactoryAsm() {
186 return new LexerAsm();
190 int SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
191 if (osAsm.PropertySet(&options, key, val)) {
192 return 0;
194 return -1;
197 int SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
198 WordList *wordListN = 0;
199 switch (n) {
200 case 0:
201 wordListN = &cpuInstruction;
202 break;
203 case 1:
204 wordListN = &mathInstruction;
205 break;
206 case 2:
207 wordListN = &registers;
208 break;
209 case 3:
210 wordListN = &directive;
211 break;
212 case 4:
213 wordListN = &directiveOperand;
214 break;
215 case 5:
216 wordListN = &extInstruction;
217 break;
218 case 6:
219 wordListN = &directives4foldstart;
220 break;
221 case 7:
222 wordListN = &directives4foldend;
223 break;
225 int firstModification = -1;
226 if (wordListN) {
227 WordList wlNew;
228 wlNew.Set(wl);
229 if (*wordListN != wlNew) {
230 wordListN->Set(wl);
231 firstModification = 0;
234 return firstModification;
237 void SCI_METHOD LexerAsm::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
238 LexAccessor styler(pAccess);
240 // Do not leak onto next line
241 if (initStyle == SCE_ASM_STRINGEOL)
242 initStyle = SCE_ASM_DEFAULT;
244 StyleContext sc(startPos, length, initStyle, styler);
246 for (; sc.More(); sc.Forward())
249 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
250 if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
251 sc.SetState(SCE_ASM_STRING);
252 } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
253 sc.SetState(SCE_ASM_CHARACTER);
256 // Handle line continuation generically.
257 if (sc.ch == '\\') {
258 if (sc.chNext == '\n' || sc.chNext == '\r') {
259 sc.Forward();
260 if (sc.ch == '\r' && sc.chNext == '\n') {
261 sc.Forward();
263 continue;
267 // Determine if the current state should terminate.
268 if (sc.state == SCE_ASM_OPERATOR) {
269 if (!IsAsmOperator(sc.ch)) {
270 sc.SetState(SCE_ASM_DEFAULT);
272 } else if (sc.state == SCE_ASM_NUMBER) {
273 if (!IsAWordChar(sc.ch)) {
274 sc.SetState(SCE_ASM_DEFAULT);
276 } else if (sc.state == SCE_ASM_IDENTIFIER) {
277 if (!IsAWordChar(sc.ch) ) {
278 char s[100];
279 sc.GetCurrentLowered(s, sizeof(s));
280 bool IsDirective = false;
282 if (cpuInstruction.InList(s)) {
283 sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
284 } else if (mathInstruction.InList(s)) {
285 sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
286 } else if (registers.InList(s)) {
287 sc.ChangeState(SCE_ASM_REGISTER);
288 } else if (directive.InList(s)) {
289 sc.ChangeState(SCE_ASM_DIRECTIVE);
290 IsDirective = true;
291 } else if (directiveOperand.InList(s)) {
292 sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
293 } else if (extInstruction.InList(s)) {
294 sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
296 sc.SetState(SCE_ASM_DEFAULT);
297 if (IsDirective && !strcmp(s, "comment")) {
298 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
299 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
300 sc.ForwardSetState(SCE_ASM_DEFAULT);
302 if (sc.ch == delimiter) {
303 sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
307 } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
308 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
309 if (sc.ch == delimiter) {
310 while (!sc.atLineEnd) {
311 sc.Forward();
313 sc.SetState(SCE_ASM_DEFAULT);
315 } else if (sc.state == SCE_ASM_COMMENT ) {
316 if (sc.atLineEnd) {
317 sc.SetState(SCE_ASM_DEFAULT);
319 } else if (sc.state == SCE_ASM_STRING) {
320 if (sc.ch == '\\') {
321 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
322 sc.Forward();
324 } else if (sc.ch == '\"') {
325 sc.ForwardSetState(SCE_ASM_DEFAULT);
326 } else if (sc.atLineEnd) {
327 sc.ChangeState(SCE_ASM_STRINGEOL);
328 sc.ForwardSetState(SCE_ASM_DEFAULT);
330 } else if (sc.state == SCE_ASM_CHARACTER) {
331 if (sc.ch == '\\') {
332 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
333 sc.Forward();
335 } else if (sc.ch == '\'') {
336 sc.ForwardSetState(SCE_ASM_DEFAULT);
337 } else if (sc.atLineEnd) {
338 sc.ChangeState(SCE_ASM_STRINGEOL);
339 sc.ForwardSetState(SCE_ASM_DEFAULT);
343 // Determine if a new state should be entered.
344 if (sc.state == SCE_ASM_DEFAULT) {
345 if (sc.ch == ';'){
346 sc.SetState(SCE_ASM_COMMENT);
347 } else if (isascii(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && isascii(sc.chNext) && isdigit(sc.chNext)))) {
348 sc.SetState(SCE_ASM_NUMBER);
349 } else if (IsAWordStart(sc.ch)) {
350 sc.SetState(SCE_ASM_IDENTIFIER);
351 } else if (sc.ch == '\"') {
352 sc.SetState(SCE_ASM_STRING);
353 } else if (sc.ch == '\'') {
354 sc.SetState(SCE_ASM_CHARACTER);
355 } else if (IsAsmOperator(sc.ch)) {
356 sc.SetState(SCE_ASM_OPERATOR);
361 sc.Complete();
364 // Store both the current line's fold level and the next lines in the
365 // level store to make it easy to pick up with each increment
366 // and to make it possible to fiddle the current level for "else".
368 void SCI_METHOD LexerAsm::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
370 if (!options.fold)
371 return;
373 LexAccessor styler(pAccess);
375 unsigned int endPos = startPos + length;
376 int visibleChars = 0;
377 int lineCurrent = styler.GetLine(startPos);
378 int levelCurrent = SC_FOLDLEVELBASE;
379 if (lineCurrent > 0)
380 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
381 int levelNext = levelCurrent;
382 char chNext = styler[startPos];
383 int styleNext = styler.StyleAt(startPos);
384 int style = initStyle;
385 char word[100];
386 int wordlen = 0;
387 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
388 for (unsigned int i = startPos; i < endPos; i++) {
389 char ch = chNext;
390 chNext = styler.SafeGetCharAt(i + 1);
391 int stylePrev = style;
392 style = styleNext;
393 styleNext = styler.StyleAt(i + 1);
394 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
395 if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
396 if (!IsStreamCommentStyle(stylePrev)) {
397 levelNext++;
398 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
399 // Comments don't end at end of line and the next character may be unstyled.
400 levelNext--;
403 if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
404 if (userDefinedFoldMarkers) {
405 if (styler.Match(i, options.foldExplicitStart.c_str())) {
406 levelNext++;
407 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
408 levelNext--;
410 } else {
411 if (ch == ';') {
412 if (chNext == '{') {
413 levelNext++;
414 } else if (chNext == '}') {
415 levelNext--;
420 if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
421 word[wordlen++] = static_cast<char>(LowerCase(ch));
422 if (wordlen == 100) { // prevent overflow
423 word[0] = '\0';
424 wordlen = 1;
426 if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
427 word[wordlen] = '\0';
428 wordlen = 0;
429 if (directives4foldstart.InList(word)) {
430 levelNext++;
431 } else if (directives4foldend.InList(word)){
432 levelNext--;
436 if (!IsASpace(ch))
437 visibleChars++;
438 if (atEOL || (i == endPos-1)) {
439 int levelUse = levelCurrent;
440 int lev = levelUse | levelNext << 16;
441 if (visibleChars == 0 && options.foldCompact)
442 lev |= SC_FOLDLEVELWHITEFLAG;
443 if (levelUse < levelNext)
444 lev |= SC_FOLDLEVELHEADERFLAG;
445 if (lev != styler.LevelAt(lineCurrent)) {
446 styler.SetLevel(lineCurrent, lev);
448 lineCurrent++;
449 levelCurrent = levelNext;
450 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
451 // There is an empty line at end of file so give it same level and empty
452 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
454 visibleChars = 0;
459 LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);