Fixed issue #2175: TortoiseGitBlame fails to search if line has non-ascii chars and...
[TortoiseGit.git] / ext / scintilla / lexers / LexAbaqus.cxx
blobe1584a3ee68909e17b91dc7551dd70a55bfa1b0b
1 // Scintilla source code edit control
2 /** @file LexABAQUS.cxx
3 ** Lexer for ABAQUS. Based on the lexer for APDL by Hadar Raz.
4 ** By Sergio Lucato.
5 ** Sort of completely rewritten by Gertjan Kloosterman
6 **/
7 // The License.txt file describes the conditions under which this software may be distributed.
9 // Code folding copied and modified from LexBasic.cxx
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
/**
 * Tells whether ch may appear inside an ABAQUS keyword or parameter name.
 *
 * Accepted characters are ASCII letters, ASCII digits, '_' and the blank.
 * Anything outside 0..0x7F (including negative values) is rejected up front,
 * because the <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.
 *
 * @param ch character code to classify
 * @return true when ch is a keyword character
 */
static inline bool IsAKeywordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || (ch == '_') || (ch == ' ');
}
/**
 * Tells whether ch may appear inside an unquoted parameter value or set name.
 *
 * Accepted characters are ASCII letters, ASCII digits, '_', '.' and '-'.
 * Anything outside 0..0x7F (including negative values) is rejected up front,
 * because the <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.
 *
 * @param ch character code to classify
 * @return true when ch is a set/value character
 */
static inline bool IsASetChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || (ch == '_') || (ch == '.') || (ch == '-');
}
41 static void ColouriseABAQUSDoc(unsigned int startPos, int length, int initStyle, WordList*[] /* *keywordlists[] */,
42 Accessor &styler) {
43 enum localState { KW_LINE_KW, KW_LINE_COMMA, KW_LINE_PAR, KW_LINE_EQ, KW_LINE_VAL, \
44 DAT_LINE_VAL, DAT_LINE_COMMA,\
45 COMMENT_LINE,\
46 ST_ERROR, LINE_END } state ;
48 // Do not leak onto next line
49 state = LINE_END ;
50 initStyle = SCE_ABAQUS_DEFAULT;
51 StyleContext sc(startPos, length, initStyle, styler);
53 // Things are actually quite simple
54 // we have commentlines
55 // keywordlines and datalines
56 // On a data line there will only be colouring of numbers
57 // a keyword line is constructed as
58 // *word,[ paramname[=paramvalue]]*
59 // if the line ends with a , the keyword line continues onto the new line
61 for (; sc.More(); sc.Forward()) {
62 switch ( state ) {
63 case KW_LINE_KW :
64 if ( sc.atLineEnd ) {
65 // finished the line in keyword state, switch to LINE_END
66 sc.SetState(SCE_ABAQUS_DEFAULT) ;
67 state = LINE_END ;
68 } else if ( IsAKeywordChar(sc.ch) ) {
69 // nothing changes
70 state = KW_LINE_KW ;
71 } else if ( sc.ch == ',' ) {
72 // Well well we say a comma, arguments *MUST* follow
73 sc.SetState(SCE_ABAQUS_OPERATOR) ;
74 state = KW_LINE_COMMA ;
75 } else {
76 // Flag an error
77 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
78 state = ST_ERROR ;
80 // Done with processing
81 break ;
82 case KW_LINE_COMMA :
83 // acomma on a keywordline was seen
84 if ( IsAKeywordChar(sc.ch)) {
85 sc.SetState(SCE_ABAQUS_ARGUMENT) ;
86 state = KW_LINE_PAR ;
87 } else if ( sc.atLineEnd || (sc.ch == ',') ) {
88 // we remain in keyword mode
89 state = KW_LINE_COMMA ;
90 } else if ( sc.ch == ' ' ) {
91 sc.SetState(SCE_ABAQUS_DEFAULT) ;
92 state = KW_LINE_COMMA ;
93 } else {
94 // Anything else constitutes an error
95 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
96 state = ST_ERROR ;
98 break ;
99 case KW_LINE_PAR :
100 if ( sc.atLineEnd ) {
101 sc.SetState(SCE_ABAQUS_DEFAULT) ;
102 state = LINE_END ;
103 } else if ( IsAKeywordChar(sc.ch) || (sc.ch == '-') ) {
104 // remain in this state
105 state = KW_LINE_PAR ;
106 } else if ( sc.ch == ',' ) {
107 sc.SetState(SCE_ABAQUS_OPERATOR) ;
108 state = KW_LINE_COMMA ;
109 } else if ( sc.ch == '=' ) {
110 sc.SetState(SCE_ABAQUS_OPERATOR) ;
111 state = KW_LINE_EQ ;
112 } else {
113 // Anything else constitutes an error
114 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
115 state = ST_ERROR ;
117 break ;
118 case KW_LINE_EQ :
119 if ( sc.ch == ' ' ) {
120 sc.SetState(SCE_ABAQUS_DEFAULT) ;
121 // remain in this state
122 state = KW_LINE_EQ ;
123 } else if ( IsADigit(sc.ch) || (sc.ch == '-') || (sc.ch == '.' && IsADigit(sc.chNext)) ) {
124 sc.SetState(SCE_ABAQUS_NUMBER) ;
125 state = KW_LINE_VAL ;
126 } else if ( IsAKeywordChar(sc.ch) ) {
127 sc.SetState(SCE_ABAQUS_DEFAULT) ;
128 state = KW_LINE_VAL ;
129 } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
130 sc.SetState(SCE_ABAQUS_STRING) ;
131 state = KW_LINE_VAL ;
132 } else {
133 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
134 state = ST_ERROR ;
136 break ;
137 case KW_LINE_VAL :
138 if ( sc.atLineEnd ) {
139 sc.SetState(SCE_ABAQUS_DEFAULT) ;
140 state = LINE_END ;
141 } else if ( IsASetChar(sc.ch) && (sc.state == SCE_ABAQUS_DEFAULT) ) {
142 // nothing changes
143 state = KW_LINE_VAL ;
144 } else if (( (IsADigit(sc.ch) || sc.ch == '.' || (sc.ch == 'e' || sc.ch == 'E') ||
145 ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) &&
146 (sc.state == SCE_ABAQUS_NUMBER)) {
147 // remain in number mode
148 state = KW_LINE_VAL ;
149 } else if (sc.state == SCE_ABAQUS_STRING) {
150 // accept everything until a closing quote
151 if ( sc.ch == '\'' || sc.ch == '\"' ) {
152 sc.SetState(SCE_ABAQUS_DEFAULT) ;
153 state = KW_LINE_VAL ;
155 } else if ( sc.ch == ',' ) {
156 sc.SetState(SCE_ABAQUS_OPERATOR) ;
157 state = KW_LINE_COMMA ;
158 } else {
159 // anything else is an error
160 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
161 state = ST_ERROR ;
163 break ;
164 case DAT_LINE_VAL :
165 if ( sc.atLineEnd ) {
166 sc.SetState(SCE_ABAQUS_DEFAULT) ;
167 state = LINE_END ;
168 } else if ( IsASetChar(sc.ch) && (sc.state == SCE_ABAQUS_DEFAULT) ) {
169 // nothing changes
170 state = DAT_LINE_VAL ;
171 } else if (( (IsADigit(sc.ch) || sc.ch == '.' || (sc.ch == 'e' || sc.ch == 'E') ||
172 ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) &&
173 (sc.state == SCE_ABAQUS_NUMBER)) {
174 // remain in number mode
175 state = DAT_LINE_VAL ;
176 } else if (sc.state == SCE_ABAQUS_STRING) {
177 // accept everything until a closing quote
178 if ( sc.ch == '\'' || sc.ch == '\"' ) {
179 sc.SetState(SCE_ABAQUS_DEFAULT) ;
180 state = DAT_LINE_VAL ;
182 } else if ( sc.ch == ',' ) {
183 sc.SetState(SCE_ABAQUS_OPERATOR) ;
184 state = DAT_LINE_COMMA ;
185 } else {
186 // anything else is an error
187 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
188 state = ST_ERROR ;
190 break ;
191 case DAT_LINE_COMMA :
192 // a comma on a data line was seen
193 if ( sc.atLineEnd ) {
194 sc.SetState(SCE_ABAQUS_DEFAULT) ;
195 state = LINE_END ;
196 } else if ( sc.ch == ' ' ) {
197 sc.SetState(SCE_ABAQUS_DEFAULT) ;
198 state = DAT_LINE_COMMA ;
199 } else if (sc.ch == ',') {
200 sc.SetState(SCE_ABAQUS_OPERATOR) ;
201 state = DAT_LINE_COMMA ;
202 } else if ( IsADigit(sc.ch) || (sc.ch == '-')|| (sc.ch == '.' && IsADigit(sc.chNext)) ) {
203 sc.SetState(SCE_ABAQUS_NUMBER) ;
204 state = DAT_LINE_VAL ;
205 } else if ( IsAKeywordChar(sc.ch) ) {
206 sc.SetState(SCE_ABAQUS_DEFAULT) ;
207 state = DAT_LINE_VAL ;
208 } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
209 sc.SetState(SCE_ABAQUS_STRING) ;
210 state = DAT_LINE_VAL ;
211 } else {
212 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
213 state = ST_ERROR ;
215 break ;
216 case COMMENT_LINE :
217 if ( sc.atLineEnd ) {
218 sc.SetState(SCE_ABAQUS_DEFAULT) ;
219 state = LINE_END ;
221 break ;
222 case ST_ERROR :
223 if ( sc.atLineEnd ) {
224 sc.SetState(SCE_ABAQUS_DEFAULT) ;
225 state = LINE_END ;
227 break ;
228 case LINE_END :
229 if ( sc.atLineEnd || sc.ch == ' ' ) {
230 // nothing changes
231 state = LINE_END ;
232 } else if ( sc.ch == '*' ) {
233 if ( sc.chNext == '*' ) {
234 state = COMMENT_LINE ;
235 sc.SetState(SCE_ABAQUS_COMMENT) ;
236 } else {
237 state = KW_LINE_KW ;
238 sc.SetState(SCE_ABAQUS_STARCOMMAND) ;
240 } else {
241 // it must be a data line, things are as if we are in DAT_LINE_COMMA
242 if ( sc.ch == ',' ) {
243 sc.SetState(SCE_ABAQUS_OPERATOR) ;
244 state = DAT_LINE_COMMA ;
245 } else if ( IsADigit(sc.ch) || (sc.ch == '-')|| (sc.ch == '.' && IsADigit(sc.chNext)) ) {
246 sc.SetState(SCE_ABAQUS_NUMBER) ;
247 state = DAT_LINE_VAL ;
248 } else if ( IsAKeywordChar(sc.ch) ) {
249 sc.SetState(SCE_ABAQUS_DEFAULT) ;
250 state = DAT_LINE_VAL ;
251 } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
252 sc.SetState(SCE_ABAQUS_STRING) ;
253 state = DAT_LINE_VAL ;
254 } else {
255 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
256 state = ST_ERROR ;
259 break ;
262 sc.Complete();
265 //------------------------------------------------------------------------------
266 // This is copied and modified from LexBasic.cxx
267 //------------------------------------------------------------------------------
/* Character classes for the 128 ASCII codes, stored as a bit mask.
 * Bits:
 *  1 - whitespace
 *  2 - operator
 *  4 - identifier
 *  8 - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 *
 * The table is only ever read, hence const.
 */
static const int character_classification[128] =
{
	/* 0x00 - 0x0F: TAB, LF and CR are whitespace */
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	/* 0x10 - 0x1F */
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x20 - 0x2F: blank and punctuation */
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  6,  2,  2,  2,  10, 6,
	/* 0x30 - 0x3F: digits, then punctuation */
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	/* 0x40 - 0x4F: '@', 'A'..'O' */
	2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
	/* 0x50 - 0x5F: 'P'..'Z', punctuation, '_' */
	4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  4,
	/* 0x60 - 0x6F: '`', 'a'..'o' */
	2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
	/* 0x70 - 0x7F: 'p'..'z', punctuation, DEL */
	4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  0
};
289 static bool IsSpace(int c) {
290 return c < 128 && (character_classification[c] & 1);
293 static bool IsIdentifier(int c) {
294 return c < 128 && (character_classification[c] & 4);
/**
 * Converts an ASCII upper-case letter to lower case.
 * Every other value is returned unchanged.
 */
static int LowerCase(int c)
{
	return ('A' <= c && c <= 'Z') ? ('a' + c - 'A') : c;
}
304 static int LineEnd(int line, Accessor &styler)
306 const int docLines = styler.GetLine(styler.Length() - 1); // Available last line
307 int eol_pos ;
308 // if the line is the last line, the eol_pos is styler.Length()
309 // eol will contain a new line, or a virtual new line
310 if ( docLines == line )
311 eol_pos = styler.Length() ;
312 else
313 eol_pos = styler.LineStart(line + 1) - 1;
314 return eol_pos ;
317 static int LineStart(int line, Accessor &styler)
319 return styler.LineStart(line) ;
322 // LineType
324 // bits determines the line type
325 // 1 : data line
326 // 2 : only whitespace
327 // 3 : data line with only whitespace
328 // 4 : keyword line
329 // 5 : block open keyword line
330 // 6 : block close keyword line
331 // 7 : keyword line in error
332 // 8 : comment line
333 static int LineType(int line, Accessor &styler) {
334 int pos = LineStart(line, styler) ;
335 int eol_pos = LineEnd(line, styler) ;
337 int c ;
338 char ch = ' ';
340 int i = pos ;
341 while ( i < eol_pos ) {
342 c = styler.SafeGetCharAt(i);
343 ch = static_cast<char>(LowerCase(c));
344 // We can say something as soon as no whitespace
345 // was encountered
346 if ( !IsSpace(c) )
347 break ;
348 i++ ;
351 if ( i >= eol_pos ) {
352 // This is a whitespace line, currently
353 // classifies as data line
354 return 3 ;
357 if ( ch != '*' ) {
358 // This is a data line
359 return 1 ;
362 if ( i == eol_pos - 1 ) {
363 // Only a single *, error but make keyword line
364 return 4+3 ;
367 // This means we can have a second character
368 // if that is also a * this means a comment
369 // otherwise it is a keyword.
370 c = styler.SafeGetCharAt(i+1);
371 ch = static_cast<char>(LowerCase(c));
372 if ( ch == '*' ) {
373 return 8 ;
376 // At this point we know this is a keyword line
377 // the character at position i is a *
378 // it is not a comment line
379 char word[256] ;
380 int wlen = 0;
382 word[wlen] = '*' ;
383 wlen++ ;
385 i++ ;
386 while ( (i < eol_pos) && (wlen < 255) ) {
387 c = styler.SafeGetCharAt(i);
388 ch = static_cast<char>(LowerCase(c));
390 if ( (!IsSpace(c)) && (!IsIdentifier(c)) )
391 break ;
393 if ( IsIdentifier(c) ) {
394 word[wlen] = ch ;
395 wlen++ ;
398 i++ ;
401 word[wlen] = 0 ;
403 // Make a comparison
404 if ( !strcmp(word, "*step") ||
405 !strcmp(word, "*part") ||
406 !strcmp(word, "*instance") ||
407 !strcmp(word, "*assembly")) {
408 return 4+1 ;
411 if ( !strcmp(word, "*endstep") ||
412 !strcmp(word, "*endpart") ||
413 !strcmp(word, "*endinstance") ||
414 !strcmp(word, "*endassembly")) {
415 return 4+2 ;
418 return 4 ;
421 static void SafeSetLevel(int line, int level, Accessor &styler)
423 if ( line < 0 )
424 return ;
426 int mask = ((~SC_FOLDLEVELHEADERFLAG) | (~SC_FOLDLEVELWHITEFLAG));
428 if ( (level & mask) < 0 )
429 return ;
431 if ( styler.LevelAt(line) != level )
432 styler.SetLevel(line, level) ;
435 static void FoldABAQUSDoc(unsigned int startPos, int length, int,
436 WordList *[], Accessor &styler) {
437 int startLine = styler.GetLine(startPos) ;
438 int endLine = styler.GetLine(startPos+length-1) ;
440 // bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
441 // We want to deal with all the cases
442 // To know the correct indentlevel, we need to look back to the
443 // previous command line indentation level
444 // order of formatting keyline datalines commentlines
445 int beginData = -1 ;
446 int beginComment = -1 ;
447 int prvKeyLine = startLine ;
448 int prvKeyLineTp = 0 ;
450 // Scan until we find the previous keyword line
451 // this will give us the level reference that we need
452 while ( prvKeyLine > 0 ) {
453 prvKeyLine-- ;
454 prvKeyLineTp = LineType(prvKeyLine, styler) ;
455 if ( prvKeyLineTp & 4 )
456 break ;
459 // Determine the base line level of all lines following
460 // the previous keyword
461 // new keyword lines are placed on this level
462 //if ( prvKeyLineTp & 4 ) {
463 int level = styler.LevelAt(prvKeyLine) & ~SC_FOLDLEVELHEADERFLAG ;
466 // uncomment line below if weird behaviour continues
467 prvKeyLine = -1 ;
469 // Now start scanning over the lines.
470 for ( int line = startLine; line <= endLine; line++ ) {
471 int lineType = LineType(line, styler) ;
473 // Check for comment line
474 if ( lineType == 8 ) {
475 if ( beginComment < 0 ) {
476 beginComment = line ;
480 // Check for data line
481 if ( (lineType == 1) || (lineType == 3) ) {
482 if ( beginData < 0 ) {
483 if ( beginComment >= 0 ) {
484 beginData = beginComment ;
485 } else {
486 beginData = line ;
489 beginComment = -1 ;
492 // Check for keywordline.
493 // As soon as a keyword line is encountered, we can set the
494 // levels of everything from the previous keyword line to this one
495 if ( lineType & 4 ) {
496 // this is a keyword, we can now place the previous keyword
497 // all its data lines and the remainder
499 // Write comments and data line
500 if ( beginComment < 0 ) {
501 beginComment = line ;
504 if ( beginData < 0 ) {
505 beginData = beginComment ;
506 if ( prvKeyLineTp != 5 )
507 SafeSetLevel(prvKeyLine, level, styler) ;
508 else
509 SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
510 } else {
511 SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
514 int datLevel = level + 1 ;
515 if ( !(prvKeyLineTp & 4) ) {
516 datLevel = level ;
519 for ( int ll = beginData; ll < beginComment; ll++ )
520 SafeSetLevel(ll, datLevel, styler) ;
522 // The keyword we just found is going to be written at another level
523 // if we have a type 5 and type 6
524 if ( prvKeyLineTp == 5 ) {
525 level += 1 ;
528 if ( prvKeyLineTp == 6 ) {
529 level -= 1 ;
530 if ( level < 0 ) {
531 level = 0 ;
535 for ( int lll = beginComment; lll < line; lll++ )
536 SafeSetLevel(lll, level, styler) ;
538 // wrap and reset
539 beginComment = -1 ;
540 beginData = -1 ;
541 prvKeyLine = line ;
542 prvKeyLineTp = lineType ;
547 if ( beginComment < 0 ) {
548 beginComment = endLine + 1 ;
549 } else {
550 // We need to find out whether this comment block is followed by
551 // a data line or a keyword line
552 const int docLines = styler.GetLine(styler.Length() - 1);
554 for ( int line = endLine + 1; line <= docLines; line++ ) {
555 int lineType = LineType(line, styler) ;
557 if ( lineType != 8 ) {
558 if ( !(lineType & 4) ) {
559 beginComment = endLine + 1 ;
561 break ;
566 if ( beginData < 0 ) {
567 beginData = beginComment ;
568 if ( prvKeyLineTp != 5 )
569 SafeSetLevel(prvKeyLine, level, styler) ;
570 else
571 SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
572 } else {
573 SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
576 int datLevel = level + 1 ;
577 if ( !(prvKeyLineTp & 4) ) {
578 datLevel = level ;
581 for ( int ll = beginData; ll < beginComment; ll++ )
582 SafeSetLevel(ll, datLevel, styler) ;
584 if ( prvKeyLineTp == 5 ) {
585 level += 1 ;
588 if ( prvKeyLineTp == 6 ) {
589 level -= 1 ;
591 for ( int m = beginComment; m <= endLine; m++ )
592 SafeSetLevel(m, level, styler) ;
// Descriptions of the keyword sets of this lexer, terminated by a null entry.
// The third entry used to read "slashommands" (missing 'c').
static const char * const abaqusWordListDesc[] = {
    "processors",
    "commands",
    "slashcommands",
    "starcommands",
    "arguments",
    "functions",
    0
};
605 LexerModule lmAbaqus(SCLEX_ABAQUS, ColouriseABAQUSDoc, "abaqus", FoldABAQUSDoc, abaqusWordListDesc);