Fixed issue #2175: TortoiseGitBlame fails to search if line has non-ascii chars and...
[TortoiseGit.git] / ext / scintilla / lexers / LexModula.cxx
blob3fa1bd98c41b4b062962b97df1dc82feeadc4b14
// -*- coding: utf-8 -*-
// Scintilla source code edit control
/**
 * @file LexModula.cxx
 * @author Dariusz "DKnoto" Knociński
 * @date 2011/02/03
 * @brief Lexer for Modula-2/3 documents.
 */
// The License.txt file describes the conditions under which this software may
// be distributed.
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
23 #include "PropSetSimple.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "Accessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "LexerModule.h"
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
// DEBUG_STATE( p, c ): when DEBUG_LEX_MODULA is defined, reports to stderr a
// position/character pair the lexer did not recognise; otherwise expands to
// nothing. The expansion carries its own trailing semicolon.
// The position is printed with "%u" (the former "%ud" emitted a stray 'd').
#ifdef DEBUG_LEX_MODULA
#define DEBUG_STATE( p, c )\
		fprintf( stderr, "Unknown state: currentPos = %u, char = '%c'\n", p, c );
#else
#define DEBUG_STATE( p, c )
#endif
/**
 * Tells whether @a ch is a valid digit character for the numeric @a base.
 *
 * For bases up to 10 only the characters '0' .. '0'+base-1 are accepted.
 * For larger bases every decimal digit is accepted, plus the first
 * (base - 10) letters starting at 'A' or at 'a'.
 *
 * @param ch    character code to classify
 * @param base  numeric base
 * @return true when @a ch is a digit of @a base
 */
static inline bool IsDigitOfBase( unsigned ch, unsigned base ) {
	// Nothing below '0' or above 'f' is ever considered.
	if( ch < '0' || ch > 'f' ) return false;

	if( base <= 10 ) {
		return ch < ( '0' + base );
	}

	// Bases above ten: all decimal digits are valid...
	if( ch <= '9' ) return true;

	// ...and so are the first (base - 10) letters, in either case.
	const unsigned nb = base - 10;
	const bool isUpperDigit = ( ch >= 'A' ) && ( ch < ( 'A' + nb ) );
	const bool isLowerDigit = ( ch >= 'a' ) && ( ch < ( 'a' + nb ) );
	return isUpperDigit || isLowerDigit;
}
59 static inline unsigned IsOperator( StyleContext & sc, WordList & op ) {
60 int i;
61 char s[3];
63 s[0] = sc.ch;
64 s[1] = sc.chNext;
65 s[2] = 0;
66 for( i = 0; i < op.Length(); i++ ) {
67 if( ( strlen( op.WordAt(i) ) == 2 ) &&
68 ( s[0] == op.WordAt(i)[0] && s[1] == op.WordAt(i)[1] ) ) {
69 return 2;
72 s[1] = 0;
73 for( i = 0; i < op.Length(); i++ ) {
74 if( ( strlen( op.WordAt(i) ) == 1 ) &&
75 ( s[0] == op.WordAt(i)[0] ) ) {
76 return 1;
79 return 0;
82 static inline bool IsEOL( Accessor &styler, unsigned curPos ) {
83 unsigned ch = styler.SafeGetCharAt( curPos );
84 if( ( ch == '\r' && styler.SafeGetCharAt( curPos + 1 ) == '\n' ) ||
85 ( ch == '\n' ) ) {
86 return true;
88 return false;
91 static inline bool checkStatement(
92 Accessor &styler,
93 int &curPos,
94 const char *stt, bool spaceAfter = true ) {
95 int len = static_cast<int>(strlen( stt ));
96 int i;
97 for( i = 0; i < len; i++ ) {
98 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
99 return false;
102 if( spaceAfter ) {
103 if( ! isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
104 return false;
107 curPos += ( len - 1 );
108 return true;
111 static inline bool checkEndSemicolon(
112 Accessor &styler,
113 int &curPos, int endPos )
115 const char *stt = "END";
116 int len = static_cast<int>(strlen( stt ));
117 int i;
118 for( i = 0; i < len; i++ ) {
119 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
120 return false;
123 while( isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
124 i++;
125 if( ( curPos + i ) >= endPos ) return false;
127 if( styler.SafeGetCharAt( curPos + i ) != ';' ) {
128 return false;
130 curPos += ( i - 1 );
131 return true;
134 static inline bool checkKeyIdentOper(
136 Accessor &styler,
137 int &curPos, int endPos,
138 const char *stt, const char etk ) {
139 int newPos = curPos;
140 if( ! checkStatement( styler, newPos, stt ) )
141 return false;
142 newPos++;
143 if( newPos >= endPos )
144 return false;
145 if( ! isspace( styler.SafeGetCharAt( newPos ) ) )
146 return false;
147 newPos++;
148 if( newPos >= endPos )
149 return false;
150 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
151 newPos++;
152 if( newPos >= endPos )
153 return false;
155 if( ! isalpha( styler.SafeGetCharAt( newPos ) ) )
156 return false;
157 newPos++;
158 if( newPos >= endPos )
159 return false;
160 char ch;
161 ch = styler.SafeGetCharAt( newPos );
162 while( isalpha( ch ) || isdigit( ch ) || ch == '_' ) {
163 newPos++;
164 if( newPos >= endPos ) return false;
165 ch = styler.SafeGetCharAt( newPos );
167 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
168 newPos++;
169 if( newPos >= endPos ) return false;
171 if( styler.SafeGetCharAt( newPos ) != etk )
172 return false;
173 curPos = newPos;
174 return true;
177 static void FoldModulaDoc( unsigned int startPos,
178 int length,
179 int , WordList *[],
180 Accessor &styler)
182 int curLine = styler.GetLine(startPos);
183 int curLevel = SC_FOLDLEVELBASE;
184 int endPos = startPos + length;
185 if( curLine > 0 )
186 curLevel = styler.LevelAt( curLine - 1 ) >> 16;
187 int curPos = startPos;
188 int style = styler.StyleAt( curPos );
189 int visChars = 0;
190 int nextLevel = curLevel;
192 while( curPos < endPos ) {
193 if( ! isspace( styler.SafeGetCharAt( curPos ) ) ) visChars++;
195 switch( style ) {
196 case SCE_MODULA_COMMENT:
197 if( checkStatement( styler, curPos, "(*" ) )
198 nextLevel++;
199 else
200 if( checkStatement( styler, curPos, "*)" ) )
201 nextLevel--;
202 break;
204 case SCE_MODULA_DOXYCOMM:
205 if( checkStatement( styler, curPos, "(**", false ) )
206 nextLevel++;
207 else
208 if( checkStatement( styler, curPos, "*)" ) )
209 nextLevel--;
210 break;
212 case SCE_MODULA_KEYWORD:
213 if( checkStatement( styler, curPos, "IF" ) )
214 nextLevel++;
215 else
216 if( checkStatement( styler, curPos, "BEGIN" ) )
217 nextLevel++;
218 else
219 if( checkStatement( styler, curPos, "TRY" ) )
220 nextLevel++;
221 else
222 if( checkStatement( styler, curPos, "LOOP" ) )
223 nextLevel++;
224 else
225 if( checkStatement( styler, curPos, "FOR" ) )
226 nextLevel++;
227 else
228 if( checkStatement( styler, curPos, "WHILE" ) )
229 nextLevel++;
230 else
231 if( checkStatement( styler, curPos, "REPEAT" ) )
232 nextLevel++;
233 else
234 if( checkStatement( styler, curPos, "UNTIL" ) )
235 nextLevel--;
236 else
237 if( checkStatement( styler, curPos, "WITH" ) )
238 nextLevel++;
239 else
240 if( checkStatement( styler, curPos, "CASE" ) )
241 nextLevel++;
242 else
243 if( checkStatement( styler, curPos, "TYPECASE" ) )
244 nextLevel++;
245 else
246 if( checkStatement( styler, curPos, "LOCK" ) )
247 nextLevel++;
248 else
249 if( checkKeyIdentOper( styler, curPos, endPos, "PROCEDURE", '(' ) )
250 nextLevel++;
251 else
252 if( checkKeyIdentOper( styler, curPos, endPos, "END", ';' ) ) {
253 int cln = curLine;
254 int clv_old = curLevel;
255 int pos;
256 char ch;
257 int clv_new;
258 while( cln > 0 ) {
259 clv_new = styler.LevelAt( cln - 1 ) >> 16;
260 if( clv_new < clv_old ) {
261 nextLevel--;
262 pos = styler.LineStart( cln );
263 while( ( ch = styler.SafeGetCharAt( pos ) ) != '\n' ) {
264 if( ch == 'P' ) {
265 if( styler.StyleAt(pos) == SCE_MODULA_KEYWORD ) {
266 if( checkKeyIdentOper( styler, pos, endPos,
267 "PROCEDURE", '(' ) ) {
268 break;
272 pos++;
274 clv_old = clv_new;
276 cln--;
279 else
280 if( checkKeyIdentOper( styler, curPos, endPos, "END", '.' ) )
281 nextLevel--;
282 else
283 if( checkEndSemicolon( styler, curPos, endPos ) )
284 nextLevel--;
285 else {
286 while( styler.StyleAt( curPos + 1 ) == SCE_MODULA_KEYWORD )
287 curPos++;
289 break;
291 default:
292 break;
295 if( IsEOL( styler, curPos ) || ( curPos == endPos - 1 ) ) {
296 int efectiveLevel = curLevel | nextLevel << 16;
297 if( visChars == 0 )
298 efectiveLevel |= SC_FOLDLEVELWHITEFLAG;
299 if( curLevel < nextLevel )
300 efectiveLevel |= SC_FOLDLEVELHEADERFLAG;
301 if( efectiveLevel != styler.LevelAt(curLine) ) {
302 styler.SetLevel(curLine, efectiveLevel );
304 curLine++;
305 curLevel = nextLevel;
306 if( IsEOL( styler, curPos ) && ( curPos == endPos - 1 ) ) {
307 styler.SetLevel( curLine, ( curLevel | curLevel << 16)
308 | SC_FOLDLEVELWHITEFLAG);
310 visChars = 0;
312 curPos++;
313 style = styler.StyleAt( curPos );
317 static inline bool skipWhiteSpaces( StyleContext & sc ) {
318 while( isspace( sc.ch ) ) {
319 sc.SetState( SCE_MODULA_DEFAULT );
320 if( sc.More() )
321 sc.Forward();
322 else
323 return false;
325 return true;
328 static void ColouriseModulaDoc( unsigned int startPos,
329 int length,
330 int initStyle,
331 WordList *wl[],
332 Accessor &styler ) {
333 WordList& keyWords = *wl[0];
334 WordList& reservedWords = *wl[1];
335 WordList& operators = *wl[2];
336 WordList& pragmaWords = *wl[3];
337 WordList& escapeCodes = *wl[4];
338 WordList& doxyKeys = *wl[5];
340 const int BUFLEN = 128;
342 char buf[BUFLEN];
343 int i, kl;
345 int charPos = 0;
347 StyleContext sc( startPos, length, initStyle, styler );
349 while( sc.More() ) {
350 switch( sc.state ) {
351 case SCE_MODULA_DEFAULT:
352 if( ! skipWhiteSpaces( sc ) ) break;
354 if( sc.ch == '(' && sc.chNext == '*' ) {
355 if( sc.GetRelative(2) == '*' ) {
356 sc.SetState( SCE_MODULA_DOXYCOMM );
357 sc.Forward();
358 } else {
359 sc.SetState( SCE_MODULA_COMMENT );
361 sc.Forward();
363 else
364 if( isalpha( sc.ch ) ) {
365 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
366 for( i = 0; i < BUFLEN - 1; i++ ) {
367 buf[i] = sc.GetRelative(i);
368 if( !isalpha( buf[i] ) && !(buf[i] == '_') )
369 break;
371 kl = i;
372 buf[kl] = 0;
374 if( keyWords.InList( buf ) ) {
375 sc.SetState( SCE_MODULA_KEYWORD );
376 sc.Forward( kl );
377 sc.SetState( SCE_MODULA_DEFAULT );
378 continue;
380 else
381 if( reservedWords.InList( buf ) ) {
382 sc.SetState( SCE_MODULA_RESERVED );
383 sc.Forward( kl );
384 sc.SetState( SCE_MODULA_DEFAULT );
385 continue;
386 } else {
387 /** check procedure identifier */
389 } else {
390 for( i = 0; i < BUFLEN - 1; i++ ) {
391 buf[i] = sc.GetRelative(i);
392 if( !isalpha( buf[i] ) &&
393 !isdigit( buf[i] ) &&
394 !(buf[i] == '_') )
395 break;
397 kl = i;
398 buf[kl] = 0;
400 sc.SetState( SCE_MODULA_DEFAULT );
401 sc.Forward( kl );
402 continue;
405 else
406 if( isdigit( sc.ch ) ) {
407 sc.SetState( SCE_MODULA_NUMBER );
408 continue;
410 else
411 if( sc.ch == '\"' ) {
412 sc.SetState( SCE_MODULA_STRING );
414 else
415 if( sc.ch == '\'' ) {
416 charPos = sc.currentPos;
417 sc.SetState( SCE_MODULA_CHAR );
419 else
420 if( sc.ch == '<' && sc.chNext == '*' ) {
421 sc.SetState( SCE_MODULA_PRAGMA );
422 sc.Forward();
423 } else {
424 unsigned len = IsOperator( sc, operators );
425 if( len > 0 ) {
426 sc.SetState( SCE_MODULA_OPERATOR );
427 sc.Forward( len );
428 sc.SetState( SCE_MODULA_DEFAULT );
429 continue;
430 } else {
431 DEBUG_STATE( sc.currentPos, sc.ch );
434 break;
436 case SCE_MODULA_COMMENT:
437 if( sc.ch == '*' && sc.chNext == ')' ) {
438 sc.Forward( 2 );
439 sc.SetState( SCE_MODULA_DEFAULT );
440 continue;
442 break;
444 case SCE_MODULA_DOXYCOMM:
445 switch( sc.ch ) {
446 case '*':
447 if( sc.chNext == ')' ) {
448 sc.Forward( 2 );
449 sc.SetState( SCE_MODULA_DEFAULT );
450 continue;
452 break;
454 case '@':
455 if( islower( sc.chNext ) ) {
456 for( i = 0; i < BUFLEN - 1; i++ ) {
457 buf[i] = sc.GetRelative(i+1);
458 if( isspace( buf[i] ) ) break;
460 buf[i] = 0;
461 kl = i;
463 if( doxyKeys.InList( buf ) ) {
464 sc.SetState( SCE_MODULA_DOXYKEY );
465 sc.Forward( kl + 1 );
466 sc.SetState( SCE_MODULA_DOXYCOMM );
469 break;
471 default:
472 break;
474 break;
476 case SCE_MODULA_NUMBER:
478 buf[0] = sc.ch;
479 for( i = 1; i < BUFLEN - 1; i++ ) {
480 buf[i] = sc.GetRelative(i);
481 if( ! isdigit( buf[i] ) )
482 break;
484 kl = i;
485 buf[kl] = 0;
487 switch( sc.GetRelative(kl) ) {
488 case '_':
490 int base = atoi( buf );
491 if( base < 2 || base > 16 ) {
492 sc.SetState( SCE_MODULA_BADSTR );
493 } else {
494 int imax;
496 kl++;
497 for( i = 0; i < BUFLEN - 1; i++ ) {
498 buf[i] = sc.GetRelative(kl+i);
499 if( ! IsDigitOfBase( buf[i], 16 ) ) {
500 break;
503 imax = i;
504 for( i = 0; i < imax; i++ ) {
505 if( ! IsDigitOfBase( buf[i], base ) ) {
506 sc.SetState( SCE_MODULA_BADSTR );
507 break;
510 kl += imax;
512 sc.SetState( SCE_MODULA_BASENUM );
513 for( i = 0; i < kl; i++ ) {
514 sc.Forward();
516 sc.SetState( SCE_MODULA_DEFAULT );
517 continue;
519 break;
521 case '.':
522 if( sc.GetRelative(kl+1) == '.' ) {
523 kl--;
524 for( i = 0; i < kl; i++ ) {
525 sc.Forward();
527 sc.Forward();
528 sc.SetState( SCE_MODULA_DEFAULT );
529 continue;
530 } else {
531 bool doNext = false;
533 kl++;
535 buf[0] = sc.GetRelative(kl);
536 if( isdigit( buf[0] ) ) {
537 for( i = 0;; i++ ) {
538 if( !isdigit(sc.GetRelative(kl+i)) )
539 break;
541 kl += i;
542 buf[0] = sc.GetRelative(kl);
544 switch( buf[0] )
546 case 'E':
547 case 'e':
548 case 'D':
549 case 'd':
550 case 'X':
551 case 'x':
552 kl++;
553 buf[0] = sc.GetRelative(kl);
554 if( buf[0] == '-' || buf[0] == '+' ) {
555 kl++;
557 buf[0] = sc.GetRelative(kl);
558 if( isdigit( buf[0] ) ) {
559 for( i = 0;; i++ ) {
560 if( !isdigit(sc.GetRelative(kl+i)) ) {
561 buf[0] = sc.GetRelative(kl+i);
562 break;
565 kl += i;
566 doNext = true;
567 } else {
568 sc.SetState( SCE_MODULA_BADSTR );
570 break;
572 default:
573 doNext = true;
574 break;
576 } else {
577 sc.SetState( SCE_MODULA_BADSTR );
580 if( doNext ) {
581 if( ! isspace( buf[0] ) &&
582 buf[0] != ')' &&
583 buf[0] != '>' &&
584 buf[0] != '<' &&
585 buf[0] != '=' &&
586 buf[0] != '#' &&
587 buf[0] != '+' &&
588 buf[0] != '-' &&
589 buf[0] != '*' &&
590 buf[0] != '/' &&
591 buf[0] != ',' &&
592 buf[0] != ';'
594 sc.SetState( SCE_MODULA_BADSTR );
595 } else {
596 kl--;
600 sc.SetState( SCE_MODULA_FLOAT );
601 for( i = 0; i < kl; i++ ) {
602 sc.Forward();
604 sc.SetState( SCE_MODULA_DEFAULT );
605 continue;
606 break;
608 default:
609 for( i = 0; i < kl; i++ ) {
610 sc.Forward();
612 break;
614 sc.SetState( SCE_MODULA_DEFAULT );
615 continue;
617 break;
619 case SCE_MODULA_STRING:
620 if( sc.ch == '\"' ) {
621 sc.Forward();
622 sc.SetState( SCE_MODULA_DEFAULT );
623 continue;
624 } else {
625 if( sc.ch == '\\' ) {
626 i = 1;
627 if( IsDigitOfBase( sc.chNext, 8 ) ) {
628 for( i = 1; i < BUFLEN - 1; i++ ) {
629 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
630 break;
632 if( i == 3 ) {
633 sc.SetState( SCE_MODULA_STRSPEC );
634 } else {
635 sc.SetState( SCE_MODULA_BADSTR );
637 } else {
638 buf[0] = sc.chNext;
639 buf[1] = 0;
641 if( escapeCodes.InList( buf ) ) {
642 sc.SetState( SCE_MODULA_STRSPEC );
643 } else {
644 sc.SetState( SCE_MODULA_BADSTR );
647 sc.Forward(i+1);
648 sc.SetState( SCE_MODULA_STRING );
649 continue;
652 break;
654 case SCE_MODULA_CHAR:
655 if( sc.ch == '\'' ) {
656 sc.Forward();
657 sc.SetState( SCE_MODULA_DEFAULT );
658 continue;
660 else
661 if( ( sc.currentPos - charPos ) == 1 ) {
662 if( sc.ch == '\\' ) {
663 i = 1;
664 if( IsDigitOfBase( sc.chNext, 8 ) ) {
665 for( i = 1; i < BUFLEN - 1; i++ ) {
666 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
667 break;
669 if( i == 3 ) {
670 sc.SetState( SCE_MODULA_CHARSPEC );
671 } else {
672 sc.SetState( SCE_MODULA_BADSTR );
674 } else {
675 buf[0] = sc.chNext;
676 buf[1] = 0;
678 if( escapeCodes.InList( buf ) ) {
679 sc.SetState( SCE_MODULA_CHARSPEC );
680 } else {
681 sc.SetState( SCE_MODULA_BADSTR );
684 sc.Forward(i+1);
685 sc.SetState( SCE_MODULA_CHAR );
686 continue;
688 } else {
689 sc.SetState( SCE_MODULA_BADSTR );
690 sc.Forward();
691 sc.SetState( SCE_MODULA_CHAR );
692 continue;
694 break;
696 case SCE_MODULA_PRAGMA:
697 if( sc.ch == '*' && sc.chNext == '>' ) {
698 sc.Forward();
699 sc.Forward();
700 sc.SetState( SCE_MODULA_DEFAULT );
701 continue;
703 else
704 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
705 buf[0] = sc.ch;
706 buf[1] = sc.chNext;
707 for( i = 2; i < BUFLEN - 1; i++ ) {
708 buf[i] = sc.GetRelative(i);
709 if( !isupper( buf[i] ) )
710 break;
712 kl = i;
713 buf[kl] = 0;
714 if( pragmaWords.InList( buf ) ) {
715 sc.SetState( SCE_MODULA_PRGKEY );
716 sc.Forward( kl );
717 sc.SetState( SCE_MODULA_PRAGMA );
718 continue;
721 break;
723 default:
724 break;
726 sc.Forward();
728 sc.Complete();
// Descriptions of the word lists, in the order ColouriseModulaDoc reads them
// (wl[0] .. wl[5]); the list ends with a null entry.
static const char *const modulaWordListDesc[] =
{
	"Keywords",
	"ReservedKeywords",
	"Operators",
	"PragmaKeyswords",
	"EscapeCodes",
	"DoxygeneKeywords",
	0
};
742 LexerModule lmModula( SCLEX_MODULA, ColouriseModulaDoc, "modula", FoldModulaDoc,
743 modulaWordListDesc);