1 package com
.intellij
.lexer
;
3 import com
.intellij
.openapi
.diagnostic
.Logger
;
4 import com
.intellij
.pom
.java
.LanguageLevel
;
5 import com
.intellij
.psi
.JavaTokenType
;
6 import com
.intellij
.psi
.tree
.IElementType
;
7 import com
.intellij
.util
.text
.CharArrayCharSequence
;
8 import com
.intellij
.util
.text
.CharArrayUtil
;
10 import java
.io
.BufferedReader
;
11 import java
.io
.FileNotFoundException
;
12 import java
.io
.FileReader
;
13 import java
.io
.IOException
;
15 public class JavaLexer
extends LexerBase
{
/**
 * Creates a lexer configured by two keyword flags.
 * Both flags are used to select one of the four shared keyword tables and are
 * also forwarded unchanged to the underlying {@code _JavaLexer}.
 *
 * @param isAssertKeywordEnabled selects a table built with the assert flag set
 * @param isJDK15 selects a table built with the JDK 1.5 flag set
 */
17 private JavaLexer(boolean isAssertKeywordEnabled
, boolean isJDK15
) {
// Pick the shared keyword table that matches the (assert, JDK 1.5) flag combination.
18 myTable
= isAssertKeywordEnabled ?
19 (isJDK15 ? ourTableWithAssertAndJDK15
: ourTableWithAssert
) :
20 (isJDK15 ? ourTableWithJDK15
: ourTableWithoutAssert
);
// The delegate lexer is constructed with the same two flags.
21 myFlexlexer
= new _JavaLexer(isAssertKeywordEnabled
, isJDK15
);
/**
 * Creates a lexer for the given language level.
 * The two keyword flags of the private constructor are derived from
 * {@code level.hasAssertKeyword()} and {@code level.hasEnumKeywordAndAutoboxing()}.
 *
 * @param level language level whose keyword set should be recognized
 */
24 public JavaLexer(LanguageLevel level
) {
25 this(level
.hasAssertKeyword(), level
.hasEnumKeywordAndAutoboxing());
// Text being lexed as a sequence; read via charAt() whenever myBufferArray is null.
28 private CharSequence myBuffer
;
// Backing char array of the text; the scanning code checks it for null before each use.
29 private char[] myBufferArray
;
// Offset reported by getTokenStart(), i.e. where the current token begins.
30 private int myBufferIndex
;
// Offset at which lexing stops (the endOffset handed to start()).
31 private int myBufferEndOffset
;
// Type of the current token; locateToken() treats non-null as "already located".
33 IElementType myTokenType
;
// Delegate lexer used by flexLocateToken().
34 private _JavaLexer myFlexlexer
;
36 //Positioned after the last symbol of the current token
37 private int myTokenEndOffset
;
/**
 * Fixed-size keyword table. A keyword's slot is its hash code modulo
 * {@code NUM_ENTRIES}; the same slot index is used in both arrays below.
 */
39 private final static class HashTable
{
// Number of slots in myTable and myKeywords.
40 static final int NUM_ENTRIES
= 999;
// Used by add() to assert that a slot is still empty before it is filled.
41 private static final Logger LOG
= Logger
.getInstance("com.intellij.Lexer.JavaLexer");
// Keyword characters per slot; a null entry means no keyword is stored there.
43 final char[][] myTable
= new char[NUM_ENTRIES
][];
// Token type per slot, parallel to myTable.
44 final IElementType
[] myKeywords
= new IElementType
[NUM_ENTRIES
];
/**
 * Registers keyword {@code s} with its token type.
 * The slot is the keyword's hash code modulo {@code NUM_ENTRIES}; the slot
 * is asserted to be empty before it is written.
 *
 * @param s keyword text
 * @param tokenType token type stored for the keyword
 */
46 void add(String s
, IElementType tokenType
) {
47 char[] chars
= s
.toCharArray();
// The hash starts from the first character doubled.
48 int hashCode
= chars
[0] * 2;
// NOTE(review): the body of this loop over the remaining characters is not
// visible in this excerpt; presumably it folds each character into hashCode.
49 for (int j
= 1; j
< chars
.length
; j
++) {
52 int modHashCode
= hashCode
% NUM_ENTRIES
;
// The slot must not already hold another keyword.
53 LOG
.assertTrue(myTable
[modHashCode
] == null);
55 myTable
[modHashCode
] = chars
;
56 myKeywords
[modHashCode
] = tokenType
;
/**
 * Tests whether the slot for {@code hashCode} holds a keyword whose characters
 * match the buffer contents starting at {@code offset}.
 * Returns false when the slot is empty or any character differs.
 * NOTE(review): the final return for the all-characters-match case is not
 * visible in this excerpt.
 *
 * @param hashCode hash code of the candidate text
 * @param buffer text as a sequence, read when {@code bufferArray} is null
 * @param bufferArray text as a char array, or null
 * @param offset index in the buffer where the candidate text starts
 */
59 boolean contains(int hashCode
, final CharSequence buffer
, final char[] bufferArray
, int offset
) {
60 int modHashCode
= hashCode
% NUM_ENTRIES
;
61 final char[] kwd
= myTable
[modHashCode
];
// Empty slot: no keyword has this hash.
62 if (kwd
== null) return false;
// Array path: compare the keyword against the char array directly.
64 if (bufferArray
!= null) {
65 for (int j
= 0; j
< kwd
.length
; j
++) {
66 if (bufferArray
[j
+ offset
] != kwd
[j
]) return false;
// Sequence path: same comparison through charAt().
69 for (int j
= 0; j
< kwd
.length
; j
++) {
70 if (buffer
.charAt(j
+ offset
) != kwd
[j
]) return false;
/**
 * Returns the token type stored in the slot {@code hashCode % NUM_ENTRIES}.
 *
 * @param hashCode hash code of the keyword text
 */
76 IElementType
getTokenType(int hashCode
) {
77 return myKeywords
[hashCode
% NUM_ENTRIES
];
/**
 * Fills the table with the Java keywords and the literals true, false and null.
 *
 * @param isAssertKeywordEnabled when true, "assert" is registered as a keyword
 * @param isJDK15 NOTE(review): presumably guards the "enum" registration; the
 *                guarding line is not visible in this excerpt
 */
80 @SuppressWarnings({"HardCodedStringLiteral"})
81 public HashTable(boolean isAssertKeywordEnabled
, boolean isJDK15
) {
// "assert" is a keyword only when the assert flag is set.
82 if (isAssertKeywordEnabled
) {
83 add("assert", JavaTokenType
.ASSERT_KEYWORD
);
// NOTE(review): the condition around this registration is not visible here.
86 add("enum", JavaTokenType
.ENUM_KEYWORD
);
// The remaining keywords are registered one by one.
88 add("abstract", JavaTokenType
.ABSTRACT_KEYWORD
);
89 add("default", JavaTokenType
.DEFAULT_KEYWORD
);
90 add("if", JavaTokenType
.IF_KEYWORD
);
91 add("private", JavaTokenType
.PRIVATE_KEYWORD
);
92 add("this", JavaTokenType
.THIS_KEYWORD
);
93 add("boolean", JavaTokenType
.BOOLEAN_KEYWORD
);
94 add("do", JavaTokenType
.DO_KEYWORD
);
95 add("implements", JavaTokenType
.IMPLEMENTS_KEYWORD
);
96 add("protected", JavaTokenType
.PROTECTED_KEYWORD
);
97 add("throw", JavaTokenType
.THROW_KEYWORD
);
98 add("break", JavaTokenType
.BREAK_KEYWORD
);
99 add("double", JavaTokenType
.DOUBLE_KEYWORD
);
100 add("import", JavaTokenType
.IMPORT_KEYWORD
);
101 add("public", JavaTokenType
.PUBLIC_KEYWORD
);
102 add("throws", JavaTokenType
.THROWS_KEYWORD
);
103 add("byte", JavaTokenType
.BYTE_KEYWORD
);
104 add("else", JavaTokenType
.ELSE_KEYWORD
);
105 add("instanceof", JavaTokenType
.INSTANCEOF_KEYWORD
);
106 add("return", JavaTokenType
.RETURN_KEYWORD
);
107 add("transient", JavaTokenType
.TRANSIENT_KEYWORD
);
108 add("case", JavaTokenType
.CASE_KEYWORD
);
109 add("extends", JavaTokenType
.EXTENDS_KEYWORD
);
110 add("int", JavaTokenType
.INT_KEYWORD
);
111 add("short", JavaTokenType
.SHORT_KEYWORD
);
112 add("try", JavaTokenType
.TRY_KEYWORD
);
113 add("catch", JavaTokenType
.CATCH_KEYWORD
);
114 add("final", JavaTokenType
.FINAL_KEYWORD
);
115 add("interface", JavaTokenType
.INTERFACE_KEYWORD
);
116 add("static", JavaTokenType
.STATIC_KEYWORD
);
117 add("void", JavaTokenType
.VOID_KEYWORD
);
118 add("char", JavaTokenType
.CHAR_KEYWORD
);
119 add("finally", JavaTokenType
.FINALLY_KEYWORD
);
120 add("long", JavaTokenType
.LONG_KEYWORD
);
121 add("strictfp", JavaTokenType
.STRICTFP_KEYWORD
);
122 add("volatile", JavaTokenType
.VOLATILE_KEYWORD
);
123 add("class", JavaTokenType
.CLASS_KEYWORD
);
124 add("float", JavaTokenType
.FLOAT_KEYWORD
);
125 add("native", JavaTokenType
.NATIVE_KEYWORD
);
126 add("super", JavaTokenType
.SUPER_KEYWORD
);
127 add("while", JavaTokenType
.WHILE_KEYWORD
);
128 add("const", JavaTokenType
.CONST_KEYWORD
);
129 add("for", JavaTokenType
.FOR_KEYWORD
);
130 add("new", JavaTokenType
.NEW_KEYWORD
);
131 add("switch", JavaTokenType
.SWITCH_KEYWORD
);
132 add("continue", JavaTokenType
.CONTINUE_KEYWORD
);
133 add("goto", JavaTokenType
.GOTO_KEYWORD
);
134 add("package", JavaTokenType
.PACKAGE_KEYWORD
);
135 add("synchronized", JavaTokenType
.SYNCHRONIZED_KEYWORD
);
// The literals true, false and null are stored in the same table as the keywords.
136 add("true", JavaTokenType
.TRUE_KEYWORD
);
137 add("false", JavaTokenType
.FALSE_KEYWORD
);
138 add("null", JavaTokenType
.NULL_KEYWORD
);
// Keyword table used by this lexer instance; assigned in the private constructor.
142 private final HashTable myTable
;
// Shared tables, one per flag combination; constructor arguments are
// (isAssertKeywordEnabled, isJDK15).
143 private final static HashTable ourTableWithoutAssert
= new HashTable(false, false);
144 private final static HashTable ourTableWithAssert
= new HashTable(true, false);
145 private final static HashTable ourTableWithAssertAndJDK15
= new HashTable(true, true);
146 private final static HashTable ourTableWithJDK15
= new HashTable(false, true);
/**
 * Prepares the lexer to scan {@code buffer} between the given offsets.
 * NOTE(review): some statements of this method (original lines 149 and 153)
 * are not visible in this excerpt; in particular the assignment of
 * {@code myBuffer} is presumably among them.
 *
 * @param buffer text to lex
 * @param startOffset offset of the first character to lex
 * @param endOffset offset at which lexing stops
 * @param initialState not used by any visible statement; the delegate lexer is
 *                     reset with a literal 0
 */
148 public final void start(CharSequence buffer
, int startOffset
, int endOffset
, int initialState
) {
// Obtain a char array view of the sequence without copying it.
150 myBufferArray
= CharArrayUtil
.fromSequenceWithoutCopying(buffer
);
151 myBufferIndex
= startOffset
;
152 myBufferEndOffset
= endOffset
;
// No token has been consumed yet, so the token end sits at the start offset.
154 myTokenEndOffset
= startOffset
;
155 myFlexlexer
.reset(myBuffer
, startOffset
, endOffset
, 0);
/**
 * Array variant of start: wraps {@code buffer} in a
 * {@code CharArrayCharSequence} and delegates to the CharSequence overload
 * with the same offsets and state.
 */
158 public final void start(char[] buffer
, int startOffset
, int endOffset
, int initialState
) {
159 start(new CharArrayCharSequence(buffer
), startOffset
, endOffset
, initialState
);
// Presumably returns the current lexer state.
// NOTE(review): the method body is not visible in this excerpt.
162 public int getState() {
// Presumably returns the type of the current token; main() loops until this returns null.
// NOTE(review): the method body is not visible in this excerpt.
166 public final IElementType
getTokenType() {
/** Returns the start offset of the current token ({@code myBufferIndex}). */
172 public final int getTokenStart() {
173 return myBufferIndex
;
/**
 * Returns the offset just past the current token ({@code myTokenEndOffset}).
 * NOTE(review): a statement before the return (original line 177) is not visible here.
 */
176 public final int getTokenEnd() {
178 return myTokenEndOffset
;
// Presumably moves the lexer on to the next token.
// NOTE(review): the method body is not visible in this excerpt.
182 public final void advance() {
/**
 * Locates the current token unless that has already been done: a non-null
 * {@code myTokenType} makes the method return immediately.
 * NOTE(review): the statements after the guard are not visible in this excerpt.
 */
187 protected final void locateToken() {
188 if (myTokenType
!= null) return;
/**
 * Determines the type and end offset of the token that starts where the
 * previous token ended. White space, comments, string and character literals
 * and identifiers starting with a non-ASCII character are handled directly here.
 * NOTE(review): the branching construct and its labels (presumably a switch
 * on the first character) and several statements are not visible in this
 * excerpt; the comments below describe only the visible statements.
 */
192 private void _locateToken() {
// Previous token ended at the buffer end: nothing is left to scan.
194 if (myTokenEndOffset
== myBufferEndOffset
) {
196 myBufferIndex
= myBufferEndOffset
;
// The new token begins where the previous one ended.
200 myBufferIndex
= myTokenEndOffset
;
// First character of the token, read from the array when available.
202 final char c
= myBufferArray
!= null ? myBufferArray
[myBufferIndex
]:myBuffer
.charAt(myBufferIndex
);
// White space token: extends over the following white space characters.
213 myTokenType
= JavaTokenType
.WHITE_SPACE
;
214 myTokenEndOffset
= getWhitespaces(myBufferIndex
+ 1);
// No character follows the current one: the token is DIV and ends at the buffer end.
218 if (myBufferIndex
+ 1 >= myBufferEndOffset
) {
219 myTokenType
= JavaTokenType
.DIV
;
220 myTokenEndOffset
= myBufferEndOffset
;
223 final char nextChar
= myBufferArray
!= null ? myBufferArray
[myBufferIndex
+ 1]:myBuffer
.charAt(myBufferIndex
+ 1);
// Next character is '/': end-of-line comment running up to the line terminator.
225 if (nextChar
== '/') {
226 myTokenType
= JavaTokenType
.END_OF_LINE_COMMENT
;
227 myTokenEndOffset
= getLineTerminator(myBufferIndex
+ 2);
229 else if (nextChar
== '*') {
// C-style comment when there is no third character or it is not '*';
// otherwise the token is a doc comment.
230 if (myBufferIndex
+ 2 >= myBufferEndOffset
||
231 (myBufferArray
!= null ? myBufferArray
[myBufferIndex
+ 2]:myBuffer
.charAt(myBufferIndex
+ 2)) != '*') {
232 myTokenType
= JavaTokenType
.C_STYLE_COMMENT
;
233 myTokenEndOffset
= getClosingComment(myBufferIndex
+ 2);
236 myTokenType
= JavaTokenType
.DOC_COMMENT
;
237 myTokenEndOffset
= getDocClosingComment(myBufferIndex
+ 3);
// Non-ASCII character that may start an identifier: scan it as an identifier
// (getIdentifier also assigns the token type).
240 else if ((c
> 127) && Character
.isJavaIdentifierStart(c
)) {
241 myTokenEndOffset
= getIdentifier(myBufferIndex
+ 1);
// Quoted literal: a double quote gives a string literal, otherwise a character
// literal; the end is located by scanning for the matching quote.
252 myTokenType
= c
== '"' ? JavaTokenType
.STRING_LITERAL
: JavaTokenType
.CHARACTER_LITERAL
;
253 myTokenEndOffset
= getClosingParenthesys(myBufferIndex
+ 1, c
);
// Never let the token extend past the end of the lexed range.
256 if (myTokenEndOffset
> myBufferEndOffset
) {
257 myTokenEndOffset
= myBufferEndOffset
;
/**
 * Scans a run of white space (space, tab, line feed, carriage return, form
 * feed) beginning at {@code pos}. Returns the buffer end offset when
 * {@code pos} is already at or beyond it, or when the run reaches it.
 * NOTE(review): the increment of {@code pos} inside the loop and the return
 * after the loop are not visible in this excerpt.
 *
 * @param pos offset of the first character to examine
 */
261 private int getWhitespaces(int pos
) {
262 if (pos
>= myBufferEndOffset
) return myBufferEndOffset
;
// Local copies of the buffer fields used by the scanning loop.
263 final CharSequence lBuffer
= myBuffer
;
264 final char[] lBufferArray
= myBufferArray
;
265 final boolean hasArray
= lBufferArray
!= null;
267 char c
= hasArray ? lBufferArray
[pos
] : lBuffer
.charAt(pos
);
269 while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f') {
// Stop at the buffer end before reading another character.
271 if (pos
== myBufferEndOffset
) return pos
;
272 c
= hasArray ? lBufferArray
[pos
] : lBuffer
.charAt(pos
);
/**
 * Locates the current token with the delegate lexer: positions it at
 * {@code myBufferIndex}, advances it once and takes over its token type and
 * token end offset.
 * NOTE(review): the try header and the body of the IOException handler are
 * not visible in this excerpt.
 */
278 private void flexLocateToken() {
280 myFlexlexer
.goTo(myBufferIndex
);
281 myTokenType
= myFlexlexer
.advance();
282 myTokenEndOffset
= myFlexlexer
.getTokenEnd();
284 catch (IOException e
) {
/**
 * Scans a string or character literal for its closing quote.
 * Returns the buffer end offset whenever the scan position reaches it.
 * NOTE(review): the declaration of {@code pos}, the statements that advance
 * it, the enclosing loop and the remaining return statements are not visible
 * in this excerpt.
 *
 * @param offset offset at which scanning starts (the caller passes the
 *               position just after the opening quote)
 * @param c the quote character that terminates the literal
 */
290 private int getClosingParenthesys(int offset
, char c
) {
292 final int lBufferEnd
= myBufferEndOffset
;
293 if (pos
>= lBufferEnd
) return lBufferEnd
;
295 final CharSequence lBuffer
= myBuffer
;
296 final char[] lBufferArray
= myBufferArray
;
297 final boolean hasArray
= lBufferArray
!= null;
298 char cur
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Skip ordinary characters: stop at the closing quote, a line break or a backslash.
301 while (cur
!= c
&& cur
!= '\n' && cur
!= '\r' && cur
!= '\\') {
303 if (pos
>= lBufferEnd
) return lBufferEnd
;
304 cur
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Presumably the backslash case: read the following character.
309 if (pos
>= lBufferEnd
) return lBufferEnd
;
310 cur
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// When that character is a line break, go straight to the next iteration.
311 if (cur
== '\n' || cur
== '\r') continue;
313 if (pos
>= lBufferEnd
) return lBufferEnd
;
314 cur
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Closing quote reached.
315 } else if (cur
== c
) {
/**
 * Finds the end of a doc comment, scanning from {@code offset}.
 * NOTE(review): the declaration of {@code pos}, the body of the first
 * {@code if}, the test on the first character of the pair and the return
 * statements are not visible in this excerpt.
 *
 * @param offset offset at which scanning starts (the caller passes the
 *               token start plus 3)
 */
326 private int getDocClosingComment(int offset
) {
327 final int lBufferEnd
= myBufferEndOffset
;
328 final CharSequence lBuffer
= myBuffer
;
329 final char[] lBufferArray
= myBufferArray
;
330 final boolean hasArray
= lBufferArray
!= null;
// Special case: the character at offset is already '/'.
332 if (offset
< lBufferEnd
&&
333 (hasArray ? lBufferArray
[offset
]:lBuffer
.charAt(offset
)) == '/') {
// Stop one short of the end because each step also reads the character at pos + 1.
338 while (pos
< lBufferEnd
- 1) {
339 final char c
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Second half of a two-character test: the following character is '/'.
342 (hasArray ? lBufferArray
[pos
+ 1]:lBuffer
.charAt(pos
+ 1)) == '/'
/**
 * Finds the end of a C-style comment, scanning from {@code offset}.
 * NOTE(review): the declaration of {@code pos}, the test on the first
 * character of the pair and the return statements are not visible in this
 * excerpt.
 *
 * @param offset offset at which scanning starts (the caller passes the
 *               token start plus 2)
 */
351 private int getClosingComment(int offset
) {
354 final int lBufferEnd
= myBufferEndOffset
;
355 final CharSequence lBuffer
= myBuffer
;
356 final char[] lBufferArray
= myBufferArray
;
357 final boolean hasArray
= lBufferArray
!= null;
// Stop one short of the end because each step also reads the character at pos + 1.
359 while (pos
< lBufferEnd
- 1) {
360 final char c
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Second half of a two-character test: the following character is '/'.
363 (hasArray ? lBufferArray
[pos
+ 1]:lBuffer
.charAt(pos
+ 1)) == '/'
/**
 * Scans forward until a carriage return or line feed is found or the buffer
 * end is reached; used to find the end of an end-of-line comment.
 * NOTE(review): the declaration and increment of {@code pos} and the return
 * statement are not visible in this excerpt.
 *
 * @param offset offset at which scanning starts
 */
373 private int getLineTerminator(int offset
) {
375 final int lBufferEnd
= myBufferEndOffset
;
376 final CharSequence lBuffer
= myBuffer
;
377 final char[] lBufferArray
= myBufferArray
;
378 final boolean hasArray
= lBufferArray
!= null;
380 while (pos
< lBufferEnd
) {
381 final char c
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// The line break itself is where the scan stops.
382 if (c
== '\r' || c
== '\n') break;
/**
 * Scans an identifier whose first character is at {@code offset - 1} and sets
 * {@code myTokenType} to the keyword type found in the keyword table, or to
 * IDENTIFIER when the table has no match.
 * NOTE(review): the declaration and increment of {@code pos}, the statements
 * that update {@code hashCode} inside the loop and the return statement are
 * not visible in this excerpt.
 *
 * @param offset offset of the character following the identifier's first character
 */
389 private int getIdentifier(int offset
) {
390 final CharSequence lBuffer
= myBuffer
;
391 final char[] lBufferArray
= myBufferArray
;
392 final boolean hasArray
= lBufferArray
!= null;
// Seed the hash with the first character doubled, the same seed HashTable.add uses.
394 int hashCode
= (hasArray ? lBufferArray
[offset
- 1]:lBuffer
.charAt(offset
- 1)) * 2;
395 final int lBufferEnd
= myBufferEndOffset
;
398 if (pos
< lBufferEnd
) {
399 char c
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Continue over ASCII letters, digits, '_' and '$', and over non-ASCII
// characters accepted by Character.isJavaIdentifierPart.
401 while ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || (c
>= '0' && c
<= '9')
402 || c
== '_' || c
== '$' || ((c
> 127) && Character
.isJavaIdentifierPart(c
))) {
406 if (pos
== lBufferEnd
) break;
407 c
= hasArray ? lBufferArray
[pos
]:lBuffer
.charAt(pos
);
// Keyword lookup: the table compares its stored keyword with the text
// beginning at the identifier's first character.
411 if (myTable
.contains(hashCode
, lBuffer
, lBufferArray
, offset
- 1)) {
412 myTokenType
= myTable
.getTokenType(hashCode
);
414 myTokenType
= JavaTokenType
.IDENTIFIER
;
/**
 * Returns the lexed text as a char array: the backing array when one is
 * available, otherwise the result of {@code CharArrayUtil.fromSequence(myBuffer)}.
 */
420 public final char[] getBuffer() {
421 return myBufferArray
!= null ? myBufferArray
: CharArrayUtil
.fromSequence(myBuffer
);
// Presumably returns the lexed text as a CharSequence.
// NOTE(review): the method body is not visible in this excerpt.
424 public CharSequence
getBufferSequence() {
/** Returns the offset at which lexing stops ({@code myBufferEndOffset}). */
428 public final int getBufferEnd() {
429 return myBufferEndOffset
;
/**
 * Manual driver: reads the file named by {@code args[0]}, lexes its contents
 * with a JDK 1.5 level lexer and iterates over the tokens until the token
 * type is null. I/O failures are only printed as stack traces.
 * NOTE(review): the try header, the declaration of {@code s}, the body of the
 * token loop and any closing of the reader are not visible in this excerpt.
 *
 * @param args command-line arguments; the first one is the path of the file to lex
 */
432 public static void main(String
[] args
) {
435 BufferedReader reader
= new BufferedReader(new FileReader(args
[0]));
// Collect the whole file, re-adding the line feed that readLine() strips.
437 StringBuffer buf
= new StringBuffer();
438 while ((s
= reader
.readLine()) != null) {
439 buf
.append(s
).append("\n");
442 char[] cbuf
= buf
.toString().toCharArray();
444 JavaLexer lexer
= new JavaLexer(LanguageLevel
.JDK_1_5
);
// Lex the whole array, then walk the tokens until the lexer reports none left.
445 lexer
.start(cbuf
, 0, cbuf
.length
);
446 while (lexer
.getTokenType() != null) {
449 } catch (FileNotFoundException e
) {
450 e
.printStackTrace(); //To change body of catch statement use Options | File Templates.
451 } catch (IOException e
) {
452 e
.printStackTrace(); //To change body of catch statement use Options | File Templates.