/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com
.intellij
.lexer
;
18 import com
.intellij
.openapi
.diagnostic
.Logger
;
19 import com
.intellij
.pom
.java
.LanguageLevel
;
20 import com
.intellij
.psi
.JavaTokenType
;
21 import com
.intellij
.psi
.TokenType
;
22 import com
.intellij
.psi
.tree
.IElementType
;
24 import java
.io
.BufferedReader
;
26 import java
.io
.FileReader
;
27 import java
.io
.IOException
;
29 public class JavaLexer
extends LexerBase
{
/**
 * Creates a lexer bound to the shared keyword table that matches the given
 * feature flags, together with a generated {@code _JavaLexer} configured
 * with the same flags.
 *
 * @param isAssertKeywordEnabled selects a table built with the assert flag set
 * @param isJDK15                selects a table built with the JDK 1.5 flag set
 */
private JavaLexer(boolean isAssertKeywordEnabled, boolean isJDK15) {
  if (isAssertKeywordEnabled) {
    myTable = isJDK15 ? ourTableWithAssertAndJDK15 : ourTableWithAssert;
  }
  else {
    myTable = isJDK15 ? ourTableWithJDK15 : ourTableWithoutAssert;
  }
  myFlexlexer = new _JavaLexer(isAssertKeywordEnabled, isJDK15);
}
/**
 * Creates a lexer for the given language level by deriving both feature
 * flags from it and delegating to the two-flag constructor.
 *
 * @param level language level queried via {@code hasAssertKeyword()} and
 *              {@code hasEnumKeywordAndAutoboxing()}
 */
public JavaLexer(LanguageLevel level) {
  this(level.hasAssertKeyword(), level.hasEnumKeywordAndAutoboxing());
}
41 private CharSequence myBuffer
;
42 private int myBufferIndex
;
43 private int myBufferEndOffset
;
45 private IElementType myTokenType
;
46 private _JavaLexer myFlexlexer
;
48 //Positioned after the last symbol of the current token
49 private int myTokenEndOffset
;
51 private static final class HashTable
{
52 private static final int NUM_ENTRIES
= 999;
53 private static final Logger LOG
= Logger
.getInstance("com.intellij.Lexer.JavaLexer");
55 private final char[][] myTable
= new char[NUM_ENTRIES
][];
56 private final IElementType
[] myKeywords
= new IElementType
[NUM_ENTRIES
];
/**
 * Registers one keyword: stores its characters in myTable and its token type
 * in myKeywords, both at index (hashCode % NUM_ENTRIES).
 *
 * @param s         keyword text
 * @param tokenType token type stored for that keyword
 */
58 private void add(String s
, IElementType tokenType
) {
59 char[] chars
= s
.toCharArray();
// The hash starts from the first character counted twice.
60 int hashCode
= chars
[0] * 2;
// NOTE(review): the body of this loop is not present in this listing;
// presumably it folds chars[j] into hashCode - confirm against the full file.
61 for (int j
= 1; j
< chars
.length
; j
++) {
64 int modHashCode
= hashCode
% NUM_ENTRIES
;
// The slot must still be empty: collisions between keywords are not handled.
65 LOG
.assertTrue(myTable
[modHashCode
] == null);
67 myTable
[modHashCode
] = chars
;
68 myKeywords
[modHashCode
] = tokenType
;
/**
 * Compares the keyword stored in the slot for hashCode with the characters
 * of buffer that start at offset.
 *
 * @param hashCode hash of the candidate text; reduced modulo NUM_ENTRIES here
 * @param buffer   text containing the candidate
 * @param offset   index in buffer of the candidate's first character
 * @return false when the slot is empty or any compared character differs
 */
71 private boolean contains(int hashCode
, final CharSequence buffer
, int offset
) {
72 int modHashCode
= hashCode
% NUM_ENTRIES
;
73 final char[] kwd
= myTable
[modHashCode
];
// Empty slot: no keyword has this hash.
74 if (kwd
== null) return false;
// Only kwd.length characters are compared; the candidate's own length is not
// passed in.
76 for (int j
= 0; j
< kwd
.length
; j
++) {
77 if (buffer
.charAt(j
+ offset
) != kwd
[j
]) return false;
// NOTE(review): the result returned after a full match is not visible in this
// listing; presumably true - confirm.
82 private IElementType
getTokenType(int hashCode
) {
83 return myKeywords
[hashCode
% NUM_ENTRIES
];
/**
 * Fills the table by calling add() once per keyword spelling.
 * "assert" is registered only when isAssertKeywordEnabled is true.
 *
 * NOTE(review): isJDK15 is not referenced in the lines visible here;
 * presumably it guards the "enum" entry - confirm against the full file.
 *
 * @param isAssertKeywordEnabled whether "assert" is registered
 * @param isJDK15                not used in the visible lines (see note)
 */
86 @SuppressWarnings({"HardCodedStringLiteral"})
87 private HashTable(boolean isAssertKeywordEnabled
, boolean isJDK15
) {
88 if (isAssertKeywordEnabled
) {
89 add("assert", JavaTokenType
.ASSERT_KEYWORD
);
92 add("enum", JavaTokenType
.ENUM_KEYWORD
);
94 add("abstract", JavaTokenType
.ABSTRACT_KEYWORD
);
95 add("default", JavaTokenType
.DEFAULT_KEYWORD
);
96 add("if", JavaTokenType
.IF_KEYWORD
);
97 add("private", JavaTokenType
.PRIVATE_KEYWORD
);
98 add("this", JavaTokenType
.THIS_KEYWORD
);
99 add("boolean", JavaTokenType
.BOOLEAN_KEYWORD
);
100 add("do", JavaTokenType
.DO_KEYWORD
);
101 add("implements", JavaTokenType
.IMPLEMENTS_KEYWORD
);
102 add("protected", JavaTokenType
.PROTECTED_KEYWORD
);
103 add("throw", JavaTokenType
.THROW_KEYWORD
);
104 add("break", JavaTokenType
.BREAK_KEYWORD
);
105 add("double", JavaTokenType
.DOUBLE_KEYWORD
);
106 add("import", JavaTokenType
.IMPORT_KEYWORD
);
107 add("public", JavaTokenType
.PUBLIC_KEYWORD
);
108 add("throws", JavaTokenType
.THROWS_KEYWORD
);
109 add("byte", JavaTokenType
.BYTE_KEYWORD
);
110 add("else", JavaTokenType
.ELSE_KEYWORD
);
111 add("instanceof", JavaTokenType
.INSTANCEOF_KEYWORD
);
112 add("return", JavaTokenType
.RETURN_KEYWORD
);
113 add("transient", JavaTokenType
.TRANSIENT_KEYWORD
);
114 add("case", JavaTokenType
.CASE_KEYWORD
);
115 add("extends", JavaTokenType
.EXTENDS_KEYWORD
);
116 add("int", JavaTokenType
.INT_KEYWORD
);
117 add("short", JavaTokenType
.SHORT_KEYWORD
);
118 add("try", JavaTokenType
.TRY_KEYWORD
);
119 add("catch", JavaTokenType
.CATCH_KEYWORD
);
120 add("final", JavaTokenType
.FINAL_KEYWORD
);
121 add("interface", JavaTokenType
.INTERFACE_KEYWORD
);
122 add("static", JavaTokenType
.STATIC_KEYWORD
);
123 add("void", JavaTokenType
.VOID_KEYWORD
);
124 add("char", JavaTokenType
.CHAR_KEYWORD
);
125 add("finally", JavaTokenType
.FINALLY_KEYWORD
);
126 add("long", JavaTokenType
.LONG_KEYWORD
);
127 add("strictfp", JavaTokenType
.STRICTFP_KEYWORD
);
128 add("volatile", JavaTokenType
.VOLATILE_KEYWORD
);
129 add("class", JavaTokenType
.CLASS_KEYWORD
);
130 add("float", JavaTokenType
.FLOAT_KEYWORD
);
131 add("native", JavaTokenType
.NATIVE_KEYWORD
);
132 add("super", JavaTokenType
.SUPER_KEYWORD
);
133 add("while", JavaTokenType
.WHILE_KEYWORD
);
134 add("const", JavaTokenType
.CONST_KEYWORD
);
135 add("for", JavaTokenType
.FOR_KEYWORD
);
136 add("new", JavaTokenType
.NEW_KEYWORD
);
137 add("switch", JavaTokenType
.SWITCH_KEYWORD
);
138 add("continue", JavaTokenType
.CONTINUE_KEYWORD
);
139 add("goto", JavaTokenType
.GOTO_KEYWORD
);
140 add("package", JavaTokenType
.PACKAGE_KEYWORD
);
141 add("synchronized", JavaTokenType
.SYNCHRONIZED_KEYWORD
);
142 add("true", JavaTokenType
.TRUE_KEYWORD
);
143 add("false", JavaTokenType
.FALSE_KEYWORD
);
144 add("null", JavaTokenType
.NULL_KEYWORD
);
148 private final HashTable myTable
;
149 private static final HashTable ourTableWithoutAssert
= new HashTable(false, false);
150 private static final HashTable ourTableWithAssert
= new HashTable(true, false);
151 private static final HashTable ourTableWithAssertAndJDK15
= new HashTable(true, true);
152 private static final HashTable ourTableWithJDK15
= new HashTable(false, true);
/**
 * Positions the lexer on the range [startOffset, endOffset) and resets the
 * generated lexer to the same range.
 *
 * NOTE(review): the statements at original lines 155 and 158 are missing
 * from this listing. The reset() call below reads the field myBuffer rather
 * than the parameter, so presumably myBuffer is assigned from buffer first -
 * confirm.
 *
 * @param buffer       text to tokenize
 * @param startOffset  offset at which scanning begins
 * @param endOffset    offset at which scanning stops
 * @param initialState not read in the visible lines; reset() is passed 0
 */
154 public final void start(CharSequence buffer
, int startOffset
, int endOffset
, int initialState
) {
156 myBufferIndex
= startOffset
;
157 myBufferEndOffset
= endOffset
;
// No token has been scanned yet, so the token end coincides with the start.
159 myTokenEndOffset
= startOffset
;
160 myFlexlexer
.reset(myBuffer
, startOffset
, endOffset
, 0);
163 public int getState() {
167 public final IElementType
getTokenType() {
173 public final int getTokenStart() {
174 return myBufferIndex
;
/**
 * Returns the offset just past the current token.
 *
 * NOTE(review): original line 178 is missing from this listing; presumably
 * it makes sure the token has been located first - confirm.
 *
 * @return the value of myTokenEndOffset
 */
177 public final int getTokenEnd() {
179 return myTokenEndOffset
;
183 public final void advance() {
/**
 * Guard around token scanning: does nothing when a token type is already set.
 *
 * NOTE(review): the statement that follows the guard is missing from this
 * listing; presumably it calls _locateToken() - confirm.
 */
188 protected final void locateToken() {
// A non-null type means the current token has already been located.
189 if (myTokenType
!= null) return;
/**
 * Scans the token that starts where the previous one ended and records its
 * type in myTokenType and its end in myTokenEndOffset.
 *
 * NOTE(review): this listing has lost many lines of the method (the
 * branching on the first character and several branches). The comments
 * below describe only the statements that are visible.
 */
193 private void _locateToken() {
// Previous token ended at the end of the buffer: move the start there too.
195 if (myTokenEndOffset
== myBufferEndOffset
) {
197 myBufferIndex
= myBufferEndOffset
;
// The new token starts where the previous one ended.
201 myBufferIndex
= myTokenEndOffset
;
203 final char c
= myBuffer
.charAt(myBufferIndex
);
// Whitespace token: its end is wherever getWhitespaces() stops scanning.
214 myTokenType
= TokenType
.WHITE_SPACE
;
215 myTokenEndOffset
= getWhitespaces(myBufferIndex
+ 1);
// No character follows the current one: emit DIV up to the end of the buffer.
219 if (myBufferIndex
+ 1 >= myBufferEndOffset
) {
220 myTokenType
= JavaTokenType
.DIV
;
221 myTokenEndOffset
= myBufferEndOffset
;
// Otherwise the next character decides between the comment forms.
224 final char nextChar
= myBuffer
.charAt(myBufferIndex
+ 1);
226 if (nextChar
== '/') {
227 myTokenType
= JavaTokenType
.END_OF_LINE_COMMENT
;
228 myTokenEndOffset
= getLineTerminator(myBufferIndex
+ 2);
230 else if (nextChar
== '*') {
// A third character other than '*' (or none at all) means a plain block
// comment; otherwise the doc-comment branch further down applies.
231 if (myBufferIndex
+ 2 >= myBufferEndOffset
|| myBuffer
.charAt(myBufferIndex
+ 2) != '*') {
232 myTokenType
= JavaTokenType
.C_STYLE_COMMENT
;
233 myTokenEndOffset
= getClosingComment(myBufferIndex
+ 2);
236 myTokenType
= JavaTokenType
.DOC_COMMENT
;
237 myTokenEndOffset
= getDocClosingComment(myBufferIndex
+ 3);
// Non-ASCII identifier start: getIdentifier() sets the token type itself.
240 else if (c
> 127 && Character
.isJavaIdentifierStart(c
)) {
241 myTokenEndOffset
= getIdentifier(myBufferIndex
+ 1);
// Quoted literal: the opening quote character selects string vs. character.
251 myTokenType
= c
== '"' ? JavaTokenType
.STRING_LITERAL
: JavaTokenType
.CHARACTER_LITERAL
;
252 myTokenEndOffset
= getClosingParenthesys(myBufferIndex
+ 1, c
);
// Clamp: a token never extends past the end of the lexed range.
255 if (myTokenEndOffset
> myBufferEndOffset
) {
256 myTokenEndOffset
= myBufferEndOffset
;
/**
 * Scans a run of whitespace (space, tab, line feed, carriage return, form
 * feed) beginning at pos.
 *
 * NOTE(review): the statement that advances pos inside the loop and the
 * final return are missing from this listing.
 *
 * @param pos offset of the first character to examine
 * @return myBufferEndOffset when pos is already at or past it, or pos when
 *         the run reaches the end of the buffer
 */
260 private int getWhitespaces(int pos
) {
261 if (pos
>= myBufferEndOffset
) return myBufferEndOffset
;
262 final CharSequence lBuffer
= myBuffer
;
264 char c
= lBuffer
.charAt(pos
);
266 while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f') {
// Stop before reading past the end of the lexed range.
268 if (pos
== myBufferEndOffset
) return pos
;
269 c
= lBuffer
.charAt(pos
);
/**
 * Delegates scanning of the current token to the generated lexer: positions
 * it at myBufferIndex, advances it once, and copies the resulting token type
 * and token end into this lexer's fields.
 *
 * NOTE(review): the try keyword and the body of the catch clause are
 * missing from this listing, so the handling of IOException cannot be
 * described from here.
 */
275 private void flexLocateToken() {
277 myFlexlexer
.goTo(myBufferIndex
);
278 myTokenType
= myFlexlexer
.advance();
279 myTokenEndOffset
= myFlexlexer
.getTokenEnd();
281 catch (IOException e
) {
/**
 * Scans forward looking for the closing quote character c of a string or
 * character literal (the call visible in _locateToken() passes the opening
 * quote).
 *
 * NOTE(review): the declaration of pos, the statements that advance it, the
 * outer loop and the final return are missing from this listing; only the
 * visible checks are described.
 *
 * @param offset offset at which scanning starts
 * @param c      quote character that terminates the literal
 * @return lBufferEnd whenever the scan reaches the end of the buffer
 */
287 private int getClosingParenthesys(int offset
, char c
) {
289 final int lBufferEnd
= myBufferEndOffset
;
290 if (pos
>= lBufferEnd
) return lBufferEnd
;
292 final CharSequence lBuffer
= myBuffer
;
293 char cur
= lBuffer
.charAt(pos
);
// Skip ordinary characters; stop at the quote, a line break or a backslash.
296 while (cur
!= c
&& cur
!= '\n' && cur
!= '\r' && cur
!= '\\') {
298 if (pos
>= lBufferEnd
) return lBufferEnd
;
299 cur
= lBuffer
.charAt(pos
);
304 if (pos
>= lBufferEnd
) return lBufferEnd
;
305 cur
= lBuffer
.charAt(pos
);
// A line break at this point restarts the enclosing loop.
306 if (cur
== '\n' || cur
== '\r') continue;
308 if (pos
>= lBufferEnd
) return lBufferEnd
;
309 cur
= lBuffer
.charAt(pos
);
/**
 * Finds the end of a doc comment whose body starts at offset.
 *
 * NOTE(review): the bodies of both if statements, the declaration and
 * advancing of pos, and the final return are missing from this listing.
 *
 * @param offset offset of the first character after the comment opener
 */
323 private int getDocClosingComment(int offset
) {
324 final int lBufferEnd
= myBufferEndOffset
;
325 final CharSequence lBuffer
= myBuffer
;
// Special case: the character right at offset is already a slash.
327 if (offset
< lBufferEnd
&& lBuffer
.charAt(offset
) == '/') {
// Stop one short of the end so that pos + 1 below is always readable.
332 while (pos
< lBufferEnd
- 1) {
333 final char c
= lBuffer
.charAt(pos
);
// Comment terminator: an asterisk immediately followed by a slash.
335 if (c
== '*' && lBuffer
.charAt(pos
+ 1) == '/') {
/**
 * Finds the end of a block comment whose body starts at offset.
 *
 * NOTE(review): the declaration and advancing of pos, the body of the if
 * statement and the final return are missing from this listing.
 *
 * @param offset offset of the first character after the comment opener
 */
343 private int getClosingComment(int offset
) {
346 final int lBufferEnd
= myBufferEndOffset
;
347 final CharSequence lBuffer
= myBuffer
;
// Stop one short of the end so that pos + 1 below is always readable.
349 while (pos
< lBufferEnd
- 1) {
350 final char c
= lBuffer
.charAt(pos
);
// Comment terminator: an asterisk immediately followed by a slash.
352 if (c
== '*' && lBuffer
.charAt(pos
+ 1) == '/') {
/**
 * Scans forward until a carriage return or line feed is found, or the end
 * of the buffer is reached.
 *
 * NOTE(review): the declaration and advancing of pos and the final return
 * are missing from this listing.
 *
 * @param offset offset at which scanning starts
 */
361 private int getLineTerminator(int offset
) {
363 final int lBufferEnd
= myBufferEndOffset
;
364 final CharSequence lBuffer
= myBuffer
;
366 while (pos
< lBufferEnd
) {
367 final char c
= lBuffer
.charAt(pos
);
// The loop ends on the line break itself.
368 if (c
== '\r' || c
== '\n') break;
/**
 * Scans the rest of an identifier whose first character is at offset - 1,
 * then sets myTokenType to the matching keyword type from myTable, or to
 * IDENTIFIER when the table has no match.
 *
 * NOTE(review): the declaration and advancing of pos, the update of hashCode
 * inside the loop, two lines of the loop condition (original lines 388-389)
 * and the final return are missing from this listing.
 *
 * @param offset offset of the identifier's second character
 */
375 private int getIdentifier(int offset
) {
376 final CharSequence lBuffer
= myBuffer
;
// Same seed as HashTable.add(): the first character counted twice.
378 int hashCode
= lBuffer
.charAt(offset
- 1) * 2;
379 final int lBufferEnd
= myBufferEndOffset
;
382 if (pos
< lBufferEnd
) {
383 char c
= lBuffer
.charAt(pos
);
// ASCII letters and digits are tested directly; only characters above 127
// go through Character.isJavaIdentifierPart().
385 while (c
>= 'a' && c
<= 'z' ||
386 c
>= 'A' && c
<= 'Z' ||
387 c
>= '0' && c
<= '9' ||
390 c
> 127 && Character
.isJavaIdentifierPart(c
)) {
394 if (pos
== lBufferEnd
) break;
395 c
= lBuffer
.charAt(pos
);
// Keyword lookup starts at the identifier's first character.
399 if (myTable
.contains(hashCode
, lBuffer
, offset
- 1)) {
400 myTokenType
= myTable
.getTokenType(hashCode
);
403 myTokenType
= JavaTokenType
.IDENTIFIER
;
409 public CharSequence
getBufferSequence() {
413 public final int getBufferEnd() {
414 return myBufferEndOffset
;
/**
 * Command-line entry point: builds a File from the first argument and
 * prints the totals collected in a Stats object.
 *
 * NOTE(review): the call that fills stats (original lines 421-422) is
 * missing from this listing; presumably it is walk(root, stats) - confirm.
 *
 * @param args args[0] is the path to scan
 * @throws IOException declared by the method; the call that can raise it is
 *                     not visible here
 */
417 public static void main(String
[] args
) throws IOException
{
418 File root
= new File(args
[0]);
420 Stats stats
= new Stats();
// stats.time accumulates System.nanoTime() differences, hence the division
// by 1000000 to print milliseconds.
423 System
.out
.println("Scanned " + stats
.files
+ " files, total of " + stats
.lines
+ " lines in " + (stats
.time
/ 1000000) + " ms.");
424 System
.out
.println("Size:" + stats
.bytes
);
/**
 * Reads the file root line by line into a StringBuilder, adds its length to
 * stats.bytes and adds the elapsed time of the timed section to stats.time.
 *
 * NOTE(review): the declaration of s, the other counter updates and the
 * statement being timed (between the two nanoTime() calls) are missing from
 * this listing. No close() of reader is visible either - if the full file
 * has none, a try-with-resources block would avoid leaking the file handle.
 *
 * @param root  file to read
 * @param stats accumulator updated in place
 * @throws IOException if reading the file fails
 */
428 private static void lex(File root
, Stats stats
) throws IOException
{
430 BufferedReader reader
= new BufferedReader(new FileReader(root
));
432 StringBuilder buf
= new StringBuilder();
433 while ((s
= reader
.readLine()) != null) {
// readLine() strips the line terminator, so a "\n" is appended back.
435 buf
.append(s
).append("\n");
// Counts characters of the rebuilt text, not bytes on disk.
438 stats
.bytes
+= buf
.length();
440 long start
= System
.nanoTime();
442 stats
.time
+= System
.nanoTime() - start
;
/**
 * Creates a JavaLexer at language level JDK_1_5 and loops until the lexer
 * reports a null token type.
 *
 * NOTE(review): the call that hands buf to the lexer and the loop body are
 * missing from this listing.
 *
 * @param buf text to tokenize
 */
445 private static void lexText(StringBuilder buf
) {
446 JavaLexer lexer
= new JavaLexer(LanguageLevel
.JDK_1_5
);
// A null token type is the end-of-input signal used by this loop.
448 while (lexer
.getTokenType() != null) {
453 private static class Stats
{
460 private static void walk(File root
, Stats stats
) throws IOException
{
461 if (root
.isDirectory()) {
462 System
.out
.println("Lexing in " + root
.getPath());
463 for (File file
: root
.listFiles()) {
468 if (root
.getName().endsWith(".java")) {