update copyright
[fedora-idea.git] / java / java-impl / src / com / intellij / lexer / JavaLexer.java
blobd552ab156ca070d408a6c175699d01421d18e274
1 /*
2 * Copyright 2000-2009 JetBrains s.r.o.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 package com.intellij.lexer;
18 import com.intellij.openapi.diagnostic.Logger;
19 import com.intellij.pom.java.LanguageLevel;
20 import com.intellij.psi.JavaTokenType;
21 import com.intellij.psi.TokenType;
22 import com.intellij.psi.tree.IElementType;
24 import java.io.BufferedReader;
25 import java.io.File;
26 import java.io.FileReader;
27 import java.io.IOException;
29 public class JavaLexer extends LexerBase {
30 private JavaLexer(boolean isAssertKeywordEnabled, boolean isJDK15) {
31 myTable = isAssertKeywordEnabled
32 ? isJDK15 ? ourTableWithAssertAndJDK15 : ourTableWithAssert
33 : isJDK15 ? ourTableWithJDK15 : ourTableWithoutAssert;
34 myFlexlexer = new _JavaLexer(isAssertKeywordEnabled, isJDK15);
/**
 * Creates a lexer whose keyword set is derived from the given language level.
 *
 * @param level queried for {@code hasAssertKeyword()} and {@code hasEnumKeywordAndAutoboxing()}
 */
public JavaLexer(LanguageLevel level) {
  this(level.hasAssertKeyword(),
       level.hasEnumKeywordAndAutoboxing());
}
41 private CharSequence myBuffer;
42 private int myBufferIndex;
43 private int myBufferEndOffset;
45 private IElementType myTokenType;
46 private _JavaLexer myFlexlexer;
48 //Positioned after the last symbol of the current token
49 private int myTokenEndOffset;
51 private static final class HashTable {
52 private static final int NUM_ENTRIES = 999;
53 private static final Logger LOG = Logger.getInstance("com.intellij.Lexer.JavaLexer");
55 private final char[][] myTable = new char[NUM_ENTRIES][];
56 private final IElementType[] myKeywords = new IElementType[NUM_ENTRIES];
58 private void add(String s, IElementType tokenType) {
59 char[] chars = s.toCharArray();
60 int hashCode = chars[0] * 2;
61 for (int j = 1; j < chars.length; j++) {
62 hashCode += chars[j];
64 int modHashCode = hashCode % NUM_ENTRIES;
65 LOG.assertTrue(myTable[modHashCode] == null);
67 myTable[modHashCode] = chars;
68 myKeywords[modHashCode] = tokenType;
71 private boolean contains(int hashCode, final CharSequence buffer, int offset) {
72 int modHashCode = hashCode % NUM_ENTRIES;
73 final char[] kwd = myTable[modHashCode];
74 if (kwd == null) return false;
76 for (int j = 0; j < kwd.length; j++) {
77 if (buffer.charAt(j + offset) != kwd[j]) return false;
79 return true;
82 private IElementType getTokenType(int hashCode) {
83 return myKeywords[hashCode % NUM_ENTRIES];
86 @SuppressWarnings({"HardCodedStringLiteral"})
87 private HashTable(boolean isAssertKeywordEnabled, boolean isJDK15) {
88 if (isAssertKeywordEnabled) {
89 add("assert", JavaTokenType.ASSERT_KEYWORD);
91 if (isJDK15) {
92 add("enum", JavaTokenType.ENUM_KEYWORD);
94 add("abstract", JavaTokenType.ABSTRACT_KEYWORD);
95 add("default", JavaTokenType.DEFAULT_KEYWORD);
96 add("if", JavaTokenType.IF_KEYWORD);
97 add("private", JavaTokenType.PRIVATE_KEYWORD);
98 add("this", JavaTokenType.THIS_KEYWORD);
99 add("boolean", JavaTokenType.BOOLEAN_KEYWORD);
100 add("do", JavaTokenType.DO_KEYWORD);
101 add("implements", JavaTokenType.IMPLEMENTS_KEYWORD);
102 add("protected", JavaTokenType.PROTECTED_KEYWORD);
103 add("throw", JavaTokenType.THROW_KEYWORD);
104 add("break", JavaTokenType.BREAK_KEYWORD);
105 add("double", JavaTokenType.DOUBLE_KEYWORD);
106 add("import", JavaTokenType.IMPORT_KEYWORD);
107 add("public", JavaTokenType.PUBLIC_KEYWORD);
108 add("throws", JavaTokenType.THROWS_KEYWORD);
109 add("byte", JavaTokenType.BYTE_KEYWORD);
110 add("else", JavaTokenType.ELSE_KEYWORD);
111 add("instanceof", JavaTokenType.INSTANCEOF_KEYWORD);
112 add("return", JavaTokenType.RETURN_KEYWORD);
113 add("transient", JavaTokenType.TRANSIENT_KEYWORD);
114 add("case", JavaTokenType.CASE_KEYWORD);
115 add("extends", JavaTokenType.EXTENDS_KEYWORD);
116 add("int", JavaTokenType.INT_KEYWORD);
117 add("short", JavaTokenType.SHORT_KEYWORD);
118 add("try", JavaTokenType.TRY_KEYWORD);
119 add("catch", JavaTokenType.CATCH_KEYWORD);
120 add("final", JavaTokenType.FINAL_KEYWORD);
121 add("interface", JavaTokenType.INTERFACE_KEYWORD);
122 add("static", JavaTokenType.STATIC_KEYWORD);
123 add("void", JavaTokenType.VOID_KEYWORD);
124 add("char", JavaTokenType.CHAR_KEYWORD);
125 add("finally", JavaTokenType.FINALLY_KEYWORD);
126 add("long", JavaTokenType.LONG_KEYWORD);
127 add("strictfp", JavaTokenType.STRICTFP_KEYWORD);
128 add("volatile", JavaTokenType.VOLATILE_KEYWORD);
129 add("class", JavaTokenType.CLASS_KEYWORD);
130 add("float", JavaTokenType.FLOAT_KEYWORD);
131 add("native", JavaTokenType.NATIVE_KEYWORD);
132 add("super", JavaTokenType.SUPER_KEYWORD);
133 add("while", JavaTokenType.WHILE_KEYWORD);
134 add("const", JavaTokenType.CONST_KEYWORD);
135 add("for", JavaTokenType.FOR_KEYWORD);
136 add("new", JavaTokenType.NEW_KEYWORD);
137 add("switch", JavaTokenType.SWITCH_KEYWORD);
138 add("continue", JavaTokenType.CONTINUE_KEYWORD);
139 add("goto", JavaTokenType.GOTO_KEYWORD);
140 add("package", JavaTokenType.PACKAGE_KEYWORD);
141 add("synchronized", JavaTokenType.SYNCHRONIZED_KEYWORD);
142 add("true", JavaTokenType.TRUE_KEYWORD);
143 add("false", JavaTokenType.FALSE_KEYWORD);
144 add("null", JavaTokenType.NULL_KEYWORD);
148 private final HashTable myTable;
149 private static final HashTable ourTableWithoutAssert = new HashTable(false, false);
150 private static final HashTable ourTableWithAssert = new HashTable(true, false);
151 private static final HashTable ourTableWithAssertAndJDK15 = new HashTable(true, true);
152 private static final HashTable ourTableWithJDK15 = new HashTable(false, true);
154 public final void start(CharSequence buffer, int startOffset, int endOffset, int initialState) {
155 myBuffer = buffer;
156 myBufferIndex = startOffset;
157 myBufferEndOffset = endOffset;
158 myTokenType = null;
159 myTokenEndOffset = startOffset;
160 myFlexlexer.reset(myBuffer, startOffset, endOffset, 0);
163 public int getState() {
164 return 0;
167 public final IElementType getTokenType() {
168 locateToken();
170 return myTokenType;
173 public final int getTokenStart() {
174 return myBufferIndex;
177 public final int getTokenEnd() {
178 locateToken();
179 return myTokenEndOffset;
183 public final void advance() {
184 locateToken();
185 myTokenType = null;
188 protected final void locateToken() {
189 if (myTokenType != null) return;
190 _locateToken();
193 private void _locateToken() {
195 if (myTokenEndOffset == myBufferEndOffset) {
196 myTokenType = null;
197 myBufferIndex = myBufferEndOffset;
198 return;
201 myBufferIndex = myTokenEndOffset;
203 final char c = myBuffer.charAt(myBufferIndex);
204 switch (c) {
205 default:
206 flexLocateToken();
207 break;
209 case ' ':
210 case '\t':
211 case '\n':
212 case '\r':
213 case '\f':
214 myTokenType = TokenType.WHITE_SPACE;
215 myTokenEndOffset = getWhitespaces(myBufferIndex + 1);
216 break;
218 case '/':
219 if (myBufferIndex + 1 >= myBufferEndOffset) {
220 myTokenType = JavaTokenType.DIV;
221 myTokenEndOffset = myBufferEndOffset;
223 else {
224 final char nextChar = myBuffer.charAt(myBufferIndex + 1);
226 if (nextChar == '/') {
227 myTokenType = JavaTokenType.END_OF_LINE_COMMENT;
228 myTokenEndOffset = getLineTerminator(myBufferIndex + 2);
230 else if (nextChar == '*') {
231 if (myBufferIndex + 2 >= myBufferEndOffset || myBuffer.charAt(myBufferIndex + 2) != '*') {
232 myTokenType = JavaTokenType.C_STYLE_COMMENT;
233 myTokenEndOffset = getClosingComment(myBufferIndex + 2);
235 else {
236 myTokenType = JavaTokenType.DOC_COMMENT;
237 myTokenEndOffset = getDocClosingComment(myBufferIndex + 3);
240 else if (c > 127 && Character.isJavaIdentifierStart(c)) {
241 myTokenEndOffset = getIdentifier(myBufferIndex + 1);
243 else {
244 flexLocateToken();
247 break;
249 case '"':
250 case '\'':
251 myTokenType = c == '"' ? JavaTokenType.STRING_LITERAL : JavaTokenType.CHARACTER_LITERAL;
252 myTokenEndOffset = getClosingParenthesys(myBufferIndex + 1, c);
255 if (myTokenEndOffset > myBufferEndOffset) {
256 myTokenEndOffset = myBufferEndOffset;
260 private int getWhitespaces(int pos) {
261 if (pos >= myBufferEndOffset) return myBufferEndOffset;
262 final CharSequence lBuffer = myBuffer;
264 char c = lBuffer.charAt(pos);
266 while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') {
267 pos++;
268 if (pos == myBufferEndOffset) return pos;
269 c = lBuffer.charAt(pos);
272 return pos;
275 private void flexLocateToken() {
276 try {
277 myFlexlexer.goTo(myBufferIndex);
278 myTokenType = myFlexlexer.advance();
279 myTokenEndOffset = myFlexlexer.getTokenEnd();
281 catch (IOException e) {
282 // Can't be
287 private int getClosingParenthesys(int offset, char c) {
288 int pos = offset;
289 final int lBufferEnd = myBufferEndOffset;
290 if (pos >= lBufferEnd) return lBufferEnd;
292 final CharSequence lBuffer = myBuffer;
293 char cur = lBuffer.charAt(pos);
295 while (true) {
296 while (cur != c && cur != '\n' && cur != '\r' && cur != '\\') {
297 pos++;
298 if (pos >= lBufferEnd) return lBufferEnd;
299 cur = lBuffer.charAt(pos);
302 if (cur == '\\') {
303 pos++;
304 if (pos >= lBufferEnd) return lBufferEnd;
305 cur = lBuffer.charAt(pos);
306 if (cur == '\n' || cur == '\r') continue;
307 pos++;
308 if (pos >= lBufferEnd) return lBufferEnd;
309 cur = lBuffer.charAt(pos);
311 else if (cur == c) {
312 break;
314 else {
315 pos--;
316 break;
320 return pos + 1;
323 private int getDocClosingComment(int offset) {
324 final int lBufferEnd = myBufferEndOffset;
325 final CharSequence lBuffer = myBuffer;
327 if (offset < lBufferEnd && lBuffer.charAt(offset) == '/') {
328 return offset + 1;
331 int pos = offset;
332 while (pos < lBufferEnd - 1) {
333 final char c = lBuffer.charAt(pos);
335 if (c == '*' && lBuffer.charAt(pos + 1) == '/') {
336 break;
338 pos++;
340 return pos + 2;
343 private int getClosingComment(int offset) {
344 int pos = offset;
346 final int lBufferEnd = myBufferEndOffset;
347 final CharSequence lBuffer = myBuffer;
349 while (pos < lBufferEnd - 1) {
350 final char c = lBuffer.charAt(pos);
352 if (c == '*' && lBuffer.charAt(pos + 1) == '/') {
353 break;
355 pos++;
358 return pos + 2;
361 private int getLineTerminator(int offset) {
362 int pos = offset;
363 final int lBufferEnd = myBufferEndOffset;
364 final CharSequence lBuffer = myBuffer;
366 while (pos < lBufferEnd) {
367 final char c = lBuffer.charAt(pos);
368 if (c == '\r' || c == '\n') break;
369 pos++;
372 return pos;
375 private int getIdentifier(int offset) {
376 final CharSequence lBuffer = myBuffer;
378 int hashCode = lBuffer.charAt(offset - 1) * 2;
379 final int lBufferEnd = myBufferEndOffset;
381 int pos = offset;
382 if (pos < lBufferEnd) {
383 char c = lBuffer.charAt(pos);
385 while (c >= 'a' && c <= 'z' ||
386 c >= 'A' && c <= 'Z' ||
387 c >= '0' && c <= '9' ||
388 c == '_' ||
389 c == '$' ||
390 c > 127 && Character.isJavaIdentifierPart(c)) {
391 pos++;
392 hashCode += c;
394 if (pos == lBufferEnd) break;
395 c = lBuffer.charAt(pos);
399 if (myTable.contains(hashCode, lBuffer, offset - 1)) {
400 myTokenType = myTable.getTokenType(hashCode);
402 else {
403 myTokenType = JavaTokenType.IDENTIFIER;
406 return pos;
409 public CharSequence getBufferSequence() {
410 return myBuffer;
413 public final int getBufferEnd() {
414 return myBufferEndOffset;
417 public static void main(String[] args) throws IOException {
418 File root = new File(args[0]);
420 Stats stats = new Stats();
421 walk(root, stats);
423 System.out.println("Scanned " + stats.files + " files, total of " + stats.lines + " lines in " + (stats.time / 1000000) + " ms.");
424 System.out.println("Size:" + stats.bytes);
428 private static void lex(File root, Stats stats) throws IOException {
429 stats.files++;
430 BufferedReader reader = new BufferedReader(new FileReader(root));
431 String s;
432 StringBuilder buf = new StringBuilder();
433 while ((s = reader.readLine()) != null) {
434 stats.lines++;
435 buf.append(s).append("\n");
438 stats.bytes += buf.length();
440 long start = System.nanoTime();
441 lexText(buf);
442 stats.time += System.nanoTime() - start;
445 private static void lexText(StringBuilder buf) {
446 JavaLexer lexer = new JavaLexer(LanguageLevel.JDK_1_5);
447 lexer.start(buf);
448 while (lexer.getTokenType() != null) {
449 lexer.advance();
453 private static class Stats {
454 public int files;
455 public int lines;
456 public long time;
457 public long bytes;
460 private static void walk(File root, Stats stats) throws IOException {
461 if (root.isDirectory()) {
462 System.out.println("Lexing in " + root.getPath());
463 for (File file : root.listFiles()) {
464 walk(file, stats);
467 else {
468 if (root.getName().endsWith(".java")) {
469 lex(root, stats);