HierarchyBrowserManager is a service
[fedora-idea.git] / source / com / intellij / lexer / JavaLexer.java
blobee9b2f30a25363f4424cdcfb50beb5fb1ee71283
1 package com.intellij.lexer;
3 import com.intellij.openapi.diagnostic.Logger;
4 import com.intellij.pom.java.LanguageLevel;
5 import com.intellij.psi.JavaTokenType;
6 import com.intellij.psi.tree.IElementType;
7 import com.intellij.util.text.CharArrayCharSequence;
8 import com.intellij.util.text.CharArrayUtil;
10 import java.io.BufferedReader;
11 import java.io.FileNotFoundException;
12 import java.io.FileReader;
13 import java.io.IOException;
15 public class JavaLexer extends LexerBase {
/**
 * Creates a lexer bound to the shared keyword table that matches the given
 * feature flags, together with a flex-generated lexer configured the same way.
 *
 * @param isAssertKeywordEnabled whether {@code assert} is treated as a keyword
 * @param isJDK15                whether {@code enum} is treated as a keyword
 */
private JavaLexer(boolean isAssertKeywordEnabled, boolean isJDK15) {
  if (isAssertKeywordEnabled) {
    myTable = isJDK15 ? ourTableWithAssertAndJDK15 : ourTableWithAssert;
  }
  else {
    myTable = isJDK15 ? ourTableWithJDK15 : ourTableWithoutAssert;
  }
  myFlexlexer = new _JavaLexer(isAssertKeywordEnabled, isJDK15);
}
/**
 * Creates a lexer for the given language level; the level decides whether
 * {@code assert} and {@code enum} are recognized as keywords.
 *
 * @param level language level queried via {@code hasAssertKeyword()} and
 *              {@code hasEnumKeywordAndAutoboxing()}
 */
public JavaLexer(LanguageLevel level) {
  this(level.hasAssertKeyword(), level.hasEnumKeywordAndAutoboxing());
}
28 private CharSequence myBuffer;
29 private char[] myBufferArray;
30 private int myBufferIndex;
31 private int myBufferEndOffset;
33 IElementType myTokenType;
34 private _JavaLexer myFlexlexer;
36 //Positioned after the last symbol of the current token
37 private int myTokenEndOffset;
39 private final static class HashTable {
40 static final int NUM_ENTRIES = 999;
41 private static final Logger LOG = Logger.getInstance("com.intellij.Lexer.JavaLexer");
43 final char[][] myTable = new char[NUM_ENTRIES][];
44 final IElementType[] myKeywords = new IElementType[NUM_ENTRIES];
46 void add(String s, IElementType tokenType) {
47 char[] chars = s.toCharArray();
48 int hashCode = chars[0] * 2;
49 for (int j = 1; j < chars.length; j++) {
50 hashCode += chars[j];
52 int modHashCode = hashCode % NUM_ENTRIES;
53 LOG.assertTrue(myTable[modHashCode] == null);
55 myTable[modHashCode] = chars;
56 myKeywords[modHashCode] = tokenType;
59 boolean contains(int hashCode, final CharSequence buffer, final char[] bufferArray, int offset) {
60 int modHashCode = hashCode % NUM_ENTRIES;
61 final char[] kwd = myTable[modHashCode];
62 if (kwd == null) return false;
64 if (bufferArray != null) {
65 for (int j = 0; j < kwd.length; j++) {
66 if (bufferArray[j + offset] != kwd[j]) return false;
68 } else {
69 for (int j = 0; j < kwd.length; j++) {
70 if (buffer.charAt(j + offset) != kwd[j]) return false;
73 return true;
76 IElementType getTokenType(int hashCode) {
77 return myKeywords[hashCode % NUM_ENTRIES];
80 @SuppressWarnings({"HardCodedStringLiteral"})
81 public HashTable(boolean isAssertKeywordEnabled, boolean isJDK15) {
82 if (isAssertKeywordEnabled) {
83 add("assert", JavaTokenType.ASSERT_KEYWORD);
85 if (isJDK15) {
86 add("enum", JavaTokenType.ENUM_KEYWORD);
88 add("abstract", JavaTokenType.ABSTRACT_KEYWORD);
89 add("default", JavaTokenType.DEFAULT_KEYWORD);
90 add("if", JavaTokenType.IF_KEYWORD);
91 add("private", JavaTokenType.PRIVATE_KEYWORD);
92 add("this", JavaTokenType.THIS_KEYWORD);
93 add("boolean", JavaTokenType.BOOLEAN_KEYWORD);
94 add("do", JavaTokenType.DO_KEYWORD);
95 add("implements", JavaTokenType.IMPLEMENTS_KEYWORD);
96 add("protected", JavaTokenType.PROTECTED_KEYWORD);
97 add("throw", JavaTokenType.THROW_KEYWORD);
98 add("break", JavaTokenType.BREAK_KEYWORD);
99 add("double", JavaTokenType.DOUBLE_KEYWORD);
100 add("import", JavaTokenType.IMPORT_KEYWORD);
101 add("public", JavaTokenType.PUBLIC_KEYWORD);
102 add("throws", JavaTokenType.THROWS_KEYWORD);
103 add("byte", JavaTokenType.BYTE_KEYWORD);
104 add("else", JavaTokenType.ELSE_KEYWORD);
105 add("instanceof", JavaTokenType.INSTANCEOF_KEYWORD);
106 add("return", JavaTokenType.RETURN_KEYWORD);
107 add("transient", JavaTokenType.TRANSIENT_KEYWORD);
108 add("case", JavaTokenType.CASE_KEYWORD);
109 add("extends", JavaTokenType.EXTENDS_KEYWORD);
110 add("int", JavaTokenType.INT_KEYWORD);
111 add("short", JavaTokenType.SHORT_KEYWORD);
112 add("try", JavaTokenType.TRY_KEYWORD);
113 add("catch", JavaTokenType.CATCH_KEYWORD);
114 add("final", JavaTokenType.FINAL_KEYWORD);
115 add("interface", JavaTokenType.INTERFACE_KEYWORD);
116 add("static", JavaTokenType.STATIC_KEYWORD);
117 add("void", JavaTokenType.VOID_KEYWORD);
118 add("char", JavaTokenType.CHAR_KEYWORD);
119 add("finally", JavaTokenType.FINALLY_KEYWORD);
120 add("long", JavaTokenType.LONG_KEYWORD);
121 add("strictfp", JavaTokenType.STRICTFP_KEYWORD);
122 add("volatile", JavaTokenType.VOLATILE_KEYWORD);
123 add("class", JavaTokenType.CLASS_KEYWORD);
124 add("float", JavaTokenType.FLOAT_KEYWORD);
125 add("native", JavaTokenType.NATIVE_KEYWORD);
126 add("super", JavaTokenType.SUPER_KEYWORD);
127 add("while", JavaTokenType.WHILE_KEYWORD);
128 add("const", JavaTokenType.CONST_KEYWORD);
129 add("for", JavaTokenType.FOR_KEYWORD);
130 add("new", JavaTokenType.NEW_KEYWORD);
131 add("switch", JavaTokenType.SWITCH_KEYWORD);
132 add("continue", JavaTokenType.CONTINUE_KEYWORD);
133 add("goto", JavaTokenType.GOTO_KEYWORD);
134 add("package", JavaTokenType.PACKAGE_KEYWORD);
135 add("synchronized", JavaTokenType.SYNCHRONIZED_KEYWORD);
136 add("true", JavaTokenType.TRUE_KEYWORD);
137 add("false", JavaTokenType.FALSE_KEYWORD);
138 add("null", JavaTokenType.NULL_KEYWORD);
142 private final HashTable myTable;
143 private final static HashTable ourTableWithoutAssert = new HashTable(false, false);
144 private final static HashTable ourTableWithAssert = new HashTable(true, false);
145 private final static HashTable ourTableWithAssertAndJDK15 = new HashTable(true, true);
146 private final static HashTable ourTableWithJDK15 = new HashTable(false, true);
148 public final void start(CharSequence buffer, int startOffset, int endOffset, int initialState) {
149 myBuffer = buffer;
150 myBufferArray = CharArrayUtil.fromSequenceWithoutCopying(buffer);
151 myBufferIndex = startOffset;
152 myBufferEndOffset = endOffset;
153 myTokenType = null;
154 myTokenEndOffset = startOffset;
155 myFlexlexer.reset(myBuffer, startOffset, endOffset, 0);
158 public final void start(char[] buffer, int startOffset, int endOffset, int initialState) {
159 start(new CharArrayCharSequence(buffer), startOffset, endOffset, initialState);
162 public int getState() {
163 return 0;
166 public final IElementType getTokenType() {
167 locateToken();
169 return myTokenType;
172 public final int getTokenStart() {
173 return myBufferIndex;
176 public final int getTokenEnd() {
177 locateToken();
178 return myTokenEndOffset;
182 public final void advance() {
183 locateToken();
184 myTokenType = null;
187 protected final void locateToken() {
188 if (myTokenType != null) return;
189 _locateToken();
192 private void _locateToken() {
194 if (myTokenEndOffset == myBufferEndOffset) {
195 myTokenType = null;
196 myBufferIndex = myBufferEndOffset;
197 return;
200 myBufferIndex = myTokenEndOffset;
202 final char c = myBufferArray != null ? myBufferArray[myBufferIndex]:myBuffer.charAt(myBufferIndex);
203 switch (c) {
204 default:
205 flexLocateToken();
206 break;
208 case ' ':
209 case '\t':
210 case '\n':
211 case '\r':
212 case '\f':
213 myTokenType = JavaTokenType.WHITE_SPACE;
214 myTokenEndOffset = getWhitespaces(myBufferIndex + 1);
215 break;
217 case '/': {
218 if (myBufferIndex + 1 >= myBufferEndOffset) {
219 myTokenType = JavaTokenType.DIV;
220 myTokenEndOffset = myBufferEndOffset;
222 else {
223 final char nextChar = myBufferArray != null ? myBufferArray[myBufferIndex + 1]:myBuffer.charAt(myBufferIndex + 1);
225 if (nextChar == '/') {
226 myTokenType = JavaTokenType.END_OF_LINE_COMMENT;
227 myTokenEndOffset = getLineTerminator(myBufferIndex + 2);
229 else if (nextChar == '*') {
230 if (myBufferIndex + 2 >= myBufferEndOffset ||
231 (myBufferArray != null ? myBufferArray[myBufferIndex + 2]:myBuffer.charAt(myBufferIndex + 2)) != '*') {
232 myTokenType = JavaTokenType.C_STYLE_COMMENT;
233 myTokenEndOffset = getClosingComment(myBufferIndex + 2);
235 else {
236 myTokenType = JavaTokenType.DOC_COMMENT;
237 myTokenEndOffset = getDocClosingComment(myBufferIndex + 3);
240 else if ((c > 127) && Character.isJavaIdentifierStart(c)) {
241 myTokenEndOffset = getIdentifier(myBufferIndex + 1);
243 else {
244 flexLocateToken();
247 break;
250 case '"':
251 case '\'':
252 myTokenType = c == '"' ? JavaTokenType.STRING_LITERAL : JavaTokenType.CHARACTER_LITERAL;
253 myTokenEndOffset = getClosingParenthesys(myBufferIndex + 1, c);
256 if (myTokenEndOffset > myBufferEndOffset) {
257 myTokenEndOffset = myBufferEndOffset;
261 private int getWhitespaces(int pos) {
262 if (pos >= myBufferEndOffset) return myBufferEndOffset;
263 final CharSequence lBuffer = myBuffer;
264 final char[] lBufferArray = myBufferArray;
265 final boolean hasArray = lBufferArray != null;
267 char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos);
269 while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') {
270 pos++;
271 if (pos == myBufferEndOffset) return pos;
272 c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos);
275 return pos;
278 private void flexLocateToken() {
279 try {
280 myFlexlexer.goTo(myBufferIndex);
281 myTokenType = myFlexlexer.advance();
282 myTokenEndOffset = myFlexlexer.getTokenEnd();
284 catch (IOException e) {
285 // Can't be
290 private int getClosingParenthesys(int offset, char c) {
291 int pos = offset;
292 final int lBufferEnd = myBufferEndOffset;
293 if (pos >= lBufferEnd) return lBufferEnd;
295 final CharSequence lBuffer = myBuffer;
296 final char[] lBufferArray = myBufferArray;
297 final boolean hasArray = lBufferArray != null;
298 char cur = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
300 while (true) {
301 while (cur != c && cur != '\n' && cur != '\r' && cur != '\\') {
302 pos++;
303 if (pos >= lBufferEnd) return lBufferEnd;
304 cur = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
307 if (cur == '\\') {
308 pos++;
309 if (pos >= lBufferEnd) return lBufferEnd;
310 cur = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
311 if (cur == '\n' || cur == '\r') continue;
312 pos ++;
313 if (pos >= lBufferEnd) return lBufferEnd;
314 cur = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
315 } else if (cur == c) {
316 break;
317 } else {
318 pos--;
319 break;
323 return pos + 1;
326 private int getDocClosingComment(int offset) {
327 final int lBufferEnd = myBufferEndOffset;
328 final CharSequence lBuffer = myBuffer;
329 final char[] lBufferArray = myBufferArray;
330 final boolean hasArray = lBufferArray != null;
332 if (offset < lBufferEnd &&
333 (hasArray ? lBufferArray[offset]:lBuffer.charAt(offset)) == '/') {
334 return offset + 1;
337 int pos = offset;
338 while (pos < lBufferEnd - 1) {
339 final char c = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
341 if (c == '*' &&
342 (hasArray ? lBufferArray[pos + 1]:lBuffer.charAt(pos + 1)) == '/'
344 break;
346 pos++;
348 return pos + 2;
351 private int getClosingComment(int offset) {
352 int pos = offset;
354 final int lBufferEnd = myBufferEndOffset;
355 final CharSequence lBuffer = myBuffer;
356 final char[] lBufferArray = myBufferArray;
357 final boolean hasArray = lBufferArray != null;
359 while (pos < lBufferEnd - 1) {
360 final char c = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
362 if (c == '*' &&
363 (hasArray ? lBufferArray[pos + 1]:lBuffer.charAt(pos + 1)) == '/'
365 break;
367 pos++;
370 return pos + 2;
373 private int getLineTerminator(int offset) {
374 int pos = offset;
375 final int lBufferEnd = myBufferEndOffset;
376 final CharSequence lBuffer = myBuffer;
377 final char[] lBufferArray = myBufferArray;
378 final boolean hasArray = lBufferArray != null;
380 while (pos < lBufferEnd) {
381 final char c = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
382 if (c == '\r' || c == '\n') break;
383 pos++;
386 return pos;
389 private int getIdentifier(int offset) {
390 final CharSequence lBuffer = myBuffer;
391 final char[] lBufferArray = myBufferArray;
392 final boolean hasArray = lBufferArray != null;
394 int hashCode = (hasArray ? lBufferArray[offset - 1]:lBuffer.charAt(offset - 1)) * 2;
395 final int lBufferEnd = myBufferEndOffset;
397 int pos = offset;
398 if (pos < lBufferEnd) {
399 char c = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
401 while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
402 || c == '_' || c == '$' || ((c > 127) && Character.isJavaIdentifierPart(c))) {
403 pos++;
404 hashCode += c;
406 if (pos == lBufferEnd) break;
407 c = hasArray ? lBufferArray[pos]:lBuffer.charAt(pos);
411 if (myTable.contains(hashCode, lBuffer, lBufferArray, offset - 1)) {
412 myTokenType = myTable.getTokenType(hashCode);
413 } else {
414 myTokenType = JavaTokenType.IDENTIFIER;
417 return pos;
420 public final char[] getBuffer() {
421 return myBufferArray != null ? myBufferArray : CharArrayUtil.fromSequence(myBuffer);
424 public CharSequence getBufferSequence() {
425 return myBuffer;
428 public final int getBufferEnd() {
429 return myBufferEndOffset;
432 public static void main(String[] args) {
434 try {
435 BufferedReader reader = new BufferedReader(new FileReader(args[0]));
436 String s;
437 StringBuffer buf = new StringBuffer();
438 while ((s = reader.readLine()) != null) {
439 buf.append(s).append("\n");
442 char[] cbuf = buf.toString().toCharArray();
444 JavaLexer lexer = new JavaLexer(LanguageLevel.JDK_1_5);
445 lexer.start(cbuf, 0, cbuf.length);
446 while (lexer.getTokenType() != null) {
447 lexer.advance();
449 } catch (FileNotFoundException e) {
450 e.printStackTrace(); //To change body of catch statement use Options | File Templates.
451 } catch (IOException e) {
452 e.printStackTrace(); //To change body of catch statement use Options | File Templates.