update copyrights
[fedora-idea.git] / platform / lang-api / src / com / intellij / lexer / StringLiteralLexer.java
blobe4f3694538248fc17ea29b132428fd8fd7b16754
/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com.intellij.lexer;
18 import com.intellij.openapi.diagnostic.Logger;
19 import com.intellij.psi.StringEscapesTokenTypes;
20 import com.intellij.psi.tree.IElementType;
22 /**
23 * @author max
25 public class StringLiteralLexer extends LexerBase {
26 private static final Logger LOG = Logger.getInstance("#com.intellij.lexer.StringLiteralLexer");
28 private static final short AFTER_FIRST_QUOTE = 1;
29 private static final short AFTER_LAST_QUOTE = 2;
31 public static final char NO_QUOTE_CHAR = (char)-1;
33 private CharSequence myBuffer;
34 private int myStart;
35 private int myEnd;
36 private int myState;
37 private int myLastState;
38 private int myBufferEnd;
39 private char myQuoteChar;
40 private IElementType myOriginalLiteralToken;
41 private final boolean myCanEscapeEolOrFramingSpaces;
42 private final String myAdditionalValidEscapes;
43 private boolean mySeenEscapedSpacesOnly;
45 public StringLiteralLexer(char quoteChar, final IElementType originalLiteralToken) {
46 this(quoteChar, originalLiteralToken, false, null);
49 /**
50 * @param canEscapeEolOrFramingSpaces true if following sequences are acceptable
51 * '\' in the end of the buffer (meaning escaped end of line) or
52 * '\ ' (escaped space) in the beginning and in the end of the buffer (meaning escaped space, to avoid auto trimming on load)
54 public StringLiteralLexer(char quoteChar, final IElementType originalLiteralToken, boolean canEscapeEolOrFramingSpaces, String additionalValidEscapes) {
55 myQuoteChar = quoteChar;
56 myOriginalLiteralToken = originalLiteralToken;
57 myCanEscapeEolOrFramingSpaces = canEscapeEolOrFramingSpaces;
58 myAdditionalValidEscapes = additionalValidEscapes;
61 public void start(CharSequence buffer, int startOffset, int endOffset, int initialState) {
62 myBuffer = buffer;
63 myStart = startOffset;
64 if (myQuoteChar == NO_QUOTE_CHAR) {
65 myState = AFTER_FIRST_QUOTE;
67 else {
68 myState = initialState;
70 myLastState = initialState;
71 myBufferEnd = endOffset;
72 myEnd = locateToken(myStart);
73 mySeenEscapedSpacesOnly = true;
76 public int getState() {
77 return myLastState;
80 private static boolean isHexDigit(char c) {
81 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
84 public IElementType getTokenType() {
85 if (myStart >= myEnd) return null;
87 if (myBuffer.charAt(myStart) != '\\') {
88 mySeenEscapedSpacesOnly = false;
89 return myOriginalLiteralToken;
92 if (myStart + 1 >= myEnd) return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
93 char nextChar = myBuffer.charAt(myStart + 1);
94 mySeenEscapedSpacesOnly &= nextChar == ' ';
95 if (myCanEscapeEolOrFramingSpaces &&
96 (nextChar == '\n' || nextChar == ' ' && (mySeenEscapedSpacesOnly || isTrailingSpace(myStart+2)))
97 ) {
98 return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
100 if (nextChar == 'u') {
101 for(int i = myStart + 2; i < myStart + 6; i++) {
102 if (i >= myEnd || !isHexDigit(myBuffer.charAt(i))) return StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
104 return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
107 switch (nextChar) {
108 case 'n':
109 case 'r':
110 case 'b':
111 case 't':
112 case 'f':
113 case '\'':
114 case '\"':
115 case '\\':
116 case '0':
117 case '1':
118 case '2':
119 case '3':
120 case '4':
121 case '5':
122 case '6':
123 case '7':
124 return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
126 if (myAdditionalValidEscapes != null && myAdditionalValidEscapes.indexOf(nextChar) != -1) {
127 return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
130 return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
133 // all subsequent chars are escaped spaces
134 private boolean isTrailingSpace(final int start) {
135 for (int i=start;i<myBufferEnd;i+=2) {
136 final char c = myBuffer.charAt(i);
137 if (c != '\\') return false;
138 if (i==myBufferEnd-1) return false;
139 if (myBuffer.charAt(i+1) != ' ') return false;
141 return true;
144 public int getTokenStart() {
145 return myStart;
148 public int getTokenEnd() {
149 return myEnd;
152 private int locateToken(int start) {
153 if (start == myBufferEnd) {
154 myState = AFTER_LAST_QUOTE;
156 if (myState == AFTER_LAST_QUOTE) return start;
157 int i = start;
158 if (myBuffer.charAt(i) == '\\') {
159 LOG.assertTrue(myState == AFTER_FIRST_QUOTE);
160 i++;
161 if (i == myBufferEnd || myBuffer.charAt(i) == '\n' && !myCanEscapeEolOrFramingSpaces) {
162 myState = AFTER_LAST_QUOTE;
163 return i;
166 if (myBuffer.charAt(i) >= '0' && myBuffer.charAt(i) <= '7') {
167 char first = myBuffer.charAt(i);
168 i++;
169 if (i < myBufferEnd && myBuffer.charAt(i) >= '0' && myBuffer.charAt(i) <= '7') {
170 i++;
171 if (i < myBufferEnd && first <= '3' && myBuffer.charAt(i) >= '0' && myBuffer.charAt(i) <= '7') {
172 i++;
175 return i;
178 if (myBuffer.charAt(i) == 'u') {
179 i++;
180 for (; i < start + 6; i++) {
181 if (i == myBufferEnd || myBuffer.charAt(i) == '\n' || myBuffer.charAt(i) == myQuoteChar) {
182 return i;
185 return i;
187 else {
188 return i + 1;
191 else {
192 LOG.assertTrue(myState == AFTER_FIRST_QUOTE || myBuffer.charAt(i) == myQuoteChar);
193 while (i < myBufferEnd) {
194 if (myBuffer.charAt(i) == '\\') {
195 return i;
197 //if (myBuffer[i] == '\n') {
198 // myState = AFTER_LAST_QUOTE;
199 // return i;
201 if (myState == AFTER_FIRST_QUOTE && myBuffer.charAt(i) == myQuoteChar) {
202 myState = AFTER_LAST_QUOTE;
203 return i + 1;
205 i++;
206 myState = AFTER_FIRST_QUOTE;
210 return i;
213 public void advance() {
214 myLastState = myState;
215 myStart = myEnd;
216 myEnd = locateToken(myStart);
219 public CharSequence getBufferSequence() {
220 return myBuffer;
223 public int getBufferEnd() {
224 return myBufferEnd;