/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com
.intellij
.lexer
;
18 import com
.intellij
.openapi
.diagnostic
.Logger
;
19 import com
.intellij
.psi
.StringEscapesTokenTypes
;
20 import com
.intellij
.psi
.tree
.IElementType
;
25 public class StringLiteralLexer
extends LexerBase
{
26 private static final Logger LOG
= Logger
.getInstance("#com.intellij.lexer.StringLiteralLexer");
28 private static final short AFTER_FIRST_QUOTE
= 1;
29 private static final short AFTER_LAST_QUOTE
= 2;
31 public static final char NO_QUOTE_CHAR
= (char)-1;
33 private CharSequence myBuffer
;
37 private int myLastState
;
38 private int myBufferEnd
;
39 private char myQuoteChar
;
40 private IElementType myOriginalLiteralToken
;
41 private final boolean myCanEscapeEolOrFramingSpaces
;
42 private final String myAdditionalValidEscapes
;
43 private boolean mySeenEscapedSpacesOnly
;
/**
 * Creates a lexer that does not accept escaped end of line / framing spaces and has no
 * additional valid escapes; delegates to the four-argument constructor with
 * {@code false} and {@code null}.
 *
 * @param quoteChar            quote char framing the literal, or {@link #NO_QUOTE_CHAR}
 * @param originalLiteralToken token type reported for the non-escape parts of the literal
 */
public StringLiteralLexer(char quoteChar, final IElementType originalLiteralToken) {
  this(quoteChar, originalLiteralToken, false, null);
}
/**
 * Creates a lexer for a string literal; the arguments are stored as-is.
 *
 * @param quoteChar                   quote char framing the literal, or {@link #NO_QUOTE_CHAR}
 * @param originalLiteralToken        token type reported for the non-escape parts of the literal
 * @param canEscapeEolOrFramingSpaces true if the following sequences are acceptable:
 *   '\' in the end of the buffer (meaning escaped end of line) or
 *   '\ ' (escaped space) in the beginning and in the end of the buffer
 *   (meaning escaped space, to avoid auto trimming on load)
 * @param additionalValidEscapes      characters that are additionally accepted after a backslash
 *                                    as valid escapes; may be {@code null}
 */
public StringLiteralLexer(char quoteChar,
                          final IElementType originalLiteralToken,
                          boolean canEscapeEolOrFramingSpaces,
                          String additionalValidEscapes) {
  myQuoteChar = quoteChar;
  myOriginalLiteralToken = originalLiteralToken;
  myCanEscapeEolOrFramingSpaces = canEscapeEolOrFramingSpaces;
  myAdditionalValidEscapes = additionalValidEscapes;
}
/**
 * Prepares the lexer for the region of {@code buffer} between {@code startOffset} and
 * {@code endOffset}: records the start offset and the buffer end, chooses the current state
 * (AFTER_FIRST_QUOTE when the lexer was created with NO_QUOTE_CHAR), remembers
 * {@code initialState} as the last state, locates the first token and resets the
 * "only escaped spaces seen so far" flag.
 *
 * NOTE(review): the embedded source line numbers skip 62, 66-67 and 69, so some statements
 * (presumably storing {@code buffer} in myBuffer and the else branch around the
 * {@code myState = initialState} assignment) are not visible in this excerpt - confirm
 * against the full file.
 */
61 public void start(CharSequence buffer
, int startOffset
, int endOffset
, int initialState
) {
63 myStart
= startOffset
;
// Without a framing quote char the text is treated as already being inside the literal.
64 if (myQuoteChar
== NO_QUOTE_CHAR
) {
65 myState
= AFTER_FIRST_QUOTE
;
68 myState
= initialState
;
70 myLastState
= initialState
;
71 myBufferEnd
= endOffset
;
72 myEnd
= locateToken(myStart
);
73 mySeenEscapedSpacesOnly
= true;
// Reports the lexer state as an int.
// NOTE(review): the body is not visible in this excerpt; presumably it returns myLastState
// (the state saved before the current token was located) - confirm against the full file.
76 public int getState() {
80 private static boolean isHexDigit(char c
) {
81 return (c
>= '0' && c
<= '9') || (c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F');
/**
 * Classifies the current token, i.e. the text between myStart and myEnd.
 * Returns null when the token is empty, the original literal token type when the token does
 * not begin with a backslash, and one of the StringEscapesTokenTypes constants when it does.
 *
 * NOTE(review): the embedded source line numbers jump from 104 to 124, so the code that
 * decides which single-character escapes reach the VALID_STRING_ESCAPE_TOKEN return on
 * line 124 is not visible in this excerpt - confirm against the full file.
 */
84 public IElementType
getTokenType() {
85 if (myStart
>= myEnd
) return null;
// Plain (non-escape) text; it also ends any run of leading escaped spaces.
87 if (myBuffer
.charAt(myStart
) != '\\') {
88 mySeenEscapedSpacesOnly
= false;
89 return myOriginalLiteralToken
;
// A backslash with nothing after it inside the token is an invalid escape.
92 if (myStart
+ 1 >= myEnd
) return StringEscapesTokenTypes
.INVALID_CHARACTER_ESCAPE_TOKEN
;
93 char nextChar
= myBuffer
.charAt(myStart
+ 1);
// Stays true only while every token seen since start() was an escaped space.
94 mySeenEscapedSpacesOnly
&= nextChar
== ' ';
// Optional extras: an escaped newline, or an escaped space that is either leading
// (only escaped spaces so far) or trailing (only escaped spaces follow).
95 if (myCanEscapeEolOrFramingSpaces
&&
96 (nextChar
== '\n' || nextChar
== ' ' && (mySeenEscapedSpacesOnly
|| isTrailingSpace(myStart
+2)))
98 return StringEscapesTokenTypes
.VALID_STRING_ESCAPE_TOKEN
;
// Unicode escape: 'u' has to be followed by four hex digits that lie inside the token.
100 if (nextChar
== 'u') {
101 for(int i
= myStart
+ 2; i
< myStart
+ 6; i
++) {
102 if (i
>= myEnd
|| !isHexDigit(myBuffer
.charAt(i
))) return StringEscapesTokenTypes
.INVALID_UNICODE_ESCAPE_TOKEN
;
104 return StringEscapesTokenTypes
.VALID_STRING_ESCAPE_TOKEN
;
124 return StringEscapesTokenTypes
.VALID_STRING_ESCAPE_TOKEN
;
// Escapes explicitly allowed through the additionalValidEscapes constructor argument.
126 if (myAdditionalValidEscapes
!= null && myAdditionalValidEscapes
.indexOf(nextChar
) != -1) {
127 return StringEscapesTokenTypes
.VALID_STRING_ESCAPE_TOKEN
;
130 return StringEscapesTokenTypes
.INVALID_CHARACTER_ESCAPE_TOKEN
;
133 // all subsequent chars are escaped spaces
134 private boolean isTrailingSpace(final int start
) {
135 for (int i
=start
;i
<myBufferEnd
;i
+=2) {
136 final char c
= myBuffer
.charAt(i
);
137 if (c
!= '\\') return false;
138 if (i
==myBufferEnd
-1) return false;
139 if (myBuffer
.charAt(i
+1) != ' ') return false;
// Reports the start offset of the current token.
// NOTE(review): the body is not visible in this excerpt; presumably it returns myStart - confirm.
144 public int getTokenStart() {
// Reports the end offset of the current token.
// NOTE(review): the body is not visible in this excerpt; presumably it returns myEnd - confirm.
148 public int getTokenEnd() {
/**
 * Computes the end offset of the token that begins at {@code start}, updating myState on the way.
 * What the visible code establishes:
 *  - when {@code start} is the end of the buffer the state becomes AFTER_LAST_QUOTE, and in
 *    that state {@code start} itself is returned (an empty token);
 *  - a backslash marks an escape and is only expected in the AFTER_FIRST_QUOTE state; the
 *    escape handling has dedicated checks for the end of the buffer, for a newline (when
 *    end-of-line escapes are not allowed), for octal digits '0'-'7' (one of the follow-up digit
 *    checks additionally requires the first digit to be at most '3') and for 'u' sequences,
 *    which are scanned up to {@code start + 6} with checks for the buffer end, a newline and
 *    the quote char;
 *  - otherwise the remaining chars are scanned, testing each for a backslash and, in the
 *    AFTER_FIRST_QUOTE state, for the quote char, which switches the state to AFTER_LAST_QUOTE.
 *
 * NOTE(review): many source lines of this method are not visible in this excerpt (for example
 * the declaration and the increments of i, and the return statements), so the exact offset
 * returned by each branch could not be checked here - confirm against the full file.
 *
 * @param start offset at which the token begins
 * @return the end offset of the token
 */
152 private int locateToken(int start
) {
153 if (start
== myBufferEnd
) {
154 myState
= AFTER_LAST_QUOTE
;
156 if (myState
== AFTER_LAST_QUOTE
) return start
;
// Escape sequence: only expected once the opening quote has been consumed.
158 if (myBuffer
.charAt(i
) == '\\') {
159 LOG
.assertTrue(myState
== AFTER_FIRST_QUOTE
);
// Nothing after the backslash, or a newline that may not be escaped.
161 if (i
== myBufferEnd
|| myBuffer
.charAt(i
) == '\n' && !myCanEscapeEolOrFramingSpaces
) {
162 myState
= AFTER_LAST_QUOTE
;
// Octal escape: starts with a digit in '0'-'7'.
166 if (myBuffer
.charAt(i
) >= '0' && myBuffer
.charAt(i
) <= '7') {
167 char first
= myBuffer
.charAt(i
);
169 if (i
< myBufferEnd
&& myBuffer
.charAt(i
) >= '0' && myBuffer
.charAt(i
) <= '7') {
// This further digit is only considered when the first digit is at most '3'.
171 if (i
< myBufferEnd
&& first
<= '3' && myBuffer
.charAt(i
) >= '0' && myBuffer
.charAt(i
) <= '7') {
// 'u' escape: scan up to start + 6, watching for the buffer end, a newline or the quote char.
178 if (myBuffer
.charAt(i
) == 'u') {
180 for (; i
< start
+ 6; i
++) {
181 if (i
== myBufferEnd
|| myBuffer
.charAt(i
) == '\n' || myBuffer
.charAt(i
) == myQuoteChar
) {
// Not an escape: either already inside the literal, or positioned on the quote char.
192 LOG
.assertTrue(myState
== AFTER_FIRST_QUOTE
|| myBuffer
.charAt(i
) == myQuoteChar
);
193 while (i
< myBufferEnd
) {
194 if (myBuffer
.charAt(i
) == '\\') {
197 //if (myBuffer[i] == '\n') {
198 // myState = AFTER_LAST_QUOTE;
// Quote char met while inside the literal: this is the closing quote.
201 if (myState
== AFTER_FIRST_QUOTE
&& myBuffer
.charAt(i
) == myQuoteChar
) {
202 myState
= AFTER_LAST_QUOTE
;
206 myState
= AFTER_FIRST_QUOTE
;
/**
 * Moves the lexer to the next token: remembers the current state in myLastState and
 * recomputes myEnd with locateToken(myStart).
 *
 * NOTE(review): source line 215 is not visible in this excerpt; presumably it moves myStart
 * to the previous myEnd before the next token is located - confirm against the full file.
 */
213 public void advance() {
214 myLastState
= myState
;
216 myEnd
= locateToken(myStart
);
// Reports the text being lexed as a CharSequence.
// NOTE(review): the body is not visible in this excerpt; presumably it returns myBuffer - confirm.
219 public CharSequence
getBufferSequence() {
223 public int getBufferEnd() {