/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com
.intellij
.openapi
.fileEditor
.impl
;
18 import com
.intellij
.Patches
;
19 import com
.intellij
.lang
.properties
.charset
.Native2AsciiCharset
;
20 import com
.intellij
.openapi
.fileTypes
.*;
21 import com
.intellij
.openapi
.project
.Project
;
22 import com
.intellij
.openapi
.util
.Key
;
23 import com
.intellij
.openapi
.util
.Pair
;
24 import com
.intellij
.openapi
.util
.text
.StringUtil
;
25 import com
.intellij
.openapi
.vfs
.CharsetToolkit
;
26 import com
.intellij
.openapi
.vfs
.VirtualFile
;
27 import com
.intellij
.openapi
.vfs
.encoding
.EncodingManager
;
28 import com
.intellij
.testFramework
.LightVirtualFile
;
29 import com
.intellij
.util
.ArrayUtil
;
30 import org
.jetbrains
.annotations
.NotNull
;
31 import org
.jetbrains
.annotations
.Nullable
;
34 import java
.nio
.ByteBuffer
;
35 import java
.nio
.CharBuffer
;
36 import java
.nio
.charset
.Charset
;
38 public final class LoadTextUtil
{
39 static final Key
<String
> DETECTED_LINE_SEPARATOR_KEY
= Key
.create("DETECTED_LINE_SEPARATOR_KEY");
/** Private constructor: the class is final and exposes only static members. */
private LoadTextUtil() {
}
// Normalizes line separators inside the given buffer and reports which separator was seen.
// What the visible code shows:
//  - characters are read one by one with buffer.charAt(src) and '\n' is written back
//    into the same buffer at index dst (buffer.put(dst++, '\n'));
//  - line_separator is a numeric code (one visible assignment is CR + LF) that the
//    switch below translates into "\r", "\n" or "\r\n"; the string stays null when
//    no case assigns it;
//  - the returned pair holds the resulting text and the detected separator string.
// NOTE(review): several statements of this method are not visible in this view
// (declarations of dst, result, CR and LF, and the branch conditions inside the loop);
// confirm against the complete file before changing any logic here.
44 private static Pair
<CharSequence
, String
> convertLineSeparators(final CharBuffer buffer
) {
47 int line_separator
= 0;
51 final int length
= buffer
.length();
52 for (int src
= 0; src
< length
; src
++) {
53 char c
= buffer
.charAt(src
);
56 buffer
.put(dst
++, '\n');
61 line_separator
= CR
+ LF
;
64 buffer
.put(dst
++, '\n');
// Translate the numeric separator code into the separator string handed back to the caller.
75 String detectedLineSeparator
= null;
76 switch (line_separator
) {
78 detectedLineSeparator
= "\r";
81 detectedLineSeparator
= "\n";
84 detectedLineSeparator
= "\r\n";
// Compares the buffer length with the number of characters written (dst); one visible
// assignment of result is the prefix buffer.subSequence(0, dst).
89 if (buffer
.length() == dst
) {
93 result
= buffer
.subSequence(0, dst
);
95 return Pair
.create(result
, detectedLineSeparator
);
98 public static Charset
detectCharset(final VirtualFile virtualFile
, final byte[] content
) {
99 Charset charset
= dodetectCharset(virtualFile
, content
);
100 charset
= charset
== null ? EncodingManager
.getInstance().getDefaultCharset() : charset
;
101 if (virtualFile
.getFileType() == StdFileTypes
.PROPERTIES
&& EncodingManager
.getInstance().isNative2AsciiForPropertiesFiles(virtualFile
)) {
102 charset
= Native2AsciiCharset
.wrap(charset
);
104 virtualFile
.setCharset(charset
);
108 private static Charset
dodetectCharset(final VirtualFile virtualFile
, final byte[] content
) {
109 EncodingManager settings
= EncodingManager
.getInstance();
110 boolean shouldGuess
= settings
!= null && settings
.isUseUTFGuessing(virtualFile
);
111 CharsetToolkit toolkit
= shouldGuess ?
new CharsetToolkit(content
, EncodingManager
.getInstance().getDefaultCharset()) : null;
112 setUtfCharsetWasDetectedFromBytes(virtualFile
, false);
114 toolkit
.setEnforce8Bit(true);
115 Charset charset
= toolkit
.guessFromBOM();
116 if (charset
!= null) {
117 setUtfCharsetWasDetectedFromBytes(virtualFile
, true);
120 CharsetToolkit
.GuessedEncoding guessed
= toolkit
.guessFromContent(content
.length
);
121 if (guessed
== CharsetToolkit
.GuessedEncoding
.VALID_UTF8
) {
122 setUtfCharsetWasDetectedFromBytes(virtualFile
, true);
123 return CharsetToolkit
.UTF8_CHARSET
; //UTF detected, ignore all directives
127 FileType fileType
= virtualFile
.getFileType();
128 String charsetName
= fileType
.getCharset(virtualFile
, content
);
130 if (charsetName
== null) {
131 Charset saved
= EncodingManager
.getInstance().getEncoding(virtualFile
, true);
132 if (saved
!= null) return saved
;
134 return CharsetToolkit
.forName(charsetName
);
137 private static int skipBOM(final VirtualFile virtualFile
, byte[] content
) {
138 final byte[] bom
= getBOM(content
, Patches
.SUN_BUG_ID_4508058? virtualFile
.getCharset() : null);
139 if (bom
.length
!= 0) {
140 virtualFile
.setBOM(bom
);
146 private static byte[] getBOM(byte[] content
, final Charset charset
) {
147 if (Patches
.SUN_BUG_ID_4508058
) {
148 if (charset
!= null && charset
.name().contains(CharsetToolkit
.UTF8
) && CharsetToolkit
.hasUTF8Bom(content
)) {
149 return CharsetToolkit
.UTF8_BOM
;
152 if (CharsetToolkit
.hasUTF16LEBom(content
)) {
153 return CharsetToolkit
.UTF16LE_BOM
;
155 if (CharsetToolkit
.hasUTF16BEBom(content
)) {
156 return CharsetToolkit
.UTF16BE_BOM
;
158 return ArrayUtil
.EMPTY_BYTE_ARRAY
;
162 * Gets the <code>Writer</code> for this file and sets modification stamp and time stamp to the specified values
163 * after closing the Writer.<p>
165 * Normally you should not use this method.
169 * @param requestor any object to control who called this method. Note that
170 * it is considered to be an external change if <code>requestor</code> is <code>null</code>.
171 * See {@link com.intellij.openapi.vfs.VirtualFileEvent#getRequestor}
173 * @param newModificationStamp new modification stamp or -1 if no special value should be set @return <code>Writer</code>
174 * @throws java.io.IOException if an I/O error occurs
175 * @see VirtualFile#getModificationStamp()
177 @SuppressWarnings({"IOResourceOpenedButNotSafelyClosed"})
178 public static Writer
getWriter(@Nullable Project project
, final VirtualFile virtualFile
, Object requestor
, final String text
, final long newModificationStamp
)
180 Charset existing
= virtualFile
.getCharset();
181 Charset specified
= extractCharsetFromFileContent(project
, virtualFile
, text
);
182 Charset charset
= chooseMostlyHarmlessCharset(existing
, specified
, text
);
183 if (charset
!= null && !charset
.equals(existing
)) {
184 virtualFile
.setCharset(charset
);
185 if (virtualFile
.getBOM() != null) {
186 // prevent file to be reloaded in other encoding after save with BOM
187 setUtfCharsetWasDetectedFromBytes(virtualFile
, true);
190 OutputStream outputStream
= virtualFile
.getOutputStream(requestor
, newModificationStamp
, -1);
191 return new BufferedWriter(charset
== null ?
new OutputStreamWriter(outputStream
) : new OutputStreamWriter(outputStream
, charset
));
194 private static Charset
chooseMostlyHarmlessCharset(Charset existing
, Charset specified
, String text
) {
195 if (existing
== null) return specified
;
196 if (specified
== null) return existing
;
197 if (specified
.equals(existing
)) return specified
;
198 if (isSupported(specified
, text
)) return specified
; //if explicitly specified encoding is safe, return it
199 if (isSupported(existing
, text
)) return existing
; //otherwise stick to the old encoding if it's ok
200 return specified
; //if both are bad there is no difference
203 private static boolean isSupported(Charset charset
, String str
) {
204 if (!charset
.canEncode()) return false;
205 ByteBuffer out
= charset
.encode(str
);
206 CharBuffer buffer
= charset
.decode(out
);
207 return str
.equals(buffer
.toString());
210 public static Charset
extractCharsetFromFileContent(@Nullable Project project
, final VirtualFile virtualFile
, final String text
) {
211 Charset charset
= charsetFromContentOrNull(project
, virtualFile
, text
);
212 if (charset
== null) charset
= virtualFile
.getCharset();
216 @Nullable("returns null if cannot determine from content")
217 public static Charset
charsetFromContentOrNull(@Nullable Project project
, @NotNull VirtualFile virtualFile
, @NotNull String text
) {
218 FileType fileType
= virtualFile
.getFileType();
219 if (fileType
instanceof LanguageFileType
) {
220 return ((LanguageFileType
)fileType
).extractCharsetFromFileContent(project
, virtualFile
, text
);
225 public static CharSequence
loadText(@NotNull VirtualFile file
) {
226 return loadText(file
, false);
// Loads the text of a file. What the visible code shows:
//  - LightVirtualFile: its in-memory content is returned directly when it contains no '\r';
//    otherwise it is copied into a CharBuffer and passed through convertLineSeparators;
//  - directories are rejected by an assertion;
//  - binary file types: the text comes from the decompiler registered for the file type;
//    without one, null is returned when allowMissingDecompiler is set, else
//    IllegalArgumentException is thrown;
//  - everything else: the bytes are read and decoded via getTextByBinaryPresentation;
//    an IOException yields ArrayUtil.EMPTY_CHAR_SEQUENCE.
// NOTE(review): some statements of this method are not visible in this view (for example
// between the buffer append and the convertLineSeparators call); confirm against the
// complete file before changing any logic here.
229 public static CharSequence
loadText(@NotNull VirtualFile file
, final boolean allowMissingDecompiler
) {
230 if (file
instanceof LightVirtualFile
) {
231 CharSequence content
= ((LightVirtualFile
)file
).getContent();
// Fast path: nothing to normalize when there is no carriage return in the content.
232 if (StringUtil
.indexOf(content
, '\r') == -1) return content
;
234 CharBuffer buffer
= CharBuffer
.allocate(content
.length());
235 buffer
.append(content
);
237 return convertLineSeparators(buffer
).first
;
240 assert !file
.isDirectory() : "'"+file
.getPresentableUrl() + "' is directory";
241 final FileType fileType
= file
.getFileType();
243 if (fileType
.isBinary()) {
244 final BinaryFileDecompiler decompiler
= BinaryFileTypeDecompilers
.INSTANCE
.forFileType(fileType
);
245 if (decompiler
!= null) {
246 CharSequence text
= decompiler
.decompile(file
);
247 StringUtil
.assertValidSeparators(text
);
// No decompiler available: either report that with null or fail loudly.
251 if (allowMissingDecompiler
) return null;
252 throw new IllegalArgumentException("Attempt to load text for binary file, that doesn't have decompiler plugged in: "+file
.getPresentableUrl());
256 byte[] bytes
= file
.contentsToByteArray();
257 return getTextByBinaryPresentation(bytes
, file
);
// A read failure is reported to the caller as empty text rather than as an exception.
259 catch (IOException e
) {
260 return ArrayUtil
.EMPTY_CHAR_SEQUENCE
;
265 public static CharSequence
getTextByBinaryPresentation(@NotNull final byte[] bytes
, @NotNull VirtualFile virtualFile
) {
266 return getTextByBinaryPresentation(bytes
, virtualFile
, true);
270 public static CharSequence
getTextByBinaryPresentation(@NotNull byte[] bytes
, @NotNull VirtualFile virtualFile
, final boolean rememberDetectedSeparators
) {
271 final Charset charset
= detectCharset(virtualFile
, bytes
);
272 final int offset
= skipBOM(virtualFile
, bytes
);
274 final Pair
<CharSequence
, String
> result
= convertBytes(bytes
, charset
, offset
);
275 if (rememberDetectedSeparators
) {
276 virtualFile
.putUserData(DETECTED_LINE_SEPARATOR_KEY
, result
.getSecond());
278 return result
.getFirst();
282 * Get detected line separator, if the file never been loaded, is loaded if checkFile parameter is specified.
284 * @param file the file to check
285 * @param checkFile if the line separator was not detected before, try to detect it
286 * @return the detected line separator or null
289 public static String
detectLineSeparator(@NotNull VirtualFile file
, boolean checkFile
) {
290 String lineSeparator
= file
.getUserData(DETECTED_LINE_SEPARATOR_KEY
);
291 if (lineSeparator
== null && checkFile
) {
293 getTextByBinaryPresentation(file
.contentsToByteArray(), file
);
294 lineSeparator
= file
.getUserData(DETECTED_LINE_SEPARATOR_KEY
);
296 catch (IOException e
) {
297 // null will be returned
300 return lineSeparator
;
/**
 * Changes the line separator of the file to the specified value (assumes that the documents were saved).
 *
 * @param project          the project instance
 * @param requestor        the requestor for the operation
 * @param file             the file to convert
 * @param newLineSeparator the new line separator for the file
 * @throws IOException in the case of IO problem
 */
// Rewrites a file with a different line separator. What the visible code shows:
//  - the separator cached under DETECTED_LINE_SEPARATOR_KEY is compared with the requested one;
//  - the content is re-read through getTextByBinaryPresentation, which refreshes the cached
//    separator, and the comparison is repeated;
//  - the loaded text is converted with StringUtil.convertLineSeparators unless the requested
//    separator is "\n";
//  - the new separator is stored on the file and a writer is obtained from getWriter with
//    System.currentTimeMillis() as the new modification stamp.
// NOTE(review): the bodies of the two comparison branches and the end of the method (what is
// done with the writer) are not visible in this view; confirm against the complete file.
312 public static void changeLineSeparator(@Nullable Project project
,
313 @Nullable Object requestor
,
314 @NotNull VirtualFile file
,
315 @NotNull String newLineSeparator
) throws IOException
{
316 String lineSeparator
= file
.getUserData(DETECTED_LINE_SEPARATOR_KEY
);
317 if (lineSeparator
!= null && lineSeparator
.equals(newLineSeparator
)) {
// Reload from disk; this also re-detects the separator currently used by the file.
320 CharSequence cs
= getTextByBinaryPresentation(file
.contentsToByteArray(), file
);
321 lineSeparator
= file
.getUserData(DETECTED_LINE_SEPARATOR_KEY
);
322 if (lineSeparator
== null || lineSeparator
.equals(newLineSeparator
)) {
// Conversion is skipped when "\n" is requested.
325 if (!newLineSeparator
.equals("\n")) {
326 cs
= StringUtil
.convertLineSeparators(cs
.toString(), newLineSeparator
);
328 String text
= cs
.toString();
329 file
.putUserData(DETECTED_LINE_SEPARATOR_KEY
, newLineSeparator
);
330 Writer w
= getWriter(project
, file
, requestor
, text
, System
.currentTimeMillis());
340 public static CharSequence
getTextByBinaryPresentation(@NotNull byte[] bytes
, Charset charset
) {
341 final int offset
= getBOM(bytes
, charset
).length
;
342 return convertBytes(bytes
, charset
, offset
).getFirst();
346 private static Pair
<CharSequence
, String
> convertBytes(@NotNull byte[] bytes
, Charset charset
, final int startOffset
) {
347 ByteBuffer byteBuffer
= ByteBuffer
.wrap(bytes
, startOffset
, bytes
.length
- startOffset
);
349 if (charset
== null) {
350 charset
= CharsetToolkit
.getDefaultSystemCharset();
352 if (charset
== null) {
353 //noinspection HardCodedStringLiteral
354 charset
= Charset
.forName("ISO-8859-1");
356 CharBuffer charBuffer
= charset
.decode(byteBuffer
);
357 return convertLineSeparators(charBuffer
);
360 private static final Key
<Boolean
> UTF_CHARSET_WAS_DETECTED_FROM_BYTES
= new Key
<Boolean
>("UTF_CHARSET_WAS_DETECTED_FROM_BYTES");
361 public static boolean utfCharsetWasDetectedFromBytes(@NotNull VirtualFile virtualFile
) {
362 return virtualFile
.getUserData(UTF_CHARSET_WAS_DETECTED_FROM_BYTES
) != null;
364 private static void setUtfCharsetWasDetectedFromBytes(@NotNull VirtualFile virtualFile
, boolean flag
) {
365 virtualFile
.putUserData(UTF_CHARSET_WAS_DETECTED_FROM_BYTES
, flag ? Boolean
.TRUE
: null);