git4idea: IDEADEV-41015, IDEADEV-40952: added support for text files without new...
[fedora-idea.git] / platform / platform-impl / src / com / intellij / openapi / fileEditor / impl / LoadTextUtil.java
blobfd0fcb549368edf6b00ffac2085cc9669a32af2d
1 /*
2 * Copyright 2000-2009 JetBrains s.r.o.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 package com.intellij.openapi.fileEditor.impl;
18 import com.intellij.Patches;
19 import com.intellij.lang.properties.charset.Native2AsciiCharset;
20 import com.intellij.openapi.fileTypes.*;
21 import com.intellij.openapi.project.Project;
22 import com.intellij.openapi.util.Key;
23 import com.intellij.openapi.util.Pair;
24 import com.intellij.openapi.util.text.StringUtil;
25 import com.intellij.openapi.vfs.CharsetToolkit;
26 import com.intellij.openapi.vfs.VirtualFile;
27 import com.intellij.openapi.vfs.encoding.EncodingManager;
28 import com.intellij.testFramework.LightVirtualFile;
29 import com.intellij.util.ArrayUtil;
30 import org.jetbrains.annotations.NotNull;
31 import org.jetbrains.annotations.Nullable;
33 import java.io.*;
34 import java.nio.ByteBuffer;
35 import java.nio.CharBuffer;
36 import java.nio.charset.Charset;
38 public final class LoadTextUtil {
/**
 * User-data key under which the line separator found while loading a file's text is stored
 * on the corresponding {@link VirtualFile}.
 */
static final Key<String> DETECTED_LINE_SEPARATOR_KEY = Key.create("DETECTED_LINE_SEPARATOR_KEY");

/** Static utility holder; never instantiated. */
private LoadTextUtil() {
}
44 private static Pair<CharSequence, String> convertLineSeparators(final CharBuffer buffer) {
45 final int LF = 1;
46 final int CR = 2;
47 int line_separator = 0;
49 int dst = 0;
50 char prev = ' ';
51 final int length = buffer.length();
52 for (int src = 0; src < length; src++) {
53 char c = buffer.charAt(src);
54 switch (c) {
55 case '\r':
56 buffer.put(dst++, '\n');
57 line_separator = CR;
58 break;
59 case '\n':
60 if (prev == '\r') {
61 line_separator = CR + LF;
63 else {
64 buffer.put(dst++, '\n');
65 line_separator = LF;
67 break;
68 default:
69 buffer.put(dst++, c);
70 break;
72 prev = c;
75 String detectedLineSeparator = null;
76 switch (line_separator) {
77 case CR:
78 detectedLineSeparator = "\r";
79 break;
80 case LF:
81 detectedLineSeparator = "\n";
82 break;
83 case CR + LF:
84 detectedLineSeparator = "\r\n";
85 break;
88 CharSequence result;
89 if (buffer.length() == dst) {
90 result = buffer;
92 else {
93 result = buffer.subSequence(0, dst);
95 return Pair.create(result, detectedLineSeparator);
98 public static Charset detectCharset(final VirtualFile virtualFile, final byte[] content) {
99 Charset charset = dodetectCharset(virtualFile, content);
100 charset = charset == null ? EncodingManager.getInstance().getDefaultCharset() : charset;
101 if (virtualFile.getFileType() == StdFileTypes.PROPERTIES && EncodingManager.getInstance().isNative2AsciiForPropertiesFiles(virtualFile)) {
102 charset = Native2AsciiCharset.wrap(charset);
104 virtualFile.setCharset(charset);
105 return charset;
108 private static Charset dodetectCharset(final VirtualFile virtualFile, final byte[] content) {
109 EncodingManager settings = EncodingManager.getInstance();
110 boolean shouldGuess = settings != null && settings.isUseUTFGuessing(virtualFile);
111 CharsetToolkit toolkit = shouldGuess ? new CharsetToolkit(content, EncodingManager.getInstance().getDefaultCharset()) : null;
112 setUtfCharsetWasDetectedFromBytes(virtualFile, false);
113 if (shouldGuess) {
114 toolkit.setEnforce8Bit(true);
115 Charset charset = toolkit.guessFromBOM();
116 if (charset != null) {
117 setUtfCharsetWasDetectedFromBytes(virtualFile, true);
118 return charset;
120 CharsetToolkit.GuessedEncoding guessed = toolkit.guessFromContent(content.length);
121 if (guessed == CharsetToolkit.GuessedEncoding.VALID_UTF8) {
122 setUtfCharsetWasDetectedFromBytes(virtualFile, true);
123 return CharsetToolkit.UTF8_CHARSET; //UTF detected, ignore all directives
127 FileType fileType = virtualFile.getFileType();
128 String charsetName = fileType.getCharset(virtualFile, content);
130 if (charsetName == null) {
131 Charset saved = EncodingManager.getInstance().getEncoding(virtualFile, true);
132 if (saved != null) return saved;
134 return CharsetToolkit.forName(charsetName);
137 private static int skipBOM(final VirtualFile virtualFile, byte[] content) {
138 final byte[] bom = getBOM(content, Patches.SUN_BUG_ID_4508058? virtualFile.getCharset() : null);
139 if (bom.length != 0) {
140 virtualFile.setBOM(bom);
142 return bom.length;
145 @NotNull
146 private static byte[] getBOM(byte[] content, final Charset charset) {
147 if (Patches.SUN_BUG_ID_4508058) {
148 if (charset != null && charset.name().contains(CharsetToolkit.UTF8) && CharsetToolkit.hasUTF8Bom(content)) {
149 return CharsetToolkit.UTF8_BOM;
152 if (CharsetToolkit.hasUTF16LEBom(content)) {
153 return CharsetToolkit.UTF16LE_BOM;
155 if (CharsetToolkit.hasUTF16BEBom(content)) {
156 return CharsetToolkit.UTF16BE_BOM;
158 return ArrayUtil.EMPTY_BYTE_ARRAY;
162 * Gets the <code>Writer</code> for this file and sets modification stamp and time stamp to the specified values
163 * after closing the Writer.<p>
164 * <p/>
165 * Normally you should not use this method.
167 * @param project
168 *@param virtualFile
169 * @param requestor any object to control who called this method. Note that
170 * it is considered to be an external change if <code>requestor</code> is <code>null</code>.
171 * See {@link com.intellij.openapi.vfs.VirtualFileEvent#getRequestor}
172 * @param text
173 * @param newModificationStamp new modification stamp or -1 if no special value should be set @return <code>Writer</code>
174 * @throws java.io.IOException if an I/O error occurs
175 * @see VirtualFile#getModificationStamp()
177 @SuppressWarnings({"IOResourceOpenedButNotSafelyClosed"})
178 public static Writer getWriter(@Nullable Project project, final VirtualFile virtualFile, Object requestor, final String text, final long newModificationStamp)
179 throws IOException {
180 Charset existing = virtualFile.getCharset();
181 Charset specified = extractCharsetFromFileContent(project, virtualFile, text);
182 Charset charset = chooseMostlyHarmlessCharset(existing, specified, text);
183 if (charset != null && !charset.equals(existing)) {
184 virtualFile.setCharset(charset);
185 if (virtualFile.getBOM() != null) {
186 // prevent file to be reloaded in other encoding after save with BOM
187 setUtfCharsetWasDetectedFromBytes(virtualFile, true);
190 OutputStream outputStream = virtualFile.getOutputStream(requestor, newModificationStamp, -1);
191 return new BufferedWriter(charset == null ? new OutputStreamWriter(outputStream) : new OutputStreamWriter(outputStream, charset));
194 private static Charset chooseMostlyHarmlessCharset(Charset existing, Charset specified, String text) {
195 if (existing == null) return specified;
196 if (specified == null) return existing;
197 if (specified.equals(existing)) return specified;
198 if (isSupported(specified, text)) return specified; //if explicitly specified encoding is safe, return it
199 if (isSupported(existing, text)) return existing; //otherwise stick to the old encoding if it's ok
200 return specified; //if both are bad there is no difference
203 private static boolean isSupported(Charset charset, String str) {
204 if (!charset.canEncode()) return false;
205 ByteBuffer out = charset.encode(str);
206 CharBuffer buffer = charset.decode(out);
207 return str.equals(buffer.toString());
210 public static Charset extractCharsetFromFileContent(@Nullable Project project, final VirtualFile virtualFile, final String text) {
211 Charset charset = charsetFromContentOrNull(project, virtualFile, text);
212 if (charset == null) charset = virtualFile.getCharset();
213 return charset;
216 @Nullable("returns null if cannot determine from content")
217 public static Charset charsetFromContentOrNull(@Nullable Project project, @NotNull VirtualFile virtualFile, @NotNull String text) {
218 FileType fileType = virtualFile.getFileType();
219 if (fileType instanceof LanguageFileType) {
220 return ((LanguageFileType)fileType).extractCharsetFromFileContent(project, virtualFile, text);
222 return null;
225 public static CharSequence loadText(@NotNull VirtualFile file) {
226 return loadText(file, false);
229 public static CharSequence loadText(@NotNull VirtualFile file, final boolean allowMissingDecompiler) {
230 if (file instanceof LightVirtualFile) {
231 CharSequence content = ((LightVirtualFile)file).getContent();
232 if (StringUtil.indexOf(content, '\r') == -1) return content;
234 CharBuffer buffer = CharBuffer.allocate(content.length());
235 buffer.append(content);
236 buffer.rewind();
237 return convertLineSeparators(buffer).first;
240 assert !file.isDirectory() : "'"+file.getPresentableUrl() + "' is directory";
241 final FileType fileType = file.getFileType();
243 if (fileType.isBinary()) {
244 final BinaryFileDecompiler decompiler = BinaryFileTypeDecompilers.INSTANCE.forFileType(fileType);
245 if (decompiler != null) {
246 CharSequence text = decompiler.decompile(file);
247 StringUtil.assertValidSeparators(text);
248 return text;
251 if (allowMissingDecompiler) return null;
252 throw new IllegalArgumentException("Attempt to load text for binary file, that doesn't have decompiler plugged in: "+file.getPresentableUrl());
255 try {
256 byte[] bytes = file.contentsToByteArray();
257 return getTextByBinaryPresentation(bytes, file);
259 catch (IOException e) {
260 return ArrayUtil.EMPTY_CHAR_SEQUENCE;
264 @NotNull
265 public static CharSequence getTextByBinaryPresentation(@NotNull final byte[] bytes, @NotNull VirtualFile virtualFile) {
266 return getTextByBinaryPresentation(bytes, virtualFile, true);
269 @NotNull
270 public static CharSequence getTextByBinaryPresentation(@NotNull byte[] bytes, @NotNull VirtualFile virtualFile, final boolean rememberDetectedSeparators) {
271 final Charset charset = detectCharset(virtualFile, bytes);
272 final int offset = skipBOM(virtualFile, bytes);
274 final Pair<CharSequence, String> result = convertBytes(bytes, charset, offset);
275 if (rememberDetectedSeparators) {
276 virtualFile.putUserData(DETECTED_LINE_SEPARATOR_KEY, result.getSecond());
278 return result.getFirst();
282 * Get detected line separator, if the file never been loaded, is loaded if checkFile parameter is specified.
284 * @param file the file to check
285 * @param checkFile if the line separator was not detected before, try to detect it
286 * @return the detected line separator or null
288 @Nullable
289 public static String detectLineSeparator(@NotNull VirtualFile file, boolean checkFile) {
290 String lineSeparator = file.getUserData(DETECTED_LINE_SEPARATOR_KEY);
291 if (lineSeparator == null && checkFile) {
292 try {
293 getTextByBinaryPresentation(file.contentsToByteArray(), file);
294 lineSeparator = file.getUserData(DETECTED_LINE_SEPARATOR_KEY);
296 catch (IOException e) {
297 // null will be returned
300 return lineSeparator;
304 * Change line separator for the file to the specified value (assumes that the documents were saved)
306 * @param project the project instance
307 * @param requestor the requestor for the operation
308 * @param file the file to convert
309 * @param newLineSeparator the new line separator for the file
310 * @throws IOException in the case of IO problem
312 public static void changeLineSeparator(@Nullable Project project,
313 @Nullable Object requestor,
314 @NotNull VirtualFile file,
315 @NotNull String newLineSeparator) throws IOException {
316 String lineSeparator = file.getUserData(DETECTED_LINE_SEPARATOR_KEY);
317 if (lineSeparator != null && lineSeparator.equals(newLineSeparator)) {
318 return;
320 CharSequence cs = getTextByBinaryPresentation(file.contentsToByteArray(), file);
321 lineSeparator = file.getUserData(DETECTED_LINE_SEPARATOR_KEY);
322 if (lineSeparator == null || lineSeparator.equals(newLineSeparator)) {
323 return;
325 if (!newLineSeparator.equals("\n")) {
326 cs = StringUtil.convertLineSeparators(cs.toString(), newLineSeparator);
328 String text = cs.toString();
329 file.putUserData(DETECTED_LINE_SEPARATOR_KEY, newLineSeparator);
330 Writer w = getWriter(project, file, requestor, text, System.currentTimeMillis());
331 try {
332 w.write(text);
334 finally {
335 w.close();
339 @NotNull
340 public static CharSequence getTextByBinaryPresentation(@NotNull byte[] bytes, Charset charset) {
341 final int offset = getBOM(bytes, charset).length;
342 return convertBytes(bytes, charset, offset).getFirst();
345 @NotNull
346 private static Pair<CharSequence, String> convertBytes(@NotNull byte[] bytes, Charset charset, final int startOffset) {
347 ByteBuffer byteBuffer = ByteBuffer.wrap(bytes, startOffset, bytes.length - startOffset);
349 if (charset == null) {
350 charset = CharsetToolkit.getDefaultSystemCharset();
352 if (charset == null) {
353 //noinspection HardCodedStringLiteral
354 charset = Charset.forName("ISO-8859-1");
356 CharBuffer charBuffer = charset.decode(byteBuffer);
357 return convertLineSeparators(charBuffer);
360 private static final Key<Boolean> UTF_CHARSET_WAS_DETECTED_FROM_BYTES = new Key<Boolean>("UTF_CHARSET_WAS_DETECTED_FROM_BYTES");
361 public static boolean utfCharsetWasDetectedFromBytes(@NotNull VirtualFile virtualFile) {
362 return virtualFile.getUserData(UTF_CHARSET_WAS_DETECTED_FROM_BYTES) != null;
364 private static void setUtfCharsetWasDetectedFromBytes(@NotNull VirtualFile virtualFile, boolean flag) {
365 virtualFile.putUserData(UTF_CHARSET_WAS_DETECTED_FROM_BYTES, flag ? Boolean.TRUE : null);