compilation fix?
[fedora-idea.git] / plugins / spellchecker / src / com / intellij / spellchecker / inspections / TextSplitter.java
blobf12b64eb5aad3903b6e631e2aec3758ca375d973
/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package com.intellij.spellchecker.inspections;
18 import com.intellij.openapi.util.TextRange;
19 import com.intellij.openapi.util.text.StringUtil;
20 import com.intellij.psi.codeStyle.NameUtil;
21 import com.intellij.spellchecker.util.Strings;
22 import org.jetbrains.annotations.NonNls;
23 import org.jetbrains.annotations.NotNull;
24 import org.jetbrains.annotations.Nullable;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
/**
 * @author shkate@jetbrains.com
 */
34 public class TextSplitter {
36 @NonNls
37 private static final Pattern NON_SPACE = Pattern.compile("\\S+");
39 @NonNls
40 private static final Pattern HTML = Pattern.compile("<(0)>");
42 @NonNls
44 private static final Pattern WORD = Pattern.compile("\\b\\p{L}*'?\\p{L}*");
46 private static final Pattern EXTENDED_WORD = Pattern.compile("\\b\\p{L}*'?\\p{L}(_*\\p{L})*");
48 private static final String WORD_SPLITTER = "\\s+|<[^>]+>";
51 @NonNls
52 private static final Pattern URL = Pattern.compile("(https?|ftp|mailto)\\:\\/\\/");
53 @NonNls
54 private static final Pattern COMPLEX = Pattern.compile("(\\.[^\\.]+)|([@]+)");
56 @NonNls
57 private static final Pattern SPECIAL = Pattern.compile("^&\\p{Alnum}{4};");
/** Not meant to be instantiated: all functionality is exposed through static methods. */
private TextSplitter() {
}
64 @Nullable
65 public static List<CheckArea> splitText(@Nullable String text) {
66 if (text == null || StringUtil.isEmpty(text)) {
67 return null;
70 int i = Math.max(text.indexOf("<!--"), text.indexOf("<%--"));
71 i = (i > -1) ? i + 4 : 0;
72 List<CheckArea> results = new ArrayList<CheckArea>();
73 String[] pieces = text.substring(i).split(WORD_SPLITTER);
74 for (String s : pieces) {
75 if (s.length() > 0) {
76 int p1 = text.indexOf(s, i);
77 TextRange range = TextRange.from(p1, s.length());
78 List<CheckArea> areaList = splitNonSpace(text, range);
79 if (areaList != null) {
80 results.addAll(areaList);
82 i += (range.getEndOffset() - range.getStartOffset());
85 return (results.size() == 0) ? null : results;
88 @Nullable
89 private static List<CheckArea> splitNonSpace(String text, TextRange range) {
90 String nonSpaceArea = text.substring(range.getStartOffset(), range.getEndOffset());
91 if (URL.matcher(nonSpaceArea).find() || COMPLEX.matcher(nonSpaceArea).find()) {
92 return null;
94 return splitWord(text, range);
98 @NotNull
99 private static List<CheckArea> splitSimpleWord(String text, TextRange range) {
100 List<CheckArea> results = new ArrayList<CheckArea>();
101 if (text==null || range==null || range.getLength()<1){
102 return results;
104 String word = text.substring(range.getStartOffset(), range.getEndOffset());
105 String[] words = NameUtil.splitNameIntoWords(word);
106 if (words == null || words.length==0) {
107 return results;
110 if (words.length == 1) {
111 Matcher matcher = WORD.matcher(words[0]);
112 if (matcher.find()) {
113 TextRange found = matcherRange(range, matcher);
114 addWord(text, results, false, found);
116 return results;
119 boolean isCapitalized = Strings.isCapitalized(words[0]);
120 boolean containsShortWord = containsShortWord(words);
122 if (isCapitalized && containsShortWord) {
123 results.add(new CheckArea(text, range, true));
124 return results;
127 boolean isAllWordsAreUpperCased = isAllWordsAreUpperCased(words);
128 int index = 0;
129 for (String s : words) {
130 int start = word.indexOf(s, index);
131 int end = start + s.length();
132 boolean isUpperCase = Strings.isUpperCase(s);
133 boolean flag = (isUpperCase && !isAllWordsAreUpperCased) || isKeyword(s);
134 Matcher matcher = WORD.matcher(s);
135 if (matcher.find()) {
136 TextRange found = matcherRange(subRange(range, start, end), matcher);
137 addWord(text, results, flag, found);
139 index = end;
141 return results;
145 @Nullable
146 private static List<CheckArea> splitWord(String text, TextRange range) {
147 if (StringUtil.isEmpty(text) || range.getLength() <= 1) {
148 return null;
151 List<CheckArea> results = new ArrayList<CheckArea>();
152 String word = text.substring(range.getStartOffset(), range.getEndOffset());
154 Matcher specialMatcher = SPECIAL.matcher(word);
155 if (specialMatcher.find()) {
156 TextRange found = matcherRange(range, specialMatcher);
157 addWord(text, results, true, found);
158 return results;
161 Matcher extendedMatcher = EXTENDED_WORD.matcher(word);
162 if (extendedMatcher.find()) {
163 TextRange found = matcherRange(range, extendedMatcher);
164 results.addAll(splitSimpleWord(text, found));
167 return results;
171 private static void addWord(String text, List<CheckArea> results, boolean flag, TextRange found) {
172 boolean tooShort = (found.getEndOffset() - found.getStartOffset()) <= 3;
173 results.add(new CheckArea(text, found, flag || tooShort));
176 private static boolean isKeyword(String s) {
177 return false;
180 private static boolean isAllWordsAreUpperCased(String[] words) {
181 if (words == null) return false;
182 for (String word : words) {
183 if (!Strings.isUpperCase(word)) {
184 return false;
187 return true;
190 private static boolean containsShortWord(String[] words) {
191 if (words == null) return false;
192 for (String word : words) {
193 if (word.length() < 2) {
194 return true;
197 return false;
200 @NotNull
201 private static TextRange matcherRange(@NotNull TextRange range, @NotNull Matcher matcher) {
202 return subRange(range, matcher.start(), matcher.end());
205 @NotNull
206 private static TextRange subRange(@NotNull TextRange range, int start, int end) {
207 return TextRange.from(range.getStartOffset() + start, end - start);