Update Scintilla to 4.0.4
[TortoiseGit.git] / ext / scintilla / lexlib / WordList.cxx
blob a0e812b02dd18b6cdcdf8a7ef01b74d5b0955d3b
1 // Scintilla source code edit control
2 /** @file WordList.cxx
3 ** Hold a list of words.
4 **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <cstdlib>
9 #include <cassert>
10 #include <cstring>
12 #include <algorithm>
13 #include <iterator>
15 #include "StringCopy.h"
16 #include "WordList.h"
18 using namespace Scintilla;
/**
 * Split a writable, NUL-terminated string into words in place.
 *
 * Carriage return and line feed always separate words; space and tab also do
 * unless onlyLineEnds is true. Every separator byte in wordlist is overwritten
 * with \0 so that each word becomes its own C string.
 *
 * @param wordlist      buffer to split; modified in place.
 * @param len           receives the number of words found.
 * @param onlyLineEnds  when true, only \r and \n separate words.
 * @return array allocated with new[] holding one pointer per word followed by
 *         one extra entry that points at the terminating \0 of wordlist.
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// Look up table giving, for every byte value, whether it separates words.
	bool isSeparator[256] = {};	// All false to begin with.
	isSeparator[static_cast<unsigned char>('\r')] = true;
	isSeparator[static_cast<unsigned char>('\n')] = true;
	if (!onlyLineEnds) {
		isSeparator[static_cast<unsigned char>(' ')] = true;
		isSeparator[static_cast<unsigned char>('\t')] = true;
	}

	// First pass: count the words so the pointer array can be sized exactly.
	// The start of the string behaves as if it followed a separator.
	int wordCount = 0;
	bool afterSeparator = true;
	for (const char *p = wordlist; *p; ++p) {
		const bool separatorHere = isSeparator[static_cast<unsigned char>(*p)];
		if (afterSeparator && !separatorHere)
			++wordCount;
		afterSeparator = separatorHere;
	}

	char **keywords = new char *[wordCount + 1];
	const size_t totalLength = strlen(wordlist);
	int stored = 0;

	// Second pass: terminate each word and remember where it starts.
	if (wordCount != 0) {
		bool afterTerminator = true;
		for (size_t pos = 0; pos < totalLength; ++pos) {
			char &ch = wordlist[pos];
			if (isSeparator[static_cast<unsigned char>(ch)]) {
				ch = '\0';
				afterTerminator = true;
			} else {
				if (afterTerminator) {
					keywords[stored] = &ch;
					++stored;
				}
				afterTerminator = false;
			}
		}
	}

	assert(stored < (wordCount + 1));
	// Sentinel entry: an empty string located at the end of the buffer.
	keywords[stored] = &wordlist[totalLength];
	*len = stored;
	return keywords;
}
65 WordList::WordList(bool onlyLineEnds_) :
66 words(0), list(0), len(0), onlyLineEnds(onlyLineEnds_) {
67 // Prevent warnings by static analyzers about uninitialized starts.
68 starts[0] = -1;
71 WordList::~WordList() {
72 Clear();
75 WordList::operator bool() const {
76 return len ? true : false;
79 bool WordList::operator!=(const WordList &other) const {
80 if (len != other.len)
81 return true;
82 for (int i=0; i<len; i++) {
83 if (strcmp(words[i], other.words[i]) != 0)
84 return true;
86 return false;
89 int WordList::Length() const {
90 return len;
93 void WordList::Clear() {
94 if (words) {
95 delete []list;
96 delete []words;
98 words = 0;
99 list = 0;
100 len = 0;
#ifdef _MSC_VER

/** Ordering predicate for std::sort: true when a sorts before b under strcmp. */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}

#else

/**
 * Comparator for qsort. Each argument points at an element of the array being
 * sorted, that is at a pointer to a C string; the strings are ordered with strcmp.
 */
static int cmpWords(const void *a, const void *b) {
	const char *const lhs = *static_cast<const char * const *>(a);
	const char *const rhs = *static_cast<const char * const *>(b);
	return strcmp(lhs, rhs);
}

/** Sort the first len string pointers of words in place into strcmp order. */
static void SortWordList(char **words, unsigned int len) {
	void *const base = static_cast<void *>(words);
	qsort(base, len, sizeof(words[0]), cmpWords);
}

#endif
121 void WordList::Set(const char *s) {
122 Clear();
123 const size_t lenS = strlen(s) + 1;
124 list = new char[lenS];
125 memcpy(list, s, lenS);
126 words = ArrayFromWordList(list, &len, onlyLineEnds);
127 #ifdef _MSC_VER
128 std::sort(words, words + len, cmpWords);
129 #else
130 SortWordList(words, len);
131 #endif
132 std::fill(starts, std::end(starts), -1);
133 for (int l = len - 1; l >= 0; l--) {
134 unsigned char indexChar = words[l][0];
135 starts[indexChar] = l;
139 /** Check whether a string is in the list.
140 * List elements are either exact matches or prefixes.
141 * Prefix elements start with '^' and match all strings that start with the rest of the element
142 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
144 bool WordList::InList(const char *s) const {
145 if (0 == words)
146 return false;
147 const unsigned char firstChar = s[0];
148 int j = starts[firstChar];
149 if (j >= 0) {
150 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
151 if (s[1] == words[j][1]) {
152 const char *a = words[j] + 1;
153 const char *b = s + 1;
154 while (*a && *a == *b) {
155 a++;
156 b++;
158 if (!*a && !*b)
159 return true;
161 j++;
164 j = starts[static_cast<unsigned int>('^')];
165 if (j >= 0) {
166 while (words[j][0] == '^') {
167 const char *a = words[j] + 1;
168 const char *b = s;
169 while (*a && *a == *b) {
170 a++;
171 b++;
173 if (!*a)
174 return true;
175 j++;
178 return false;
181 /** similar to InList, but word s can be a substring of keyword.
182 * eg. the keyword define is defined as def~ine. This means the word must start
183 * with def to be a keyword, but also defi, defin and define are valid.
184 * The marker is ~ in this case.
186 bool WordList::InListAbbreviated(const char *s, const char marker) const {
187 if (0 == words)
188 return false;
189 const unsigned char firstChar = s[0];
190 int j = starts[firstChar];
191 if (j >= 0) {
192 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
193 bool isSubword = false;
194 int start = 1;
195 if (words[j][1] == marker) {
196 isSubword = true;
197 start++;
199 if (s[1] == words[j][start]) {
200 const char *a = words[j] + start;
201 const char *b = s + 1;
202 while (*a && *a == *b) {
203 a++;
204 if (*a == marker) {
205 isSubword = true;
206 a++;
208 b++;
210 if ((!*a || isSubword) && !*b)
211 return true;
213 j++;
216 j = starts[static_cast<unsigned int>('^')];
217 if (j >= 0) {
218 while (words[j][0] == '^') {
219 const char *a = words[j] + 1;
220 const char *b = s;
221 while (*a && *a == *b) {
222 a++;
223 b++;
225 if (!*a)
226 return true;
227 j++;
230 return false;
233 /** similar to InListAbbreviated, but word s can be a abridged version of a keyword.
234 * eg. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
235 * "after." and suffix (ends with) of ":" to be a keyword, Hence "after.field:" , "after.form.item:" are valid.
236 * Similarly "~.is.valid" keyword is suffix only... hence "field.is.valid" , "form.is.valid" are valid.
237 * The marker is ~ in this case.
238 * No multiple markers check is done and wont work.
240 bool WordList::InListAbridged(const char *s, const char marker) const {
241 if (0 == words)
242 return false;
243 const unsigned char firstChar = s[0];
244 int j = starts[firstChar];
245 if (j >= 0) {
246 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
247 const char *a = words[j];
248 const char *b = s;
249 while (*a && *a == *b) {
250 a++;
251 if (*a == marker) {
252 a++;
253 const size_t suffixLengthA = strlen(a);
254 const size_t suffixLengthB = strlen(b);
255 if (suffixLengthA >= suffixLengthB)
256 break;
257 b = b + suffixLengthB - suffixLengthA - 1;
259 b++;
261 if (!*a && !*b)
262 return true;
263 j++;
267 j = starts[static_cast<unsigned int>(marker)];
268 if (j >= 0) {
269 while (words[j][0] == marker) {
270 const char *a = words[j] + 1;
271 const char *b = s;
272 const size_t suffixLengthA = strlen(a);
273 const size_t suffixLengthB = strlen(b);
274 if (suffixLengthA > suffixLengthB) {
275 j++;
276 continue;
278 b = b + suffixLengthB - suffixLengthA;
280 while (*a && *a == *b) {
281 a++;
282 b++;
284 if (!*a && !*b)
285 return true;
286 j++;
290 return false;
293 const char *WordList::WordAt(int n) const {
294 return words[n];