Merge pull request #2212 from TwlyY29/bibtex-parser
[geany-mirror.git] / scintilla / lexlib / WordList.cxx
blobc8b4cd63b5a1e5a54365572956faf99dcda50380
1 // Scintilla source code edit control
2 /** @file WordList.cxx
3 ** Hold a list of words.
4 **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <cstdlib>
9 #include <cassert>
10 #include <cstring>
12 #include <algorithm>
13 #include <iterator>
15 #include "WordList.h"
17 using namespace Scintilla;
19 /**
20 * Creates an array that points into each word in the string and puts \0 terminators
21 * after each word.
23 static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
24 int prev = '\n';
25 int words = 0;
26 // For rapid determination of whether a character is a separator, build
27 // a look up table.
28 bool wordSeparator[256] = {}; // Initialise all to false.
29 wordSeparator[static_cast<unsigned int>('\r')] = true;
30 wordSeparator[static_cast<unsigned int>('\n')] = true;
31 if (!onlyLineEnds) {
32 wordSeparator[static_cast<unsigned int>(' ')] = true;
33 wordSeparator[static_cast<unsigned int>('\t')] = true;
35 for (int j = 0; wordlist[j]; j++) {
36 const int curr = static_cast<unsigned char>(wordlist[j]);
37 if (!wordSeparator[curr] && wordSeparator[prev])
38 words++;
39 prev = curr;
41 char **keywords = new char *[words + 1];
42 int wordsStore = 0;
43 const size_t slen = strlen(wordlist);
44 if (words) {
45 prev = '\0';
46 for (size_t k = 0; k < slen; k++) {
47 if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
48 if (!prev) {
49 keywords[wordsStore] = &wordlist[k];
50 wordsStore++;
52 } else {
53 wordlist[k] = '\0';
55 prev = wordlist[k];
58 assert(wordsStore < (words + 1));
59 keywords[wordsStore] = &wordlist[slen];
60 *len = wordsStore;
61 return keywords;
64 WordList::WordList(bool onlyLineEnds_) :
65 words(0), list(0), len(0), onlyLineEnds(onlyLineEnds_) {
66 // Prevent warnings by static analyzers about uninitialized starts.
67 starts[0] = -1;
70 WordList::~WordList() {
71 Clear();
74 WordList::operator bool() const {
75 return len ? true : false;
78 bool WordList::operator!=(const WordList &other) const {
79 if (len != other.len)
80 return true;
81 for (int i=0; i<len; i++) {
82 if (strcmp(words[i], other.words[i]) != 0)
83 return true;
85 return false;
88 int WordList::Length() const {
89 return len;
92 void WordList::Clear() {
93 if (words) {
94 delete []list;
95 delete []words;
97 words = 0;
98 list = 0;
99 len = 0;
102 #ifdef _MSC_VER
104 static bool cmpWords(const char *a, const char *b) {
105 return strcmp(a, b) < 0;
108 #else
110 static int cmpWords(const void *a, const void *b) {
111 return strcmp(*static_cast<const char * const *>(a), *static_cast<const char * const *>(b));
114 static void SortWordList(char **words, unsigned int len) {
115 qsort(words, len, sizeof(*words), cmpWords);
118 #endif
120 void WordList::Set(const char *s) {
121 Clear();
122 const size_t lenS = strlen(s) + 1;
123 list = new char[lenS];
124 memcpy(list, s, lenS);
125 words = ArrayFromWordList(list, &len, onlyLineEnds);
126 #ifdef _MSC_VER
127 std::sort(words, words + len, cmpWords);
128 #else
129 SortWordList(words, len);
130 #endif
131 std::fill(starts, std::end(starts), -1);
132 for (int l = len - 1; l >= 0; l--) {
133 unsigned char indexChar = words[l][0];
134 starts[indexChar] = l;
138 /** Check whether a string is in the list.
139 * List elements are either exact matches or prefixes.
140 * Prefix elements start with '^' and match all strings that start with the rest of the element
141 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
143 bool WordList::InList(const char *s) const {
144 if (0 == words)
145 return false;
146 const unsigned char firstChar = s[0];
147 int j = starts[firstChar];
148 if (j >= 0) {
149 while (words[j][0] == firstChar) {
150 if (s[1] == words[j][1]) {
151 const char *a = words[j] + 1;
152 const char *b = s + 1;
153 while (*a && *a == *b) {
154 a++;
155 b++;
157 if (!*a && !*b)
158 return true;
160 j++;
163 j = starts[static_cast<unsigned int>('^')];
164 if (j >= 0) {
165 while (words[j][0] == '^') {
166 const char *a = words[j] + 1;
167 const char *b = s;
168 while (*a && *a == *b) {
169 a++;
170 b++;
172 if (!*a)
173 return true;
174 j++;
177 return false;
180 /** similar to InList, but word s can be a substring of keyword.
181 * eg. the keyword define is defined as def~ine. This means the word must start
182 * with def to be a keyword, but also defi, defin and define are valid.
183 * The marker is ~ in this case.
185 bool WordList::InListAbbreviated(const char *s, const char marker) const {
186 if (0 == words)
187 return false;
188 const unsigned char firstChar = s[0];
189 int j = starts[firstChar];
190 if (j >= 0) {
191 while (words[j][0] == firstChar) {
192 bool isSubword = false;
193 int start = 1;
194 if (words[j][1] == marker) {
195 isSubword = true;
196 start++;
198 if (s[1] == words[j][start]) {
199 const char *a = words[j] + start;
200 const char *b = s + 1;
201 while (*a && *a == *b) {
202 a++;
203 if (*a == marker) {
204 isSubword = true;
205 a++;
207 b++;
209 if ((!*a || isSubword) && !*b)
210 return true;
212 j++;
215 j = starts[static_cast<unsigned int>('^')];
216 if (j >= 0) {
217 while (words[j][0] == '^') {
218 const char *a = words[j] + 1;
219 const char *b = s;
220 while (*a && *a == *b) {
221 a++;
222 b++;
224 if (!*a)
225 return true;
226 j++;
229 return false;
232 /** similar to InListAbbreviated, but word s can be a abridged version of a keyword.
233 * eg. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
234 * "after." and suffix (ends with) of ":" to be a keyword, Hence "after.field:" , "after.form.item:" are valid.
235 * Similarly "~.is.valid" keyword is suffix only... hence "field.is.valid" , "form.is.valid" are valid.
236 * The marker is ~ in this case.
237 * No multiple markers check is done and wont work.
239 bool WordList::InListAbridged(const char *s, const char marker) const {
240 if (0 == words)
241 return false;
242 const unsigned char firstChar = s[0];
243 int j = starts[firstChar];
244 if (j >= 0) {
245 while (words[j][0] == firstChar) {
246 const char *a = words[j];
247 const char *b = s;
248 while (*a && *a == *b) {
249 a++;
250 if (*a == marker) {
251 a++;
252 const size_t suffixLengthA = strlen(a);
253 const size_t suffixLengthB = strlen(b);
254 if (suffixLengthA >= suffixLengthB)
255 break;
256 b = b + suffixLengthB - suffixLengthA - 1;
258 b++;
260 if (!*a && !*b)
261 return true;
262 j++;
266 j = starts[static_cast<unsigned int>(marker)];
267 if (j >= 0) {
268 while (words[j][0] == marker) {
269 const char *a = words[j] + 1;
270 const char *b = s;
271 const size_t suffixLengthA = strlen(a);
272 const size_t suffixLengthB = strlen(b);
273 if (suffixLengthA > suffixLengthB) {
274 j++;
275 continue;
277 b = b + suffixLengthB - suffixLengthA;
279 while (*a && *a == *b) {
280 a++;
281 b++;
283 if (!*a && !*b)
284 return true;
285 j++;
289 return false;
292 const char *WordList::WordAt(int n) const {
293 return words[n];