Update Scintilla to version 3.5.3
[TortoiseGit.git] / ext / scintilla / lexlib / WordList.cxx
blob7ac00ad2b74bedf16d2b8396d6cdb2f7a54076f2
// Scintilla source code edit control
/** @file WordList.cxx
 ** Hold a list of words.
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <ctype.h>
14 #include <algorithm>
16 #include "StringCopy.h"
17 #include "WordList.h"
19 #ifdef SCI_NAMESPACE
20 using namespace Scintilla;
21 #endif
/**
 * Creates an array that points into each word in the string and puts \0 terminators
 * after each word.
 *
 * @param wordlist      NUL-terminated buffer. It is modified in place: every
 *                      separator character is overwritten with '\0'.
 * @param len           Receives the number of words found.
 * @param onlyLineEnds  When true only '\r' and '\n' separate words; otherwise
 *                      space and tab are separators as well.
 * @return An array allocated with new[] holding one pointer per word, followed
 *         by one extra entry that points at the terminating NUL of @a wordlist.
 *         The pointers refer into @a wordlist, so the buffer must outlive the array.
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// For rapid determination of whether a character is a separator, build
	// a look up table.
	bool wordSeparator[256] = {};
	wordSeparator[static_cast<unsigned char>('\r')] = true;
	wordSeparator[static_cast<unsigned char>('\n')] = true;
	if (!onlyLineEnds) {
		wordSeparator[static_cast<unsigned char>(' ')] = true;
		wordSeparator[static_cast<unsigned char>('\t')] = true;
	}

	// First pass: count the words so the pointer array can be sized exactly.
	// Starting with prev as a separator makes a word at offset 0 count.
	int prev = '\n';
	int words = 0;
	for (int j = 0; wordlist[j]; j++) {
		const int curr = static_cast<unsigned char>(wordlist[j]);
		if (!wordSeparator[curr] && wordSeparator[prev])
			words++;
		prev = curr;
	}

	char **keywords = new char *[words + 1];
	int wordsStore = 0;
	// Measured before the buffer is cut up with '\0' terminators below.
	const size_t slen = strlen(wordlist);
	if (words) {
		// Second pass: separators become '\0', so a word starts wherever a
		// non-separator follows a '\0' (or the start of the buffer).
		prev = '\0';
		for (size_t k = 0; k < slen; k++) {
			if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
				if (!prev) {
					keywords[wordsStore] = &wordlist[k];
					wordsStore++;
				}
			} else {
				wordlist[k] = '\0';
			}
			prev = wordlist[k];
		}
	}
	// Extra trailing entry: an empty string at the end of the buffer.
	keywords[wordsStore] = &wordlist[slen];
	*len = wordsStore;
	return keywords;
}
70 WordList::WordList(bool onlyLineEnds_) :
71 words(0), list(0), len(0), onlyLineEnds(onlyLineEnds_) {
72 // Prevent warnings by static analyzers about uninitialized starts.
73 starts[0] = -1;
76 WordList::~WordList() {
77 Clear();
80 WordList::operator bool() const {
81 return len ? true : false;
84 bool WordList::operator!=(const WordList &other) const {
85 if (len != other.len)
86 return true;
87 for (int i=0; i<len; i++) {
88 if (strcmp(words[i], other.words[i]) != 0)
89 return true;
91 return false;
94 int WordList::Length() const {
95 return len;
98 void WordList::Clear() {
99 if (words) {
100 delete []list;
101 delete []words;
103 words = 0;
104 list = 0;
105 len = 0;
#ifdef _MSC_VER

/** Ordering predicate for std::sort: true when a sorts before b under strcmp. */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}

#else

/** qsort comparator: each argument points at a char * element of the array. */
static int cmpWords(const void *a, const void *b) {
	return strcmp(*static_cast<const char * const *>(a), *static_cast<const char * const *>(b));
}

/** Sort an array of len C strings into strcmp order using qsort. */
static void SortWordList(char **words, unsigned int len) {
	qsort(static_cast<void *>(words), len, sizeof(*words), cmpWords);
}

#endif
126 void WordList::Set(const char *s) {
127 Clear();
128 const size_t lenS = strlen(s) + 1;
129 list = new char[lenS];
130 memcpy(list, s, lenS);
131 words = ArrayFromWordList(list, &len, onlyLineEnds);
132 #ifdef _MSC_VER
133 std::sort(words, words + len, cmpWords);
134 #else
135 SortWordList(words, len);
136 #endif
137 for (unsigned int k = 0; k < ELEMENTS(starts); k++)
138 starts[k] = -1;
139 for (int l = len - 1; l >= 0; l--) {
140 unsigned char indexChar = words[l][0];
141 starts[indexChar] = l;
145 /** Check whether a string is in the list.
146 * List elements are either exact matches or prefixes.
147 * Prefix elements start with '^' and match all strings that start with the rest of the element
148 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
150 bool WordList::InList(const char *s) const {
151 if (0 == words)
152 return false;
153 unsigned char firstChar = s[0];
154 int j = starts[firstChar];
155 if (j >= 0) {
156 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
157 if (s[1] == words[j][1]) {
158 const char *a = words[j] + 1;
159 const char *b = s + 1;
160 while (*a && *a == *b) {
161 a++;
162 b++;
164 if (!*a && !*b)
165 return true;
167 j++;
170 j = starts[static_cast<unsigned int>('^')];
171 if (j >= 0) {
172 while (words[j][0] == '^') {
173 const char *a = words[j] + 1;
174 const char *b = s;
175 while (*a && *a == *b) {
176 a++;
177 b++;
179 if (!*a)
180 return true;
181 j++;
184 return false;
187 /** similar to InList, but word s can be a substring of keyword.
188 * eg. the keyword define is defined as def~ine. This means the word must start
189 * with def to be a keyword, but also defi, defin and define are valid.
190 * The marker is ~ in this case.
192 bool WordList::InListAbbreviated(const char *s, const char marker) const {
193 if (0 == words)
194 return false;
195 unsigned char firstChar = s[0];
196 int j = starts[firstChar];
197 if (j >= 0) {
198 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
199 bool isSubword = false;
200 int start = 1;
201 if (words[j][1] == marker) {
202 isSubword = true;
203 start++;
205 if (s[1] == words[j][start]) {
206 const char *a = words[j] + start;
207 const char *b = s + 1;
208 while (*a && *a == *b) {
209 a++;
210 if (*a == marker) {
211 isSubword = true;
212 a++;
214 b++;
216 if ((!*a || isSubword) && !*b)
217 return true;
219 j++;
222 j = starts[static_cast<unsigned int>('^')];
223 if (j >= 0) {
224 while (words[j][0] == '^') {
225 const char *a = words[j] + 1;
226 const char *b = s;
227 while (*a && *a == *b) {
228 a++;
229 b++;
231 if (!*a)
232 return true;
233 j++;
236 return false;
239 const char *WordList::WordAt(int n) const {
240 return words[n];