updated Scintilla to 2.29
[TortoiseGit.git] / ext / scintilla / lexlib / WordList.cxx
blob1d6cf5661d5c9d0d27a9949ec12b5e2b2080e1ce
// Scintilla source code edit control
/** @file WordList.cxx
 ** Colourise for particular languages.
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
14 #include <algorithm>
16 #include "WordList.h"
18 #ifdef SCI_NAMESPACE
19 using namespace Scintilla;
20 #endif
/**
 * Splits @a wordlist in place into words and builds an index of them.
 *
 * Every separator character in @a wordlist is overwritten with '\0', so each
 * returned pointer refers to a NUL-terminated word inside the caller's buffer.
 * Carriage return and line feed always separate words; space and tab do too
 * unless @a onlyLineEnds is true.
 *
 * @param wordlist     NUL-terminated, writable buffer; modified in place.
 * @param len          Receives the number of words found.
 * @param onlyLineEnds When true, only '\r' and '\n' separate words.
 * @return Array allocated with new[] holding *len word pointers followed by
 *         one sentinel entry that points at the buffer's terminating NUL.
 *         The array itself must be released with delete[]; the words it
 *         points to live inside @a wordlist.
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// For rapid determination of whether a character is a separator, build
	// a look up table.
	bool wordSeparator[256];
	for (int i = 0; i < 256; i++) {
		wordSeparator[i] = false;
	}
	wordSeparator['\r'] = true;
	wordSeparator['\n'] = true;
	if (!onlyLineEnds) {
		wordSeparator[' '] = true;
		wordSeparator['\t'] = true;
	}

	// First pass: count word starts (a non-separator following a separator)
	// so the pointer array can be sized exactly. The start of the buffer
	// counts as following a separator.
	int prev = '\n';
	int words = 0;
	for (int j = 0; wordlist[j]; j++) {
		int curr = static_cast<unsigned char>(wordlist[j]);
		if (!wordSeparator[curr] && wordSeparator[prev])
			words++;
		prev = curr;
	}

	// Plain new[] signals failure by throwing, never by returning null, so
	// no null check is needed on the result.
	char **keywords = new char *[words + 1];

	// Second pass: terminate each word and record where it starts. Here
	// prev is the (possibly just zeroed) previous byte, so a zero value
	// means "the previous position was a separator or the buffer start".
	words = 0;
	prev = '\0';
	size_t slen = strlen(wordlist);
	for (size_t k = 0; k < slen; k++) {
		if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
			if (!prev) {
				keywords[words] = &wordlist[k];
				words++;
			}
		} else {
			wordlist[k] = '\0';
		}
		prev = wordlist[k];
	}
	// Sentinel: an empty string at the end of the buffer.
	keywords[words] = &wordlist[slen];
	*len = words;
	return keywords;
}
71 bool WordList::operator!=(const WordList &other) const {
72 if (len != other.len)
73 return true;
74 for (int i=0; i<len; i++) {
75 if (strcmp(words[i], other.words[i]) != 0)
76 return true;
78 return false;
81 void WordList::Clear() {
82 if (words) {
83 delete []list;
84 delete []words;
86 words = 0;
87 list = 0;
88 len = 0;
91 #ifdef _MSC_VER
/**
 * Ordering predicate for std::sort over C strings.
 * @return true when @a a sorts strictly before @a b according to strcmp.
 */
static bool cmpWords(const char *a, const char *b) {
	// strcmp only guarantees the sign of its result, not the value -1, so
	// test for "negative" rather than for one particular negative number.
	return strcmp(a, b) < 0;
}
97 #else
/**
 * qsort comparator for an array of C-string pointers.
 * Each argument is the address of an array element (a pointer to a
 * const char pointer); the strings they refer to are compared with strcmp.
 */
static int cmpWords(const void *a, const void *b) {
	const char *const *lhs = static_cast<const char *const *>(a);
	const char *const *rhs = static_cast<const char *const *>(b);
	return strcmp(*lhs, *rhs);
}
103 static void SortWordList(char **words, unsigned int len) {
104 qsort(reinterpret_cast<void *>(words), len, sizeof(*words), cmpWords);
107 #endif
109 void WordList::Set(const char *s) {
110 Clear();
111 list = new char[strlen(s) + 1];
112 strcpy(list, s);
113 words = ArrayFromWordList(list, &len, onlyLineEnds);
114 #ifdef _MSC_VER
115 std::sort(words, words + len, cmpWords);
116 #else
117 SortWordList(words, len);
118 #endif
119 for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
120 starts[k] = -1;
121 for (int l = len - 1; l >= 0; l--) {
122 unsigned char indexChar = words[l][0];
123 starts[indexChar] = l;
127 /** Check whether a string is in the list.
128 * List elements are either exact matches or prefixes.
129 * Prefix elements start with '^' and match all strings that start with the rest of the element
130 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
132 bool WordList::InList(const char *s) const {
133 if (0 == words)
134 return false;
135 unsigned char firstChar = s[0];
136 int j = starts[firstChar];
137 if (j >= 0) {
138 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
139 if (s[1] == words[j][1]) {
140 const char *a = words[j] + 1;
141 const char *b = s + 1;
142 while (*a && *a == *b) {
143 a++;
144 b++;
146 if (!*a && !*b)
147 return true;
149 j++;
152 j = starts['^'];
153 if (j >= 0) {
154 while (words[j][0] == '^') {
155 const char *a = words[j] + 1;
156 const char *b = s;
157 while (*a && *a == *b) {
158 a++;
159 b++;
161 if (!*a)
162 return true;
163 j++;
166 return false;
169 /** similar to InList, but word s can be a substring of keyword.
170 * eg. the keyword define is defined as def~ine. This means the word must start
171 * with def to be a keyword, but also defi, defin and define are valid.
172 * The marker is ~ in this case.
174 bool WordList::InListAbbreviated(const char *s, const char marker) const {
175 if (0 == words)
176 return false;
177 unsigned char firstChar = s[0];
178 int j = starts[firstChar];
179 if (j >= 0) {
180 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
181 bool isSubword = false;
182 int start = 1;
183 if (words[j][1] == marker) {
184 isSubword = true;
185 start++;
187 if (s[1] == words[j][start]) {
188 const char *a = words[j] + start;
189 const char *b = s + 1;
190 while (*a && *a == *b) {
191 a++;
192 if (*a == marker) {
193 isSubword = true;
194 a++;
196 b++;
198 if ((!*a || isSubword) && !*b)
199 return true;
201 j++;
204 j = starts['^'];
205 if (j >= 0) {
206 while (words[j][0] == '^') {
207 const char *a = words[j] + 1;
208 const char *b = s;
209 while (*a && *a == *b) {
210 a++;
211 b++;
213 if (!*a)
214 return true;
215 j++;
218 return false;