1 // Scintilla source code edit control
3 ** Hold a list of words.
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
15 #include "StringCopy.h"
18 using namespace Scintilla
;
21 * Creates an array that points into each word in the string and puts \0 terminators
24 static char **ArrayFromWordList(char *wordlist
, int *len
, bool onlyLineEnds
= false) {
27 // For rapid determination of whether a character is a separator, build
29 bool wordSeparator
[256] = {}; // Initialise all to false.
30 wordSeparator
[static_cast<unsigned int>('\r')] = true;
31 wordSeparator
[static_cast<unsigned int>('\n')] = true;
33 wordSeparator
[static_cast<unsigned int>(' ')] = true;
34 wordSeparator
[static_cast<unsigned int>('\t')] = true;
36 for (int j
= 0; wordlist
[j
]; j
++) {
37 const int curr
= static_cast<unsigned char>(wordlist
[j
]);
38 if (!wordSeparator
[curr
] && wordSeparator
[prev
])
42 char **keywords
= new char *[words
+ 1];
44 const size_t slen
= strlen(wordlist
);
47 for (size_t k
= 0; k
< slen
; k
++) {
48 if (!wordSeparator
[static_cast<unsigned char>(wordlist
[k
])]) {
50 keywords
[wordsStore
] = &wordlist
[k
];
59 assert(wordsStore
< (words
+ 1));
60 keywords
[wordsStore
] = &wordlist
[slen
];
65 WordList::WordList(bool onlyLineEnds_
) :
66 words(0), list(0), len(0), onlyLineEnds(onlyLineEnds_
) {
67 // Prevent warnings by static analyzers about uninitialized starts.
71 WordList::~WordList() {
75 WordList::operator bool() const {
76 return len
? true : false;
79 bool WordList::operator!=(const WordList
&other
) const {
82 for (int i
=0; i
<len
; i
++) {
83 if (strcmp(words
[i
], other
.words
[i
]) != 0)
89 int WordList::Length() const {
93 void WordList::Clear() {
105 static bool cmpWords(const char *a
, const char *b
) {
106 return strcmp(a
, b
) < 0;
111 static int cmpWords(const void *a
, const void *b
) {
112 return strcmp(*static_cast<const char * const *>(a
), *static_cast<const char * const *>(b
));
115 static void SortWordList(char **words
, unsigned int len
) {
116 qsort(static_cast<void *>(words
), len
, sizeof(*words
), cmpWords
);
121 void WordList::Set(const char *s
) {
123 const size_t lenS
= strlen(s
) + 1;
124 list
= new char[lenS
];
125 memcpy(list
, s
, lenS
);
126 words
= ArrayFromWordList(list
, &len
, onlyLineEnds
);
128 std::sort(words
, words
+ len
, cmpWords
);
130 SortWordList(words
, len
);
132 std::fill(starts
, std::end(starts
), -1);
133 for (int l
= len
- 1; l
>= 0; l
--) {
134 unsigned char indexChar
= words
[l
][0];
135 starts
[indexChar
] = l
;
139 /** Check whether a string is in the list.
140 * List elements are either exact matches or prefixes.
141 * Prefix elements start with '^' and match all strings that start with the rest of the element
142 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
144 bool WordList::InList(const char *s
) const {
147 const unsigned char firstChar
= s
[0];
148 int j
= starts
[firstChar
];
150 while (static_cast<unsigned char>(words
[j
][0]) == firstChar
) {
151 if (s
[1] == words
[j
][1]) {
152 const char *a
= words
[j
] + 1;
153 const char *b
= s
+ 1;
154 while (*a
&& *a
== *b
) {
164 j
= starts
[static_cast<unsigned int>('^')];
166 while (words
[j
][0] == '^') {
167 const char *a
= words
[j
] + 1;
169 while (*a
&& *a
== *b
) {
181 /** Similar to InList, but word s can be a substring of a keyword.
182 * E.g. the keyword define is defined as def~ine. This means the word must start
183 * with def to be a keyword, but also defi, defin and define are valid.
184 * The marker is ~ in this case.
186 bool WordList::InListAbbreviated(const char *s
, const char marker
) const {
189 const unsigned char firstChar
= s
[0];
190 int j
= starts
[firstChar
];
192 while (static_cast<unsigned char>(words
[j
][0]) == firstChar
) {
193 bool isSubword
= false;
195 if (words
[j
][1] == marker
) {
199 if (s
[1] == words
[j
][start
]) {
200 const char *a
= words
[j
] + start
;
201 const char *b
= s
+ 1;
202 while (*a
&& *a
== *b
) {
210 if ((!*a
|| isSubword
) && !*b
)
216 j
= starts
[static_cast<unsigned int>('^')];
218 while (words
[j
][0] == '^') {
219 const char *a
= words
[j
] + 1;
221 while (*a
&& *a
== *b
) {
233 /** Similar to InListAbbreviated, but word s can be an abridged version of a keyword.
234 * E.g. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
235 * "after." and a suffix (ends with) of ":" to be a keyword. Hence "after.field:" and "after.form.item:" are valid.
236 * Similarly, the "~.is.valid" keyword is suffix only, hence "field.is.valid" and "form.is.valid" are valid.
237 * The marker is ~ in this case.
238 * No check for multiple markers is done, and keywords with multiple markers won't work.
240 bool WordList::InListAbridged(const char *s
, const char marker
) const {
243 const unsigned char firstChar
= s
[0];
244 int j
= starts
[firstChar
];
246 while (static_cast<unsigned char>(words
[j
][0]) == firstChar
) {
247 const char *a
= words
[j
];
249 while (*a
&& *a
== *b
) {
253 const size_t suffixLengthA
= strlen(a
);
254 const size_t suffixLengthB
= strlen(b
);
255 if (suffixLengthA
>= suffixLengthB
)
257 b
= b
+ suffixLengthB
- suffixLengthA
- 1;
267 j
= starts
[static_cast<unsigned int>(marker
)];
269 while (words
[j
][0] == marker
) {
270 const char *a
= words
[j
] + 1;
272 const size_t suffixLengthA
= strlen(a
);
273 const size_t suffixLengthB
= strlen(b
);
274 if (suffixLengthA
> suffixLengthB
) {
278 b
= b
+ suffixLengthB
- suffixLengthA
;
280 while (*a
&& *a
== *b
) {
293 const char *WordList::WordAt(int n
) const {