Bug 1893155 - Part 6: Correct constant for minimum epoch day. r=spidermonkey-reviewer...
[gecko.git] / storage / mozStorageSQLFunctions.cpp
blobc05010c2ff1e96ca1df2c2514c1de81dd3ce0ddd
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "mozilla/ArrayUtils.h"
9 #include "mozStorageSQLFunctions.h"
10 #include "nsTArray.h"
11 #include "nsUnicharUtils.h"
12 #include <algorithm>
13 #include "sqlite3.h"
15 namespace mozilla {
16 namespace storage {
18 ////////////////////////////////////////////////////////////////////////////////
19 //// Local Helper Functions
21 namespace {
23 /**
24 * Performs the LIKE comparison of a string against a pattern. For more detail
25 * see http://www.sqlite.org/lang_expr.html#like.
27 * @param aPatternItr
28 * An iterator at the start of the pattern to check for.
29 * @param aPatternEnd
30 * An iterator at the end of the pattern to check for.
31 * @param aStringItr
32 * An iterator at the start of the string to check for the pattern.
33 * @param aStringEnd
34 * An iterator at the end of the string to check for the pattern.
35 * @param aEscapeChar
36 * The character to use for escaping symbols in the pattern.
37 * @return 1 if the pattern is found, 0 otherwise.
39 int likeCompare(nsAString::const_iterator aPatternItr,
40 nsAString::const_iterator aPatternEnd,
41 nsAString::const_iterator aStringItr,
42 nsAString::const_iterator aStringEnd, char16_t aEscapeChar) {
43 const char16_t MATCH_ALL('%');
44 const char16_t MATCH_ONE('_');
46 bool lastWasEscape = false;
47 while (aPatternItr != aPatternEnd) {
48 /**
49 * What we do in here is take a look at each character from the input
50 * pattern, and do something with it. There are 4 possibilities:
51 * 1) character is an un-escaped match-all character
52 * 2) character is an un-escaped match-one character
53 * 3) character is an un-escaped escape character
54 * 4) character is not any of the above
56 if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
57 // CASE 1
58 /**
59 * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
60 * MATCH_ALL character. For each MATCH_ONE character, skip one character
61 * in the pattern string.
63 while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
64 if (*aPatternItr == MATCH_ONE) {
65 // If we've hit the end of the string we are testing, no match
66 if (aStringItr == aStringEnd) return 0;
67 aStringItr++;
69 aPatternItr++;
72 // If we've hit the end of the pattern string, match
73 if (aPatternItr == aPatternEnd) return 1;
75 while (aStringItr != aStringEnd) {
76 if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
77 aEscapeChar)) {
78 // we've hit a match, so indicate this
79 return 1;
81 aStringItr++;
84 // No match
85 return 0;
87 if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
88 // CASE 2
89 if (aStringItr == aStringEnd) {
90 // If we've hit the end of the string we are testing, no match
91 return 0;
93 aStringItr++;
94 lastWasEscape = false;
95 } else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
96 // CASE 3
97 lastWasEscape = true;
98 } else {
99 // CASE 4
100 if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
101 // If we've hit a point where the strings don't match, there is no match
102 return 0;
104 aStringItr++;
105 lastWasEscape = false;
108 aPatternItr++;
111 return aStringItr == aStringEnd;
115 * Compute the Levenshtein Edit Distance between two strings.
117 * @param aStringS
118 * a string
119 * @param aStringT
120 * another string
121 * @param _result
122 * an outparam that will receive the edit distance between the arguments
123 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
125 int levenshteinDistance(const nsAString& aStringS, const nsAString& aStringT,
126 int* _result) {
127 // Set the result to a non-sensical value in case we encounter an error.
128 *_result = -1;
130 const uint32_t sLen = aStringS.Length();
131 const uint32_t tLen = aStringT.Length();
133 if (sLen == 0) {
134 *_result = tLen;
135 return SQLITE_OK;
137 if (tLen == 0) {
138 *_result = sLen;
139 return SQLITE_OK;
142 // Notionally, Levenshtein Distance is computed in a matrix. If we
143 // assume s = "span" and t = "spam", the matrix would look like this:
144 // s -->
145 // t s p a n
146 // | 0 1 2 3 4
147 // V s 1 * * * *
148 // p 2 * * * *
149 // a 3 * * * *
150 // m 4 * * * *
152 // Note that the row width is sLen + 1 and the column height is tLen + 1,
153 // where sLen is the length of the string "s" and tLen is the length of "t".
154 // The first row and the first column are initialized as shown, and
155 // the algorithm computes the remaining cells row-by-row, and
156 // left-to-right within each row. The computation only requires that
157 // we be able to see the current row and the previous one.
159 // Allocate memory for two rows.
160 AutoTArray<int, nsAutoString::kStorageSize> row1;
161 AutoTArray<int, nsAutoString::kStorageSize> row2;
163 // Declare the raw pointers that will actually be used to access the memory.
164 int* prevRow = row1.AppendElements(sLen + 1);
165 int* currRow = row2.AppendElements(sLen + 1);
167 // Initialize the first row.
168 for (uint32_t i = 0; i <= sLen; i++) prevRow[i] = i;
170 const char16_t* s = aStringS.BeginReading();
171 const char16_t* t = aStringT.BeginReading();
173 // Compute the empty cells in the "matrix" row-by-row, starting with
174 // the second row.
175 for (uint32_t ti = 1; ti <= tLen; ti++) {
176 // Initialize the first cell in this row.
177 currRow[0] = ti;
179 // Get the character from "t" that corresponds to this row.
180 const char16_t tch = t[ti - 1];
182 // Compute the remaining cells in this row, left-to-right,
183 // starting at the second column (and first character of "s").
184 for (uint32_t si = 1; si <= sLen; si++) {
185 // Get the character from "s" that corresponds to this column,
186 // compare it to the t-character, and compute the "cost".
187 const char16_t sch = s[si - 1];
188 int cost = (sch == tch) ? 0 : 1;
190 // ............ We want to calculate the value of cell "d" from
191 // ...ab....... the previously calculated (or initialized) cells
192 // ...cd....... "a", "b", and "c", where d = min(a', b', c').
193 // ............
194 int aPrime = prevRow[si - 1] + cost;
195 int bPrime = prevRow[si] + 1;
196 int cPrime = currRow[si - 1] + 1;
197 currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
200 // Advance to the next row. The current row becomes the previous
201 // row and we recycle the old previous row as the new current row.
202 // We don't need to re-initialize the new current row since we will
203 // rewrite all of its cells anyway.
204 int* oldPrevRow = prevRow;
205 prevRow = currRow;
206 currRow = oldPrevRow;
209 // The final result is the value of the last cell in the last row.
210 // Note that that's now in the "previous" row, since we just swapped them.
211 *_result = prevRow[sLen];
212 return SQLITE_OK;
215 // This struct is used only by registerFunctions below, but ISO C++98 forbids
216 // instantiating a template dependent on a locally-defined type. Boo-urns!
217 struct Functions {
218 const char* zName;
219 int nArg;
220 int enc;
221 void* pContext;
222 void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
225 } // namespace
227 ////////////////////////////////////////////////////////////////////////////////
228 //// Exposed Functions
230 int registerFunctions(sqlite3* aDB) {
231 Functions functions[] = {
232 {"lower", 1, SQLITE_UTF16, 0, caseFunction},
233 {"lower", 1, SQLITE_UTF8, 0, caseFunction},
234 {"upper", 1, SQLITE_UTF16, (void*)1, caseFunction},
235 {"upper", 1, SQLITE_UTF8, (void*)1, caseFunction},
237 {"like", 2, SQLITE_UTF16, 0, likeFunction},
238 {"like", 2, SQLITE_UTF8, 0, likeFunction},
239 {"like", 3, SQLITE_UTF16, 0, likeFunction},
240 {"like", 3, SQLITE_UTF8, 0, likeFunction},
242 {"levenshteinDistance", 2, SQLITE_UTF16, 0, levenshteinDistanceFunction},
243 {"levenshteinDistance", 2, SQLITE_UTF8, 0, levenshteinDistanceFunction},
245 {"utf16Length", 1, SQLITE_UTF16, 0, utf16LengthFunction},
246 {"utf16Length", 1, SQLITE_UTF8, 0, utf16LengthFunction},
249 int rv = SQLITE_OK;
250 for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
251 struct Functions* p = &functions[i];
252 rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
253 p->xFunc, nullptr, nullptr);
256 return rv;
259 ////////////////////////////////////////////////////////////////////////////////
260 //// SQL Functions
262 void caseFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
263 NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
265 const char16_t* value =
266 static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
267 nsAutoString data(value,
268 ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t));
269 bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
271 if (toUpper)
272 ::ToUpperCase(data);
273 else
274 ::ToLowerCase(data);
276 // Set the result.
277 ::sqlite3_result_text16(aCtx, data.get(), data.Length() * sizeof(char16_t),
278 SQLITE_TRANSIENT);
282 * This implements the like() SQL function. This is used by the LIKE operator.
283 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
284 * an escape character, say E, it is implemented as 'like(B, A, E)'.
286 void likeFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
287 NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
289 if (::sqlite3_value_bytes(aArgv[0]) >
290 ::sqlite3_limit(::sqlite3_context_db_handle(aCtx),
291 SQLITE_LIMIT_LIKE_PATTERN_LENGTH, -1)) {
292 ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
293 SQLITE_TOOBIG);
294 return;
297 if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
298 return;
300 const char16_t* a =
301 static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
302 int aLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
303 nsDependentString A(a, aLen);
305 const char16_t* b =
306 static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
307 int bLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
308 nsDependentString B(b, bLen);
309 NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
311 char16_t E = 0;
312 if (3 == aArgc)
313 E = static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[2]))[0];
315 nsAString::const_iterator itrString, endString;
316 A.BeginReading(itrString);
317 A.EndReading(endString);
318 nsAString::const_iterator itrPattern, endPattern;
319 B.BeginReading(itrPattern);
320 B.EndReading(endPattern);
321 ::sqlite3_result_int(
322 aCtx, likeCompare(itrPattern, endPattern, itrString, endString, E));
325 void levenshteinDistanceFunction(sqlite3_context* aCtx, int aArgc,
326 sqlite3_value** aArgv) {
327 NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
329 // If either argument is a SQL NULL, then return SQL NULL.
330 if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
331 ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
332 ::sqlite3_result_null(aCtx);
333 return;
336 const char16_t* a =
337 static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
338 int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
340 const char16_t* b =
341 static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
342 int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
344 // Compute the Levenshtein Distance, and return the result (or error).
345 int distance = -1;
346 const nsDependentString A(a, aLen);
347 const nsDependentString B(b, bLen);
348 int status = levenshteinDistance(A, B, &distance);
349 if (status == SQLITE_OK) {
350 ::sqlite3_result_int(aCtx, distance);
351 } else if (status == SQLITE_NOMEM) {
352 ::sqlite3_result_error_nomem(aCtx);
353 } else {
354 ::sqlite3_result_error(aCtx, "User function returned error code", -1);
358 void utf16LengthFunction(sqlite3_context* aCtx, int aArgc,
359 sqlite3_value** aArgv) {
360 NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
362 int len = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
364 // Set the result.
365 ::sqlite3_result_int(aCtx, len);
368 } // namespace storage
369 } // namespace mozilla