Bug 634734 - Fennec ASSERTION: mFUnitsConvFactor not valid: mFUnitsConvFactor > 0...
[mozilla-central.git] / storage / src / mozStorageSQLFunctions.cpp
blob0072ca19059e117f5f777ca873ca88c0491449f4
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is unicode functions code.
18 * The Initial Developer of the Original Code is
19 * Mozilla Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 2007
21 * the Initial Developer. All Rights Reserved.
23 * This code is based off of icu.c from the sqlite code
24 * whose original author is danielk1977
26 * Contributor(s):
27 * Shawn Wilsher <me@shawnwilsher.com> (Original Author)
29 * Alternatively, the contents of this file may be used under the terms of
30 * either the GNU General Public License Version 2 or later (the "GPL"), or
31 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
32 * in which case the provisions of the GPL or the LGPL are applicable instead
33 * of those above. If you wish to allow use of your version of this file only
34 * under the terms of either the GPL or the LGPL, and not to allow others to
35 * use your version of this file under the terms of the MPL, indicate your
36 * decision by deleting the provisions above and replace them with the notice
37 * and other provisions required by the GPL or the LGPL. If you do not delete
38 * the provisions above, a recipient may use your version of this file under
39 * the terms of any one of the MPL, the GPL or the LGPL.
41 * ***** END LICENSE BLOCK ***** */
43 #include "mozStorageSQLFunctions.h"
44 #include "nsUnicharUtils.h"
46 namespace mozilla {
47 namespace storage {
49 ////////////////////////////////////////////////////////////////////////////////
50 //// Local Helper Functions
52 namespace {
54 /**
55 * Performs the LIKE comparison of a string against a pattern. For more detail
56 * see http://www.sqlite.org/lang_expr.html#like.
58 * @param aPatternItr
59 * An iterator at the start of the pattern to check for.
60 * @param aPatternEnd
61 * An iterator at the end of the pattern to check for.
62 * @param aStringItr
63 * An iterator at the start of the string to check for the pattern.
64 * @param aStringEnd
65 * An iterator at the end of the string to check for the pattern.
66 * @param aEscapeChar
67 * The character to use for escaping symbols in the pattern.
68 * @return 1 if the pattern is found, 0 otherwise.
70 int
71 likeCompare(nsAString::const_iterator aPatternItr,
72 nsAString::const_iterator aPatternEnd,
73 nsAString::const_iterator aStringItr,
74 nsAString::const_iterator aStringEnd,
75 PRUnichar aEscapeChar)
77 const PRUnichar MATCH_ALL('%');
78 const PRUnichar MATCH_ONE('_');
80 PRBool lastWasEscape = PR_FALSE;
81 while (aPatternItr != aPatternEnd) {
82 /**
83 * What we do in here is take a look at each character from the input
84 * pattern, and do something with it. There are 4 possibilities:
85 * 1) character is an un-escaped match-all character
86 * 2) character is an un-escaped match-one character
87 * 3) character is an un-escaped escape character
88 * 4) character is not any of the above
90 if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
91 // CASE 1
92 /**
93 * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
94 * MATCH_ALL character. For each MATCH_ONE character, skip one character
95 * in the pattern string.
97 while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
98 if (*aPatternItr == MATCH_ONE) {
99 // If we've hit the end of the string we are testing, no match
100 if (aStringItr == aStringEnd)
101 return 0;
102 aStringItr++;
104 aPatternItr++;
107 // If we've hit the end of the pattern string, match
108 if (aPatternItr == aPatternEnd)
109 return 1;
111 while (aStringItr != aStringEnd) {
112 if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
113 aEscapeChar)) {
114 // we've hit a match, so indicate this
115 return 1;
117 aStringItr++;
120 // No match
121 return 0;
123 else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
124 // CASE 2
125 if (aStringItr == aStringEnd) {
126 // If we've hit the end of the string we are testing, no match
127 return 0;
129 aStringItr++;
130 lastWasEscape = PR_FALSE;
132 else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
133 // CASE 3
134 lastWasEscape = PR_TRUE;
136 else {
137 // CASE 4
138 if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
139 // If we've hit a point where the strings don't match, there is no match
140 return 0;
142 aStringItr++;
143 lastWasEscape = PR_FALSE;
146 aPatternItr++;
149 return aStringItr == aStringEnd;
153 * This class manages a dynamic array. It can represent an array of any
154 * reasonable size, but if the array is "N" elements or smaller, it will be
155 * stored using fixed space inside the auto array itself. If the auto array
156 * is a local variable, this internal storage will be allocated cheaply on the
157 * stack, similar to nsAutoString. If a larger size is requested, the memory
158 * will be dynamically allocated from the heap. Since the destructor will
159 * free any heap-allocated memory, client code doesn't need to care where the
160 * memory came from.
162 template <class T, size_t N> class AutoArray
165 public:
167 AutoArray(size_t size)
168 : mBuffer(size <= N ? mAutoBuffer : new T[size])
172 ~AutoArray()
174 if (mBuffer != mAutoBuffer)
175 delete[] mBuffer;
179 * Return the pointer to the allocated array.
180 * @note If the array allocation failed, get() will return NULL!
182 * @return the pointer to the allocated array
184 T *get()
186 return mBuffer;
189 private:
190 T *mBuffer; // Points to mAutoBuffer if we can use it, heap otherwise.
191 T mAutoBuffer[N]; // The internal memory buffer that we use if we can.
195 * Compute the Levenshtein Edit Distance between two strings.
197 * @param aStringS
198 * a string
199 * @param aStringT
200 * another string
201 * @param _result
202 * an outparam that will receive the edit distance between the arguments
203 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
206 levenshteinDistance(const nsAString &aStringS,
207 const nsAString &aStringT,
208 int *_result)
210 // Set the result to a non-sensical value in case we encounter an error.
211 *_result = -1;
213 const PRUint32 sLen = aStringS.Length();
214 const PRUint32 tLen = aStringT.Length();
216 if (sLen == 0) {
217 *_result = tLen;
218 return SQLITE_OK;
220 if (tLen == 0) {
221 *_result = sLen;
222 return SQLITE_OK;
225 // Notionally, Levenshtein Distance is computed in a matrix. If we
226 // assume s = "span" and t = "spam", the matrix would look like this:
227 // s -->
228 // t s p a n
229 // | 0 1 2 3 4
230 // V s 1 * * * *
231 // p 2 * * * *
232 // a 3 * * * *
233 // m 4 * * * *
235 // Note that the row width is sLen + 1 and the column height is tLen + 1,
236 // where sLen is the length of the string "s" and tLen is the length of "t".
237 // The first row and the first column are initialized as shown, and
238 // the algorithm computes the remaining cells row-by-row, and
239 // left-to-right within each row. The computation only requires that
240 // we be able to see the current row and the previous one.
242 // Allocate memory for two rows. Use AutoArray's to manage the memory
243 // so we don't have to explicitly free it, and so we can avoid the expense
244 // of memory allocations for relatively small strings.
245 AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
246 AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);
248 // Declare the raw pointers that will actually be used to access the memory.
249 int *prevRow = row1.get();
250 NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM);
251 int *currRow = row2.get();
252 NS_ENSURE_TRUE(currRow, SQLITE_NOMEM);
254 // Initialize the first row.
255 for (PRUint32 i = 0; i <= sLen; i++)
256 prevRow[i] = i;
258 const PRUnichar *s = aStringS.BeginReading();
259 const PRUnichar *t = aStringT.BeginReading();
261 // Compute the empty cells in the "matrix" row-by-row, starting with
262 // the second row.
263 for (PRUint32 ti = 1; ti <= tLen; ti++) {
265 // Initialize the first cell in this row.
266 currRow[0] = ti;
268 // Get the character from "t" that corresponds to this row.
269 const PRUnichar tch = t[ti - 1];
271 // Compute the remaining cells in this row, left-to-right,
272 // starting at the second column (and first character of "s").
273 for (PRUint32 si = 1; si <= sLen; si++) {
275 // Get the character from "s" that corresponds to this column,
276 // compare it to the t-character, and compute the "cost".
277 const PRUnichar sch = s[si - 1];
278 int cost = (sch == tch) ? 0 : 1;
280 // ............ We want to calculate the value of cell "d" from
281 // ...ab....... the previously calculated (or initialized) cells
282 // ...cd....... "a", "b", and "c", where d = min(a', b', c').
283 // ............
284 int aPrime = prevRow[si - 1] + cost;
285 int bPrime = prevRow[si] + 1;
286 int cPrime = currRow[si - 1] + 1;
287 currRow[si] = NS_MIN(aPrime, NS_MIN(bPrime, cPrime));
290 // Advance to the next row. The current row becomes the previous
291 // row and we recycle the old previous row as the new current row.
292 // We don't need to re-initialize the new current row since we will
293 // rewrite all of its cells anyway.
294 int *oldPrevRow = prevRow;
295 prevRow = currRow;
296 currRow = oldPrevRow;
299 // The final result is the value of the last cell in the last row.
300 // Note that that's now in the "previous" row, since we just swapped them.
301 *_result = prevRow[sLen];
302 return SQLITE_OK;
305 } // anonymous namespace
307 ////////////////////////////////////////////////////////////////////////////////
308 //// Exposed Functions
311 registerFunctions(sqlite3 *aDB)
313 struct Functions {
314 const char *zName;
315 int nArg;
316 int enc;
317 void *pContext;
318 void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
321 Functions functions[] = {
322 {"lower",
324 SQLITE_UTF16,
326 caseFunction},
327 {"lower",
329 SQLITE_UTF8,
331 caseFunction},
332 {"upper",
334 SQLITE_UTF16,
335 (void*)1,
336 caseFunction},
337 {"upper",
339 SQLITE_UTF8,
340 (void*)1,
341 caseFunction},
343 {"like",
345 SQLITE_UTF16,
347 likeFunction},
348 {"like",
350 SQLITE_UTF8,
352 likeFunction},
353 {"like",
355 SQLITE_UTF16,
357 likeFunction},
358 {"like",
360 SQLITE_UTF8,
362 likeFunction},
364 {"levenshteinDistance",
366 SQLITE_UTF16,
368 levenshteinDistanceFunction},
369 {"levenshteinDistance",
371 SQLITE_UTF8,
373 levenshteinDistanceFunction},
376 int rv = SQLITE_OK;
377 for (size_t i = 0; SQLITE_OK == rv && i < NS_ARRAY_LENGTH(functions); ++i) {
378 struct Functions *p = &functions[i];
379 rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
380 p->xFunc, NULL, NULL);
383 return rv;
386 ////////////////////////////////////////////////////////////////////////////////
387 //// SQL Functions
389 void
390 caseFunction(sqlite3_context *aCtx,
391 int aArgc,
392 sqlite3_value **aArgv)
394 NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
396 nsAutoString data(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0])));
397 PRBool toUpper = ::sqlite3_user_data(aCtx) ? PR_TRUE : PR_FALSE;
399 if (toUpper)
400 ::ToUpperCase(data);
401 else
402 ::ToLowerCase(data);
404 // Set the result.
405 ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
409 * This implements the like() SQL function. This is used by the LIKE operator.
410 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
411 * an escape character, say E, it is implemented as 'like(B, A, E)'.
413 void
414 likeFunction(sqlite3_context *aCtx,
415 int aArgc,
416 sqlite3_value **aArgv)
418 NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
420 if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
421 ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
422 SQLITE_TOOBIG);
423 return;
426 if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
427 return;
429 nsDependentString A(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[1])));
430 nsDependentString B(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0])));
431 NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
433 PRUnichar E = 0;
434 if (3 == aArgc)
435 E = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[2]))[0];
437 nsAString::const_iterator itrString, endString;
438 A.BeginReading(itrString);
439 A.EndReading(endString);
440 nsAString::const_iterator itrPattern, endPattern;
441 B.BeginReading(itrPattern);
442 B.EndReading(endPattern);
443 ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
444 endString, E));
447 void levenshteinDistanceFunction(sqlite3_context *aCtx,
448 int aArgc,
449 sqlite3_value **aArgv)
451 NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
453 // If either argument is a SQL NULL, then return SQL NULL.
454 if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
455 ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
456 ::sqlite3_result_null(aCtx);
457 return;
460 int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(PRUnichar);
461 const PRUnichar *a = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0]));
463 int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(PRUnichar);
464 const PRUnichar *b = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[1]));
466 // Compute the Levenshtein Distance, and return the result (or error).
467 int distance = -1;
468 const nsDependentString A(a, aLen);
469 const nsDependentString B(b, bLen);
470 int status = levenshteinDistance(A, B, &distance);
471 if (status == SQLITE_OK) {
472 ::sqlite3_result_int(aCtx, distance);
474 else if (status == SQLITE_NOMEM) {
475 ::sqlite3_result_error_nomem(aCtx);
477 else {
478 ::sqlite3_result_error(aCtx, "User function returned error code", -1);
482 } // namespace storage
483 } // namespace mozilla