1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is unicode functions code.
18 * The Initial Developer of the Original Code is
19 * Mozilla Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 2007
21 * the Initial Developer. All Rights Reserved.
23 * This code is based off of icu.c from the sqlite code
24 * whose original author is danielk1977
27 * Shawn Wilsher <me@shawnwilsher.com> (Original Author)
29 * Alternatively, the contents of this file may be used under the terms of
30 * either the GNU General Public License Version 2 or later (the "GPL"), or
31 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
32 * in which case the provisions of the GPL or the LGPL are applicable instead
33 * of those above. If you wish to allow use of your version of this file only
34 * under the terms of either the GPL or the LGPL, and not to allow others to
35 * use your version of this file under the terms of the MPL, indicate your
36 * decision by deleting the provisions above and replace them with the notice
37 * and other provisions required by the GPL or the LGPL. If you do not delete
38 * the provisions above, a recipient may use your version of this file under
39 * the terms of any one of the MPL, the GPL or the LGPL.
41 * ***** END LICENSE BLOCK ***** */
43 #include "mozStorageSQLFunctions.h"
44 #include "nsUnicharUtils.h"
49 ////////////////////////////////////////////////////////////////////////////////
50 //// Local Helper Functions
55 * Performs the LIKE comparison of a string against a pattern. For more detail
56 * see http://www.sqlite.org/lang_expr.html#like.
59 * An iterator at the start of the pattern to check for.
61 * An iterator at the end of the pattern to check for.
63 * An iterator at the start of the string to check for the pattern.
65 * An iterator at the end of the string to check for the pattern.
67 * The character to use for escaping symbols in the pattern.
68 * @return 1 if the pattern is found, 0 otherwise.
71 likeCompare(nsAString::const_iterator aPatternItr
,
72 nsAString::const_iterator aPatternEnd
,
73 nsAString::const_iterator aStringItr
,
74 nsAString::const_iterator aStringEnd
,
75 PRUnichar aEscapeChar
)
77 const PRUnichar
MATCH_ALL('%');
78 const PRUnichar
MATCH_ONE('_');
80 PRBool lastWasEscape
= PR_FALSE
;
81 while (aPatternItr
!= aPatternEnd
) {
83 * What we do in here is take a look at each character from the input
84 * pattern, and do something with it. There are 4 possibilities:
85 * 1) character is an un-escaped match-all character
86 * 2) character is an un-escaped match-one character
87 * 3) character is an un-escaped escape character
88 * 4) character is not any of the above
90 if (!lastWasEscape
&& *aPatternItr
== MATCH_ALL
) {
93 * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
94 * MATCH_ALL character. For each MATCH_ONE character, skip one character
95 * in the pattern string.
97 while (*aPatternItr
== MATCH_ALL
|| *aPatternItr
== MATCH_ONE
) {
98 if (*aPatternItr
== MATCH_ONE
) {
99 // If we've hit the end of the string we are testing, no match
100 if (aStringItr
== aStringEnd
)
107 // If we've hit the end of the pattern string, match
108 if (aPatternItr
== aPatternEnd
)
111 while (aStringItr
!= aStringEnd
) {
112 if (likeCompare(aPatternItr
, aPatternEnd
, aStringItr
, aStringEnd
,
114 // we've hit a match, so indicate this
123 else if (!lastWasEscape
&& *aPatternItr
== MATCH_ONE
) {
125 if (aStringItr
== aStringEnd
) {
126 // If we've hit the end of the string we are testing, no match
130 lastWasEscape
= PR_FALSE
;
132 else if (!lastWasEscape
&& *aPatternItr
== aEscapeChar
) {
134 lastWasEscape
= PR_TRUE
;
138 if (::ToUpperCase(*aStringItr
) != ::ToUpperCase(*aPatternItr
)) {
139 // If we've hit a point where the strings don't match, there is no match
143 lastWasEscape
= PR_FALSE
;
149 return aStringItr
== aStringEnd
;
153 * This class manages a dynamic array. It can represent an array of any
154 * reasonable size, but if the array is "N" elements or smaller, it will be
155 * stored using fixed space inside the auto array itself. If the auto array
156 * is a local variable, this internal storage will be allocated cheaply on the
157 * stack, similar to nsAutoString. If a larger size is requested, the memory
158 * will be dynamically allocated from the heap. Since the destructor will
159 * free any heap-allocated memory, client code doesn't need to care where the
162 template <class T
, size_t N
> class AutoArray
167 AutoArray(size_t size
)
168 : mBuffer(size
<= N
? mAutoBuffer
: new T
[size
])
174 if (mBuffer
!= mAutoBuffer
)
179 * Return the pointer to the allocated array.
180 * @note If the array allocation failed, get() will return NULL!
182 * @return the pointer to the allocated array
190 T
*mBuffer
; // Points to mAutoBuffer if we can use it, heap otherwise.
191 T mAutoBuffer
[N
]; // The internal memory buffer that we use if we can.
195 * Compute the Levenshtein Edit Distance between two strings.
202 * an outparam that will receive the edit distance between the arguments
203 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
206 levenshteinDistance(const nsAString
&aStringS
,
207 const nsAString
&aStringT
,
210 // Set the result to a non-sensical value in case we encounter an error.
213 const PRUint32 sLen
= aStringS
.Length();
214 const PRUint32 tLen
= aStringT
.Length();
225 // Notionally, Levenshtein Distance is computed in a matrix. If we
226 // assume s = "span" and t = "spam", the matrix would look like this:
235 // Note that the row width is sLen + 1 and the column height is tLen + 1,
236 // where sLen is the length of the string "s" and tLen is the length of "t".
237 // The first row and the first column are initialized as shown, and
238 // the algorithm computes the remaining cells row-by-row, and
239 // left-to-right within each row. The computation only requires that
240 // we be able to see the current row and the previous one.
242 // Allocate memory for two rows. Use AutoArray's to manage the memory
243 // so we don't have to explicitly free it, and so we can avoid the expense
244 // of memory allocations for relatively small strings.
245 AutoArray
<int, nsAutoString::kDefaultStorageSize
> row1(sLen
+ 1);
246 AutoArray
<int, nsAutoString::kDefaultStorageSize
> row2(sLen
+ 1);
248 // Declare the raw pointers that will actually be used to access the memory.
249 int *prevRow
= row1
.get();
250 NS_ENSURE_TRUE(prevRow
, SQLITE_NOMEM
);
251 int *currRow
= row2
.get();
252 NS_ENSURE_TRUE(currRow
, SQLITE_NOMEM
);
254 // Initialize the first row.
255 for (PRUint32 i
= 0; i
<= sLen
; i
++)
258 const PRUnichar
*s
= aStringS
.BeginReading();
259 const PRUnichar
*t
= aStringT
.BeginReading();
261 // Compute the empty cells in the "matrix" row-by-row, starting with
263 for (PRUint32 ti
= 1; ti
<= tLen
; ti
++) {
265 // Initialize the first cell in this row.
268 // Get the character from "t" that corresponds to this row.
269 const PRUnichar tch
= t
[ti
- 1];
271 // Compute the remaining cells in this row, left-to-right,
272 // starting at the second column (and first character of "s").
273 for (PRUint32 si
= 1; si
<= sLen
; si
++) {
275 // Get the character from "s" that corresponds to this column,
276 // compare it to the t-character, and compute the "cost".
277 const PRUnichar sch
= s
[si
- 1];
278 int cost
= (sch
== tch
) ? 0 : 1;
280 // ............ We want to calculate the value of cell "d" from
281 // ...ab....... the previously calculated (or initialized) cells
282 // ...cd....... "a", "b", and "c", where d = min(a', b', c').
284 int aPrime
= prevRow
[si
- 1] + cost
;
285 int bPrime
= prevRow
[si
] + 1;
286 int cPrime
= currRow
[si
- 1] + 1;
287 currRow
[si
] = NS_MIN(aPrime
, NS_MIN(bPrime
, cPrime
));
290 // Advance to the next row. The current row becomes the previous
291 // row and we recycle the old previous row as the new current row.
292 // We don't need to re-initialize the new current row since we will
293 // rewrite all of its cells anyway.
294 int *oldPrevRow
= prevRow
;
296 currRow
= oldPrevRow
;
299 // The final result is the value of the last cell in the last row.
300 // Note that that's now in the "previous" row, since we just swapped them.
301 *_result
= prevRow
[sLen
];
305 } // anonymous namespace
307 ////////////////////////////////////////////////////////////////////////////////
308 //// Exposed Functions
311 registerFunctions(sqlite3
*aDB
)
318 void (*xFunc
)(::sqlite3_context
*, int, sqlite3_value
**);
321 Functions functions
[] = {
364 {"levenshteinDistance",
368 levenshteinDistanceFunction
},
369 {"levenshteinDistance",
373 levenshteinDistanceFunction
},
377 for (size_t i
= 0; SQLITE_OK
== rv
&& i
< NS_ARRAY_LENGTH(functions
); ++i
) {
378 struct Functions
*p
= &functions
[i
];
379 rv
= ::sqlite3_create_function(aDB
, p
->zName
, p
->nArg
, p
->enc
, p
->pContext
,
380 p
->xFunc
, NULL
, NULL
);
386 ////////////////////////////////////////////////////////////////////////////////
390 caseFunction(sqlite3_context
*aCtx
,
392 sqlite3_value
**aArgv
)
394 NS_ASSERTION(1 == aArgc
, "Invalid number of arguments!");
396 nsAutoString
data(static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[0])));
397 PRBool toUpper
= ::sqlite3_user_data(aCtx
) ? PR_TRUE
: PR_FALSE
;
405 ::sqlite3_result_text16(aCtx
, data
.get(), -1, SQLITE_TRANSIENT
);
409 * This implements the like() SQL function. This is used by the LIKE operator.
410 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
411 * an escape character, say E, it is implemented as 'like(B, A, E)'.
414 likeFunction(sqlite3_context
*aCtx
,
416 sqlite3_value
**aArgv
)
418 NS_ASSERTION(2 == aArgc
|| 3 == aArgc
, "Invalid number of arguments!");
420 if (::sqlite3_value_bytes(aArgv
[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH
) {
421 ::sqlite3_result_error(aCtx
, "LIKE or GLOB pattern too complex",
426 if (!::sqlite3_value_text16(aArgv
[0]) || !::sqlite3_value_text16(aArgv
[1]))
429 nsDependentString
A(static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[1])));
430 nsDependentString
B(static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[0])));
431 NS_ASSERTION(!B
.IsEmpty(), "LIKE string must not be null!");
435 E
= static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[2]))[0];
437 nsAString::const_iterator itrString
, endString
;
438 A
.BeginReading(itrString
);
439 A
.EndReading(endString
);
440 nsAString::const_iterator itrPattern
, endPattern
;
441 B
.BeginReading(itrPattern
);
442 B
.EndReading(endPattern
);
443 ::sqlite3_result_int(aCtx
, likeCompare(itrPattern
, endPattern
, itrString
,
447 void levenshteinDistanceFunction(sqlite3_context
*aCtx
,
449 sqlite3_value
**aArgv
)
451 NS_ASSERTION(2 == aArgc
, "Invalid number of arguments!");
453 // If either argument is a SQL NULL, then return SQL NULL.
454 if (::sqlite3_value_type(aArgv
[0]) == SQLITE_NULL
||
455 ::sqlite3_value_type(aArgv
[1]) == SQLITE_NULL
) {
456 ::sqlite3_result_null(aCtx
);
460 int aLen
= ::sqlite3_value_bytes16(aArgv
[0]) / sizeof(PRUnichar
);
461 const PRUnichar
*a
= static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[0]));
463 int bLen
= ::sqlite3_value_bytes16(aArgv
[1]) / sizeof(PRUnichar
);
464 const PRUnichar
*b
= static_cast<const PRUnichar
*>(::sqlite3_value_text16(aArgv
[1]));
466 // Compute the Levenshtein Distance, and return the result (or error).
468 const nsDependentString
A(a
, aLen
);
469 const nsDependentString
B(b
, bLen
);
470 int status
= levenshteinDistance(A
, B
, &distance
);
471 if (status
== SQLITE_OK
) {
472 ::sqlite3_result_int(aCtx
, distance
);
474 else if (status
== SQLITE_NOMEM
) {
475 ::sqlite3_result_error_nomem(aCtx
);
478 ::sqlite3_result_error(aCtx
, "User function returned error code", -1);
482 } // namespace storage
483 } // namespace mozilla