From c10fdab65db31447ff70523649e420124d7e94b0 Mon Sep 17 00:00:00 2001
From: Dan Kennedy <danielk1977@gmail.com>
Date: Fri, 6 Oct 2017 18:00:36 +0000
Subject: [PATCH] Add tests for the example fts3 "rank" function that appears
 in the documentation.

---
 src/test_func.c    | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/fts3rank.test |  64 +++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+)
 create mode 100644 test/fts3rank.test
diff --git a/src/test_func.c b/src/test_func.c
index c7860fe887..59fe677c33 100644
--- a/src/test_func.c
+++ b/src/test_func.c
@@ -792,6 +792,123 @@ abuse_err:
 
 
 /*
+** SQLite user defined function to use with matchinfo() to calculate the
+** relevancy of an FTS match. The value returned is the relevancy score
+** (a real value greater than or equal to zero). A larger value indicates 
+** a more relevant document.
+**
+** The overall relevancy returned is the sum of the relevancies of each 
+** column value in the FTS table. The relevancy of a column value is the
+** sum of the following for each reportable phrase in the FTS query:
+**
+**   (<hit count> / <global hit count>) * <column weight>
+**
+** where <hit count> is the number of instances of the phrase in the
+** column value of the current row and <global hit count> is the number
+** of instances of the phrase in the same column of all rows in the FTS
+** table. The <column weight> is a weighting factor assigned to each
+** column by the caller (see below).
+**
+** The first argument to this function must be the return value of the FTS 
+** matchinfo() function. Following this must be one argument for each column 
+** of the FTS table containing a numeric weight factor for the corresponding 
+** column. Example:
+**
+**     CREATE VIRTUAL TABLE documents USING fts3(title, content)
+**
+** The following query returns the docids of documents that match the full-text
+** query <query> sorted from most to least relevant. When calculating
+** relevance, query term instances in the 'title' column are given twice the
+** weighting of those in the 'content' column.
+**
+**     SELECT docid FROM documents 
+**     WHERE documents MATCH <query> 
+**     ORDER BY rank(matchinfo(documents), 1.0, 0.5) DESC
+*/
+static void rankfunc(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal){
+  const int *aMatchinfo;          /* Return value of matchinfo() */
+  int nMatchinfo;                 /* Number of elements in aMatchinfo[] */
+  int nCol = 0;                   /* Number of columns in the table */
+  int nPhrase = 0;                /* Number of phrases in the query */
+  int iPhrase;                    /* Current phrase */
+  double score = 0.0;             /* Value to return */
+
+  assert( sizeof(int)==4 );
+
+  /* Check that the number of arguments passed to this function is correct.
+  ** If not, jump to wrong_number_args. Set aMatchinfo to point to the array
+  ** of 32-bit integer values returned by FTS function matchinfo. Set
+  ** nPhrase to contain the number of reportable phrases in the user's
+  ** full-text query, and nCol to the number of columns in the table. Then
+  ** check the size of the matchinfo blob and return an error if it is wrong.
+  */
+  if( nVal<1 ) goto wrong_number_args;
+  aMatchinfo = (const int *)sqlite3_value_blob(apVal[0]);
+  nMatchinfo = sqlite3_value_bytes(apVal[0]) / sizeof(int);
+  if( nMatchinfo>=2 ){
+    nPhrase = aMatchinfo[0];
+    nCol = aMatchinfo[1];
+  }
+  /* Untrusted blob: no negative counts, and no overflow in 3*nCol*nPhrase. */
+  if( nCol<0 || nPhrase<0 || (nMatchinfo-2)%3!=0
+   || (sqlite3_int64)nCol*nPhrase!=(nMatchinfo-2)/3 ){
+    sqlite3_result_error(pCtx,
+        "invalid matchinfo blob passed to function rank()", -1);
+    return;
+  }
+  if( nVal!=(1+nCol) ) goto wrong_number_args;
+
+  /* Iterate through each phrase in the user's query. */
+  for(iPhrase=0; iPhrase<nPhrase; iPhrase++){
+    int iCol;                     /* Current column */
+    /* Now iterate through each column in the user's query. For each column,
+    ** increment the relevancy score by:
+    **
+    **   (<hit count> / <global hit count>) * <column weight>
+    **
+    ** aPhraseinfo[] points to the start of the data for phrase iPhrase. So
+    ** the hit count and global hit counts for each column are found in
+    ** aPhraseinfo[iCol*3] and aPhraseinfo[iCol*3+1], respectively.
+    */
+    const int *aPhraseinfo = &aMatchinfo[2 + iPhrase*nCol*3];
+    for(iCol=0; iCol<nCol; iCol++){
+      int nHitCount = aPhraseinfo[3*iCol];
+      int nGlobalHitCount = aPhraseinfo[3*iCol+1];
+      double weight = sqlite3_value_double(apVal[iCol+1]);
+      if( nHitCount>0 ){
+        score += ((double)nHitCount / (double)nGlobalHitCount) * weight;
+      }
+    }
+  }
+
+  sqlite3_result_double(pCtx, score);
+  return;
+  /* Jump here if the wrong number of arguments are passed to this function */
+wrong_number_args:
+  sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1);
+}
+
+/* Tcl command "install_fts3_rank_function DB": register SQL rank() on DB. */
+static int SQLITE_TCLAPI install_fts3_rank_function(
+  void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]
+){
+  extern int getDbPointer(Tcl_Interp*, const char*, sqlite3**);
+  sqlite3 *db;
+  if( objc!=2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "DB");
+    return TCL_ERROR;
+  }
+  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
+  /* nArg is -1: rank() takes a matchinfo blob plus one weight per column. */
+  if( sqlite3_create_function(db, "rank", -1, SQLITE_UTF8, 0, rankfunc, 0, 0) ){
+    Tcl_AppendResult(interp, sqlite3_errmsg(db), (char*)0);
+    return TCL_ERROR;
+  }
+  return TCL_OK;
+}
+
+
+/*
 ** Register commands with the TCL interpreter.
 */
 int Sqlitetest_func_Init(Tcl_Interp *interp){
@@ -801,6 +918,7 @@ int Sqlitetest_func_Init(Tcl_Interp *interp){
   } aObjCmd[] = {
      { "autoinstall_test_functions",    autoinstall_test_funcs },
      { "abuse_create_function",         abuse_create_function  },
+     { "install_fts3_rank_function",    install_fts3_rank_function  },
   };
   int i;
   extern int Md5_Register(sqlite3 *, char **, const sqlite3_api_routines *);
diff --git a/test/fts3rank.test b/test/fts3rank.test
new file mode 100644
index 0000000000..7ee3143a76
--- /dev/null
+++ b/test/fts3rank.test
@@ -0,0 +1,64 @@
+# 2017 October 7
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library.  The
+# focus of this script is testing the FTS3 module.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set testprefix fts3rank
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 {
+  finish_test
+  return
+}
+
+install_fts3_rank_function db
+do_execsql_test 1.0 {
+  CREATE VIRTUAL TABLE t1 USING fts3(a, b);
+  INSERT INTO t1 VALUES('one two', 'one');
+  INSERT INTO t1 VALUES('one two', 'three');
+  INSERT INTO t1 VALUES('one two', 'two');
+}
+# Every row matches 'one' in column a; only the first row also matches in b.
+do_execsql_test 1.1 {
+  SELECT * FROM t1 WHERE t1 MATCH 'one' 
+  ORDER BY rank(matchinfo(t1), 1.0, 1.0) DESC, rowid
+} {
+  {one two} one
+  {one two} three
+  {one two} two
+}
+# Every row matches 'two' in column a; only the third row also matches in b.
+do_execsql_test 1.2 {
+  SELECT * FROM t1 WHERE t1 MATCH 'two' 
+  ORDER BY rank(matchinfo(t1), 1.0, 1.0) DESC, rowid
+} {
+  {one two} two
+  {one two} one
+  {one two} three
+}
+# No MATCH clause: rank() rejects the value it receives from matchinfo().
+do_catchsql_test 1.3 {
+  SELECT * FROM t1 ORDER BY rank(matchinfo(t1), 1.0, 1.0) DESC, rowid
+} {1 {invalid matchinfo blob passed to function rank()}}
+# An 8-byte all-zero blob (nPhrase=0, nCol=0) is accepted with no weights.
+do_catchsql_test 1.4 {
+  SELECT * FROM t1 ORDER BY rank(x'0000000000000000') DESC, rowid
+} {0 {{one two} one {one two} three {one two} two}}
+# Header reads as 1 phrase, 1 column (little-endian) with no hit data: rejected.
+do_catchsql_test 1.5 {
+  SELECT * FROM t1 ORDER BY rank(x'0100000001000000') DESC, rowid
+} {1 {invalid matchinfo blob passed to function rank()}}
+
+finish_test
+
-- 
2.11.4.GIT