/*
** Mirror of https://github.com/sqlite/sqlite.git
** Synced 2024-11-28 16:09:31 +01:00, commit 0218424e5a
** FossilOrigin-Name: c51dc2a5e75baacbd905cf314e7b1a58a81993ff05ca656739e028d7db25d5b2
** 1115 lines, 30 KiB, C
*/
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/
|
|
|
|
|
|
#include "fts5Int.h"
|
|
|
|
/*
** Default values for the run-time tunable parameters. These are installed
** by sqlite3Fts5ConfigLoad() before the contents of the %_config table
** are applied on top of them.
*/
#define FTS5_DEFAULT_PAGE_SIZE        4050
#define FTS5_DEFAULT_AUTOMERGE        4
#define FTS5_DEFAULT_USERMERGE        4
#define FTS5_DEFAULT_CRISISMERGE      16
#define FTS5_DEFAULT_HASHSIZE         (1024*1024)

#define FTS5_DEFAULT_DELETE_AUTOMERGE 10      /* default 10% */

/* Maximum allowed page size (upper bound accepted for the "pgsz" option) */
#define FTS5_MAX_PAGE_SIZE            (64*1024)
|
|
|
|
/*
** Return 1 if x is the space character (' ') and 0 otherwise. No other
** character (tab, newline, ...) is treated as white-space here.
*/
static int fts5_iswhitespace(char x){
  return ' '==x;
}
|
|
|
|
/*
** Return 1 if x is one of the four characters that may open a quoted
** word - double-quote, single-quote, '[' or back-tick - and 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
|
|
|
|
/*
** Argument pIn is either NULL or points into a nul-terminated string.
**
** If pIn is NULL, return NULL. Otherwise, return a pointer to the first
** character at or after *pIn that is not a white-space character (as
** defined by fts5_iswhitespace()). This may be pIn itself, or the
** nul-terminator.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* no-op */
  }
  return z;
}
|
|
|
|
/*
** Argument pIn points into a nul-terminated string (it must not be NULL).
**
** Skip over the run of "bareword" characters (see sqlite3Fts5IsBareword())
** that starts at *pIn and return a pointer to the first character that
** follows it. If *pIn is not itself a bareword character - i.e. the run
** is empty - return NULL instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z = pIn;
  while( sqlite3Fts5IsBareword(*z) ){
    z++;
  }
  return (z==pIn) ? 0 : z;
}
|
|
|
|
/*
** Return 1 if a is an ASCII decimal digit ('0'..'9'), or 0 otherwise.
*/
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
|
|
|
|
|
|
|
|
/*
** Argument pIn points to the first character of what is expected to be an
** SQL literal inside a nul-terminated string. The forms recognized are:
**
**   * NULL (in any case),
**   * a blob literal: x'...' or X'...' containing an even number of
**     hexadecimal digits,
**   * a string literal in single quotes, where '' is an escaped quote,
**   * a number: an optional sign, a run of digits and an optional
**     fraction (".digits"). Exponents are not recognized. Note that a
**     bare sign with no digits is also accepted.
**
** Return a pointer to the character immediately following the literal,
** or NULL if pIn does not point at a well-formed literal.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the "x'" prefix plus the digits, so it is even
        ** exactly when the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        if( *p=='\0' ){
          /* Hit the end of the string before finding the close-quote.
          ** This must be tested before p is advanced, otherwise an input
          ** that ends directly after the opening quote causes a read
          ** past the nul-terminator. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the close-quote */
          /* Otherwise *p is the second half of an escaped quote ('') */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by at least one digit. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
|
|
|
|
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function dequotes the string in place. The text between the
** open-quote and the matching close-quote is shifted to the start of the
** buffer, each doubled close-quote character is collapsed to a single
** one, and a new nul-terminator is written after the result. The
** close-quote for '[' is ']'; for every other open-quote it is the same
** character.
**
** The value returned is the byte offset (within the original string) of
** the character immediately following the close-quote. If there is no
** close-quote, everything up to the end of the string is treated as the
** quoted text and the offset of the original nul-terminator is returned.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next byte of z[] to read */
  int iWrite = 0;                 /* Next byte of z[] to write */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  while( z[iRead] ){
    if( z[iRead]!=cClose ){
      /* Ordinary character. Copy it to the output. */
      z[iWrite++] = z[iRead++];
      continue;
    }
    if( z[iRead+1]!=cClose ){
      /* Character iRead is the close quote. Step past it and stop. */
      iRead++;
      break;
    }
    /* Characters iRead and iRead+1 form an escaped quote. Emit a single
    ** quote character and skip the input cursor past both. */
    z[iWrite++] = cClose;
    iRead += 2;
  }

  z[iWrite] = '\0';
  return iRead;
}
|
|
|
|
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters. The conversion is done in-place. If the input
** does not begin with one of the four recognized quote characters, this
** routine is a no-op. The input must not begin with a space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted. Nothing to do. */
      break;
  }
}
|
|
|
|
|
|
/*
** One entry in a table mapping option-value names to integer codes. Such
** tables are passed to fts5ConfigSetEnum() and are terminated by an entry
** with zName set to NULL.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of the value (NULL ends table) */
  int eVal;                       /* Integer code associated with zName */
};
|
|
|
|
static int fts5ConfigSetEnum(
|
|
const Fts5Enum *aEnum,
|
|
const char *zEnum,
|
|
int *peVal
|
|
){
|
|
int nEnum = (int)strlen(zEnum);
|
|
int i;
|
|
int iVal = -1;
|
|
|
|
for(i=0; aEnum[i].zName; i++){
|
|
if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
|
|
if( iVal>=0 ) return SQLITE_ERROR;
|
|
iVal = aEnum[i].eVal;
|
|
}
|
|
}
|
|
|
|
*peVal = iVal;
|
|
return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Parse a "special" CREATE VIRTUAL TABLE directive and update
|
|
** configuration object pConfig as appropriate.
|
|
**
|
|
** If successful, object pConfig is updated and SQLITE_OK returned. If
|
|
** an error occurs, an SQLite error code is returned and an error message
|
|
** may be left in *pzErr. It is the responsibility of the caller to
|
|
** eventually free any such error message using sqlite3_free().
|
|
*/
|
|
static int fts5ConfigParseSpecial(
|
|
Fts5Config *pConfig, /* Configuration object to update */
|
|
const char *zCmd, /* Special command to parse */
|
|
const char *zArg, /* Argument to parse */
|
|
char **pzErr /* OUT: Error message */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
int nCmd = (int)strlen(zCmd);
|
|
|
|
if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
|
|
const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
|
|
const char *p;
|
|
int bFirst = 1;
|
|
if( pConfig->aPrefix==0 ){
|
|
pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
|
|
if( rc ) return rc;
|
|
}
|
|
|
|
p = zArg;
|
|
while( 1 ){
|
|
int nPre = 0;
|
|
|
|
while( p[0]==' ' ) p++;
|
|
if( bFirst==0 && p[0]==',' ){
|
|
p++;
|
|
while( p[0]==' ' ) p++;
|
|
}else if( p[0]=='\0' ){
|
|
break;
|
|
}
|
|
if( p[0]<'0' || p[0]>'9' ){
|
|
*pzErr = sqlite3_mprintf("malformed prefix=... directive");
|
|
rc = SQLITE_ERROR;
|
|
break;
|
|
}
|
|
|
|
if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
|
|
*pzErr = sqlite3_mprintf(
|
|
"too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
|
|
);
|
|
rc = SQLITE_ERROR;
|
|
break;
|
|
}
|
|
|
|
while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
|
|
nPre = nPre*10 + (p[0] - '0');
|
|
p++;
|
|
}
|
|
|
|
if( nPre<=0 || nPre>=1000 ){
|
|
*pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
|
|
rc = SQLITE_ERROR;
|
|
break;
|
|
}
|
|
|
|
pConfig->aPrefix[pConfig->nPrefix] = nPre;
|
|
pConfig->nPrefix++;
|
|
bFirst = 0;
|
|
}
|
|
assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
|
|
const char *p = (const char*)zArg;
|
|
sqlite3_int64 nArg = strlen(zArg) + 1;
|
|
char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg);
|
|
|
|
if( azArg ){
|
|
char *pSpace = (char*)&azArg[nArg];
|
|
if( pConfig->t.azArg ){
|
|
*pzErr = sqlite3_mprintf("multiple tokenize=... directives");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
for(nArg=0; p && *p; nArg++){
|
|
const char *p2 = fts5ConfigSkipWhitespace(p);
|
|
if( *p2=='\'' ){
|
|
p = fts5ConfigSkipLiteral(p2);
|
|
}else{
|
|
p = fts5ConfigSkipBareword(p2);
|
|
}
|
|
if( p ){
|
|
memcpy(pSpace, p2, p-p2);
|
|
azArg[nArg] = pSpace;
|
|
sqlite3Fts5Dequote(pSpace);
|
|
pSpace += (p - p2) + 1;
|
|
p = fts5ConfigSkipWhitespace(p);
|
|
}
|
|
}
|
|
if( p==0 ){
|
|
*pzErr = sqlite3_mprintf("parse error in tokenize directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->t.azArg = (const char**)azArg;
|
|
pConfig->t.nArg = nArg;
|
|
azArg = 0;
|
|
}
|
|
}
|
|
}
|
|
sqlite3_free(azArg);
|
|
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
|
|
if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
|
|
*pzErr = sqlite3_mprintf("multiple content=... directives");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
if( zArg[0] ){
|
|
pConfig->eContent = FTS5_CONTENT_EXTERNAL;
|
|
pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
|
|
}else{
|
|
pConfig->eContent = FTS5_CONTENT_NONE;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("contentless_delete", zCmd, nCmd)==0 ){
|
|
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
|
|
*pzErr = sqlite3_mprintf("malformed contentless_delete=... directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->bContentlessDelete = (zArg[0]=='1');
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){
|
|
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
|
|
*pzErr = sqlite3_mprintf("malformed contentless_delete=... directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->bContentlessUnindexed = (zArg[0]=='1');
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
|
|
if( pConfig->zContentRowid ){
|
|
*pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
|
|
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
|
|
*pzErr = sqlite3_mprintf("malformed columnsize=... directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->bColumnsize = (zArg[0]=='1');
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("locale", zCmd, nCmd)==0 ){
|
|
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
|
|
*pzErr = sqlite3_mprintf("malformed locale=... directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->bLocale = (zArg[0]=='1');
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
|
|
const Fts5Enum aDetail[] = {
|
|
{ "none", FTS5_DETAIL_NONE },
|
|
{ "full", FTS5_DETAIL_FULL },
|
|
{ "columns", FTS5_DETAIL_COLUMNS },
|
|
{ 0, 0 }
|
|
};
|
|
|
|
if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
|
|
*pzErr = sqlite3_mprintf("malformed detail=... directive");
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){
|
|
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
|
|
*pzErr = sqlite3_mprintf("malformed tokendata=... directive");
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
pConfig->bTokendata = (zArg[0]=='1');
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
*pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
|
|
return SQLITE_ERROR;
|
|
}
|
|
|
|
/*
|
|
** Gobble up the first bareword or quoted word from the input buffer zIn.
|
|
** Return a pointer to the character immediately following the last in
|
|
** the gobbled word if successful, or a NULL pointer otherwise (failed
|
|
** to find close-quote character).
|
|
**
|
|
** Before returning, set pzOut to point to a new buffer containing a
|
|
** nul-terminated, dequoted copy of the gobbled word. If the word was
|
|
** quoted, *pbQuoted is also set to 1 before returning.
|
|
**
|
|
** If *pRc is other than SQLITE_OK when this function is called, it is
|
|
** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
|
|
** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
|
|
** set if a parse error (failed to find close quote) occurs.
|
|
*/
|
|
static const char *fts5ConfigGobbleWord(
|
|
int *pRc, /* IN/OUT: Error code */
|
|
const char *zIn, /* Buffer to gobble string/bareword from */
|
|
char **pzOut, /* OUT: malloc'd buffer containing str/bw */
|
|
int *pbQuoted /* OUT: Set to true if dequoting required */
|
|
){
|
|
const char *zRet = 0;
|
|
|
|
sqlite3_int64 nIn = strlen(zIn);
|
|
char *zOut = sqlite3_malloc64(nIn+1);
|
|
|
|
assert( *pRc==SQLITE_OK );
|
|
*pbQuoted = 0;
|
|
*pzOut = 0;
|
|
|
|
if( zOut==0 ){
|
|
*pRc = SQLITE_NOMEM;
|
|
}else{
|
|
memcpy(zOut, zIn, (size_t)(nIn+1));
|
|
if( fts5_isopenquote(zOut[0]) ){
|
|
int ii = fts5Dequote(zOut);
|
|
zRet = &zIn[ii];
|
|
*pbQuoted = 1;
|
|
}else{
|
|
zRet = fts5ConfigSkipBareword(zIn);
|
|
if( zRet ){
|
|
zOut[zRet-zIn] = '\0';
|
|
}
|
|
}
|
|
}
|
|
|
|
if( zRet==0 ){
|
|
sqlite3_free(zOut);
|
|
}else{
|
|
*pzOut = zOut;
|
|
}
|
|
|
|
return zRet;
|
|
}
|
|
|
|
static int fts5ConfigParseColumn(
|
|
Fts5Config *p,
|
|
char *zCol,
|
|
char *zArg,
|
|
char **pzErr,
|
|
int *pbUnindexed
|
|
){
|
|
int rc = SQLITE_OK;
|
|
if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
|
|
|| 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
|
|
){
|
|
*pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
|
|
rc = SQLITE_ERROR;
|
|
}else if( zArg ){
|
|
if( 0==sqlite3_stricmp(zArg, "unindexed") ){
|
|
p->abUnindexed[p->nCol] = 1;
|
|
*pbUnindexed = 1;
|
|
}else{
|
|
*pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
}
|
|
|
|
p->azCol[p->nCol++] = zCol;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Populate the Fts5Config.zContentExprlist string.
|
|
*/
|
|
static int fts5ConfigMakeExprlist(Fts5Config *p){
|
|
int i;
|
|
int rc = SQLITE_OK;
|
|
Fts5Buffer buf = {0, 0, 0};
|
|
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
|
|
if( p->eContent!=FTS5_CONTENT_NONE ){
|
|
assert( p->eContent==FTS5_CONTENT_EXTERNAL
|
|
|| p->eContent==FTS5_CONTENT_NORMAL
|
|
|| p->eContent==FTS5_CONTENT_UNINDEXED
|
|
);
|
|
for(i=0; i<p->nCol; i++){
|
|
if( p->eContent==FTS5_CONTENT_EXTERNAL ){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
|
|
}else if( p->eContent==FTS5_CONTENT_NORMAL || p->abUnindexed[i] ){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
|
|
}else{
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
|
|
}
|
|
}
|
|
}
|
|
if( p->eContent==FTS5_CONTENT_NORMAL && p->bLocale ){
|
|
for(i=0; i<p->nCol; i++){
|
|
if( p->abUnindexed[i]==0 ){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i);
|
|
}else{
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
|
|
}
|
|
}
|
|
}
|
|
|
|
assert( p->zContentExprlist==0 );
|
|
p->zContentExprlist = (char*)buf.p;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Arguments nArg/azArg contain the string arguments passed to the xCreate
|
|
** or xConnect method of the virtual table. This function attempts to
|
|
** allocate an instance of Fts5Config containing the results of parsing
|
|
** those arguments.
|
|
**
|
|
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
|
|
** new Fts5Config object. If an error occurs, an SQLite error code is
|
|
** returned, *ppOut is set to NULL and an error message may be left in
|
|
** *pzErr. It is the responsibility of the caller to eventually free any
|
|
** such error message using sqlite3_free().
|
|
*/
|
|
int sqlite3Fts5ConfigParse(
|
|
Fts5Global *pGlobal,
|
|
sqlite3 *db,
|
|
int nArg, /* Number of arguments */
|
|
const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
|
|
Fts5Config **ppOut, /* OUT: Results of parse */
|
|
char **pzErr /* OUT: Error message */
|
|
){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
Fts5Config *pRet; /* New object to return */
|
|
int i;
|
|
sqlite3_int64 nByte;
|
|
int bUnindexed = 0; /* True if there are one or more UNINDEXED */
|
|
|
|
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
|
|
if( pRet==0 ) return SQLITE_NOMEM;
|
|
memset(pRet, 0, sizeof(Fts5Config));
|
|
pRet->pGlobal = pGlobal;
|
|
pRet->db = db;
|
|
pRet->iCookie = -1;
|
|
|
|
nByte = nArg * (sizeof(char*) + sizeof(u8));
|
|
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
|
|
pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
|
|
pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
|
|
pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
|
|
pRet->bColumnsize = 1;
|
|
pRet->eDetail = FTS5_DETAIL_FULL;
|
|
#ifdef SQLITE_DEBUG
|
|
pRet->bPrefixIndex = 1;
|
|
#endif
|
|
if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
|
|
*pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK );
|
|
for(i=3; rc==SQLITE_OK && i<nArg; i++){
|
|
const char *zOrig = azArg[i];
|
|
const char *z;
|
|
char *zOne = 0;
|
|
char *zTwo = 0;
|
|
int bOption = 0;
|
|
int bMustBeCol = 0;
|
|
|
|
z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
|
|
z = fts5ConfigSkipWhitespace(z);
|
|
if( z && *z=='=' ){
|
|
bOption = 1;
|
|
assert( zOne!=0 );
|
|
z++;
|
|
if( bMustBeCol ) z = 0;
|
|
}
|
|
z = fts5ConfigSkipWhitespace(z);
|
|
if( z && z[0] ){
|
|
int bDummy;
|
|
z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
|
|
if( z && z[0] ) z = 0;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
if( z==0 ){
|
|
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
if( bOption ){
|
|
rc = fts5ConfigParseSpecial(pRet,
|
|
ALWAYS(zOne)?zOne:"",
|
|
zTwo?zTwo:"",
|
|
pzErr
|
|
);
|
|
}else{
|
|
rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed);
|
|
zOne = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
sqlite3_free(zOne);
|
|
sqlite3_free(zTwo);
|
|
}
|
|
|
|
/* We only allow contentless_delete=1 if the table is indeed contentless. */
|
|
if( rc==SQLITE_OK
|
|
&& pRet->bContentlessDelete
|
|
&& pRet->eContent!=FTS5_CONTENT_NONE
|
|
){
|
|
*pzErr = sqlite3_mprintf(
|
|
"contentless_delete=1 requires a contentless table"
|
|
);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
/* We only allow contentless_delete=1 if columnsize=0 is not present.
|
|
**
|
|
** This restriction may be removed at some point.
|
|
*/
|
|
if( rc==SQLITE_OK && pRet->bContentlessDelete && pRet->bColumnsize==0 ){
|
|
*pzErr = sqlite3_mprintf(
|
|
"contentless_delete=1 is incompatible with columnsize=0"
|
|
);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
/* We only allow contentless_unindexed=1 if the table is actually a
|
|
** contentless one.
|
|
*/
|
|
if( rc==SQLITE_OK
|
|
&& pRet->bContentlessUnindexed
|
|
&& pRet->eContent!=FTS5_CONTENT_NONE
|
|
){
|
|
*pzErr = sqlite3_mprintf(
|
|
"contentless_unindexed=1 requires a contentless table"
|
|
);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
/* If no zContent option was specified, fill in the default values. */
|
|
if( rc==SQLITE_OK && pRet->zContent==0 ){
|
|
const char *zTail = 0;
|
|
assert( pRet->eContent==FTS5_CONTENT_NORMAL
|
|
|| pRet->eContent==FTS5_CONTENT_NONE
|
|
);
|
|
if( pRet->eContent==FTS5_CONTENT_NORMAL ){
|
|
zTail = "content";
|
|
}else if( bUnindexed && pRet->bContentlessUnindexed ){
|
|
pRet->eContent = FTS5_CONTENT_UNINDEXED;
|
|
zTail = "content";
|
|
}else if( pRet->bColumnsize ){
|
|
zTail = "docsize";
|
|
}
|
|
|
|
if( zTail ){
|
|
pRet->zContent = sqlite3Fts5Mprintf(
|
|
&rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
|
|
);
|
|
}
|
|
}
|
|
|
|
if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
|
|
pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
|
|
}
|
|
|
|
/* Formulate the zContentExprlist text */
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts5ConfigMakeExprlist(pRet);
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts5ConfigFree(pRet);
|
|
*ppOut = 0;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Free the configuration object passed as the only argument.
|
|
*/
|
|
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
|
|
if( pConfig ){
|
|
int i;
|
|
if( pConfig->t.pTok ){
|
|
if( pConfig->t.pApi1 ){
|
|
pConfig->t.pApi1->xDelete(pConfig->t.pTok);
|
|
}else{
|
|
pConfig->t.pApi2->xDelete(pConfig->t.pTok);
|
|
}
|
|
}
|
|
sqlite3_free((char*)pConfig->t.azArg);
|
|
sqlite3_free(pConfig->zDb);
|
|
sqlite3_free(pConfig->zName);
|
|
for(i=0; i<pConfig->nCol; i++){
|
|
sqlite3_free(pConfig->azCol[i]);
|
|
}
|
|
sqlite3_free(pConfig->azCol);
|
|
sqlite3_free(pConfig->aPrefix);
|
|
sqlite3_free(pConfig->zRank);
|
|
sqlite3_free(pConfig->zRankArgs);
|
|
sqlite3_free(pConfig->zContent);
|
|
sqlite3_free(pConfig->zContentRowid);
|
|
sqlite3_free(pConfig->zContentExprlist);
|
|
sqlite3_free(pConfig);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Call sqlite3_declare_vtab() based on the contents of the configuration
|
|
** object passed as the only argument. Return SQLITE_OK if successful, or
|
|
** an SQLite error code if an error occurs.
|
|
*/
|
|
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
|
|
int i;
|
|
int rc = SQLITE_OK;
|
|
char *zSql;
|
|
|
|
zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
|
|
for(i=0; zSql && i<pConfig->nCol; i++){
|
|
const char *zSep = (i==0?"":", ");
|
|
zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
|
|
}
|
|
zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
|
|
zSql, pConfig->zName, FTS5_RANK_NAME
|
|
);
|
|
|
|
assert( zSql || rc==SQLITE_NOMEM );
|
|
if( zSql ){
|
|
rc = sqlite3_declare_vtab(pConfig->db, zSql);
|
|
sqlite3_free(zSql);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Tokenize the text passed via the second and third arguments.
|
|
**
|
|
** The callback is invoked once for each token in the input text. The
|
|
** arguments passed to it are, in order:
|
|
**
|
|
** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize()
|
|
** const char *pToken // Pointer to buffer containing token
|
|
** int nToken // Size of token in bytes
|
|
** int iStart // Byte offset of start of token within input text
|
|
** int iEnd // Byte offset of end of token within input text
|
|
** int iPos // Position of token in input (first token is 0)
|
|
**
|
|
** If the callback returns a non-zero value the tokenization is abandoned
|
|
** and no further callbacks are issued.
|
|
**
|
|
** This function returns SQLITE_OK if successful or an SQLite error code
|
|
** if an error occurs. If the tokenization was abandoned early because
|
|
** the callback returned SQLITE_DONE, this is not an error and this function
|
|
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
|
|
** because the callback returned another non-zero value, it is assumed
|
|
** to be an SQLite error code and returned to the caller.
|
|
*/
|
|
int sqlite3Fts5Tokenize(
|
|
Fts5Config *pConfig, /* FTS5 Configuration object */
|
|
int flags, /* FTS5_TOKENIZE_* flags */
|
|
const char *pText, int nText, /* Text to tokenize */
|
|
void *pCtx, /* Context passed to xToken() */
|
|
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
if( pText ){
|
|
if( pConfig->t.pTok==0 ){
|
|
rc = sqlite3Fts5LoadTokenizer(pConfig);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
if( pConfig->t.pApi1 ){
|
|
rc = pConfig->t.pApi1->xTokenize(
|
|
pConfig->t.pTok, pCtx, flags, pText, nText, xToken
|
|
);
|
|
}else{
|
|
rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags,
|
|
pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken
|
|
);
|
|
}
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. White-space is permitted around each literal.
**
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);

    /* Stop on a parse error (z==0) or at the closing parenthesis */
    if( z==0 || *z==')' ) return z;

    /* Anything else following a literal must be a comma */
    if( *z!=',' ) return 0;
    z++;
  }
}
|
|
|
|
/*
|
|
** Parameter zIn contains a rank() function specification. The format of
|
|
** this is:
|
|
**
|
|
** + Bareword (function name)
|
|
** + Open parenthesis - "("
|
|
** + Zero or more SQL literals in a comma separated list
|
|
** + Close parenthesis - ")"
|
|
*/
|
|
int sqlite3Fts5ConfigParseRank(
|
|
const char *zIn, /* Input string */
|
|
char **pzRank, /* OUT: Rank function name */
|
|
char **pzRankArgs /* OUT: Rank function arguments */
|
|
){
|
|
const char *p = zIn;
|
|
const char *pRank;
|
|
char *zRank = 0;
|
|
char *zRankArgs = 0;
|
|
int rc = SQLITE_OK;
|
|
|
|
*pzRank = 0;
|
|
*pzRankArgs = 0;
|
|
|
|
if( p==0 ){
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
p = fts5ConfigSkipWhitespace(p);
|
|
pRank = p;
|
|
p = fts5ConfigSkipBareword(p);
|
|
|
|
if( p ){
|
|
zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
|
|
if( zRank ) memcpy(zRank, pRank, p-pRank);
|
|
}else{
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
p = fts5ConfigSkipWhitespace(p);
|
|
if( *p!='(' ) rc = SQLITE_ERROR;
|
|
p++;
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
const char *pArgs;
|
|
p = fts5ConfigSkipWhitespace(p);
|
|
pArgs = p;
|
|
if( *p!=')' ){
|
|
p = fts5ConfigSkipArgs(p);
|
|
if( p==0 ){
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
|
|
if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(zRank);
|
|
assert( zRankArgs==0 );
|
|
}else{
|
|
*pzRank = zRank;
|
|
*pzRankArgs = zRankArgs;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
int sqlite3Fts5ConfigSetValue(
|
|
Fts5Config *pConfig,
|
|
const char *zKey,
|
|
sqlite3_value *pVal,
|
|
int *pbBadkey
|
|
){
|
|
int rc = SQLITE_OK;
|
|
|
|
if( 0==sqlite3_stricmp(zKey, "pgsz") ){
|
|
int pgsz = 0;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
pgsz = sqlite3_value_int(pVal);
|
|
}
|
|
if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
pConfig->pgsz = pgsz;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
|
|
int nHashSize = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
nHashSize = sqlite3_value_int(pVal);
|
|
}
|
|
if( nHashSize<=0 ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
pConfig->nHashSize = nHashSize;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "automerge") ){
|
|
int nAutomerge = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
nAutomerge = sqlite3_value_int(pVal);
|
|
}
|
|
if( nAutomerge<0 || nAutomerge>64 ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
|
|
pConfig->nAutomerge = nAutomerge;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "usermerge") ){
|
|
int nUsermerge = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
nUsermerge = sqlite3_value_int(pVal);
|
|
}
|
|
if( nUsermerge<2 || nUsermerge>16 ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
pConfig->nUsermerge = nUsermerge;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
|
|
int nCrisisMerge = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
nCrisisMerge = sqlite3_value_int(pVal);
|
|
}
|
|
if( nCrisisMerge<0 ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
|
|
if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1;
|
|
pConfig->nCrisisMerge = nCrisisMerge;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "deletemerge") ){
|
|
int nVal = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
nVal = sqlite3_value_int(pVal);
|
|
}else{
|
|
*pbBadkey = 1;
|
|
}
|
|
if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE;
|
|
if( nVal>100 ) nVal = 0;
|
|
pConfig->nDeleteMerge = nVal;
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "rank") ){
|
|
const char *zIn = (const char*)sqlite3_value_text(pVal);
|
|
char *zRank;
|
|
char *zRankArgs;
|
|
rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
|
|
if( rc==SQLITE_OK ){
|
|
sqlite3_free(pConfig->zRank);
|
|
sqlite3_free(pConfig->zRankArgs);
|
|
pConfig->zRank = zRank;
|
|
pConfig->zRankArgs = zRankArgs;
|
|
}else if( rc==SQLITE_ERROR ){
|
|
rc = SQLITE_OK;
|
|
*pbBadkey = 1;
|
|
}
|
|
}
|
|
|
|
else if( 0==sqlite3_stricmp(zKey, "secure-delete") ){
|
|
int bVal = -1;
|
|
if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
|
|
bVal = sqlite3_value_int(pVal);
|
|
}
|
|
if( bVal<0 ){
|
|
*pbBadkey = 1;
|
|
}else{
|
|
pConfig->bSecureDelete = (bVal ? 1 : 0);
|
|
}
|
|
}else{
|
|
*pbBadkey = 1;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Load the contents of the %_config table into memory.
|
|
*/
|
|
int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
|
|
const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
|
|
char *zSql;
|
|
sqlite3_stmt *p = 0;
|
|
int rc = SQLITE_OK;
|
|
int iVersion = 0;
|
|
|
|
/* Set default values */
|
|
pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
|
|
pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
|
|
pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
|
|
pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
|
|
pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
|
|
pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE;
|
|
|
|
zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
|
|
if( zSql ){
|
|
rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
|
|
sqlite3_free(zSql);
|
|
}
|
|
|
|
assert( rc==SQLITE_OK || p==0 );
|
|
if( rc==SQLITE_OK ){
|
|
while( SQLITE_ROW==sqlite3_step(p) ){
|
|
const char *zK = (const char*)sqlite3_column_text(p, 0);
|
|
sqlite3_value *pVal = sqlite3_column_value(p, 1);
|
|
if( 0==sqlite3_stricmp(zK, "version") ){
|
|
iVersion = sqlite3_value_int(pVal);
|
|
}else{
|
|
int bDummy = 0;
|
|
sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
|
|
}
|
|
}
|
|
rc = sqlite3_finalize(p);
|
|
}
|
|
|
|
if( rc==SQLITE_OK
|
|
&& iVersion!=FTS5_CURRENT_VERSION
|
|
&& iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE
|
|
){
|
|
rc = SQLITE_ERROR;
|
|
sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format "
|
|
"(found %d, expected %d or %d) - run 'rebuild'",
|
|
iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE
|
|
);
|
|
}else{
|
|
pConfig->iVersion = iVersion;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
pConfig->iCookie = iCookie;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer
|
|
** containing the error message created using printf() style formatting
|
|
** string zFmt and its trailing arguments.
|
|
*/
|
|
void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){
|
|
va_list ap; /* ... printf arguments */
|
|
char *zMsg = 0;
|
|
|
|
va_start(ap, zFmt);
|
|
zMsg = sqlite3_vmprintf(zFmt, ap);
|
|
if( pConfig->pzErrmsg ){
|
|
assert( *pConfig->pzErrmsg==0 );
|
|
*pConfig->pzErrmsg = zMsg;
|
|
}else{
|
|
sqlite3_free(zMsg);
|
|
}
|
|
|
|
va_end(ap);
|
|
}
|
|
|
|
|