mirror of
https://github.com/sqlite/sqlite.git
synced 2024-11-25 08:59:33 +01:00
0ad63e5e0c
FossilOrigin-Name: 9bf811ea89a133d481a86407683a7518e909eccb7aa3aa02f7932bdb33387572
345 lines
6.9 KiB
C
345 lines
6.9 KiB
C
/*
|
|
** 2015 May 30
|
|
**
|
|
** The author disclaims copyright to this source code. In place of
|
|
** a legal notice, here is a blessing:
|
|
**
|
|
** May you do good and not evil.
|
|
** May you find forgiveness for yourself and forgive others.
|
|
** May you share freely, never taking more than you give.
|
|
**
|
|
******************************************************************************
|
|
**
|
|
** Routines for varint serialization and deserialization.
|
|
*/
|
|
|
|
|
|
#include "fts5Int.h"
|
|
|
|
/*
|
|
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
|
|
** Except, this version does handle the single byte case that the core
|
|
** version depends on being handled before its function is called.
|
|
*/
|
|
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
|
|
u32 a,b;
|
|
|
|
/* The 1-byte case. Overwhelmingly the most common. */
|
|
a = *p;
|
|
/* a: p0 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
/* Values between 0 and 127 */
|
|
*v = a;
|
|
return 1;
|
|
}
|
|
|
|
/* The 2-byte case */
|
|
p++;
|
|
b = *p;
|
|
/* b: p1 (unmasked) */
|
|
if (!(b&0x80))
|
|
{
|
|
/* Values between 128 and 16383 */
|
|
a &= 0x7f;
|
|
a = a<<7;
|
|
*v = a | b;
|
|
return 2;
|
|
}
|
|
|
|
/* The 3-byte case */
|
|
p++;
|
|
a = a<<14;
|
|
a |= *p;
|
|
/* a: p0<<14 | p2 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
/* Values between 16384 and 2097151 */
|
|
a &= (0x7f<<14)|(0x7f);
|
|
b &= 0x7f;
|
|
b = b<<7;
|
|
*v = a | b;
|
|
return 3;
|
|
}
|
|
|
|
/* A 32-bit varint is used to store size information in btrees.
|
|
** Objects are rarely larger than 2MiB limit of a 3-byte varint.
|
|
** A 3-byte varint is sufficient, for example, to record the size
|
|
** of a 1048569-byte BLOB or string.
|
|
**
|
|
** We only unroll the first 1-, 2-, and 3- byte cases. The very
|
|
** rare larger cases can be handled by the slower 64-bit varint
|
|
** routine.
|
|
*/
|
|
{
|
|
u64 v64;
|
|
u8 n;
|
|
p -= 2;
|
|
n = sqlite3Fts5GetVarint(p, &v64);
|
|
*v = ((u32)v64) & 0x7FFFFFFF;
|
|
assert( n>3 && n<=9 );
|
|
return n;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
** Bitmasks used by sqlite3GetVarint(). These precomputed constants
|
|
** are defined here rather than simply putting the constant expressions
|
|
** inline in order to work around bugs in the RVT compiler.
|
|
**
|
|
** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
|
|
**
|
|
** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
|
|
*/
|
|
#define SLOT_2_0 0x001fc07f
|
|
#define SLOT_4_2_0 0xf01fc07f
|
|
|
|
/*
|
|
** Read a 64-bit variable-length integer from memory starting at p[0].
|
|
** Return the number of bytes read. The value is stored in *v.
|
|
*/
|
|
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
|
|
u32 a,b,s;
|
|
|
|
a = *p;
|
|
/* a: p0 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
*v = a;
|
|
return 1;
|
|
}
|
|
|
|
p++;
|
|
b = *p;
|
|
/* b: p1 (unmasked) */
|
|
if (!(b&0x80))
|
|
{
|
|
a &= 0x7f;
|
|
a = a<<7;
|
|
a |= b;
|
|
*v = a;
|
|
return 2;
|
|
}
|
|
|
|
/* Verify that constants are precomputed correctly */
|
|
assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
|
|
assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
|
|
|
|
p++;
|
|
a = a<<14;
|
|
a |= *p;
|
|
/* a: p0<<14 | p2 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
a &= SLOT_2_0;
|
|
b &= 0x7f;
|
|
b = b<<7;
|
|
a |= b;
|
|
*v = a;
|
|
return 3;
|
|
}
|
|
|
|
/* CSE1 from below */
|
|
a &= SLOT_2_0;
|
|
p++;
|
|
b = b<<14;
|
|
b |= *p;
|
|
/* b: p1<<14 | p3 (unmasked) */
|
|
if (!(b&0x80))
|
|
{
|
|
b &= SLOT_2_0;
|
|
/* moved CSE1 up */
|
|
/* a &= (0x7f<<14)|(0x7f); */
|
|
a = a<<7;
|
|
a |= b;
|
|
*v = a;
|
|
return 4;
|
|
}
|
|
|
|
/* a: p0<<14 | p2 (masked) */
|
|
/* b: p1<<14 | p3 (unmasked) */
|
|
/* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
|
|
/* moved CSE1 up */
|
|
/* a &= (0x7f<<14)|(0x7f); */
|
|
b &= SLOT_2_0;
|
|
s = a;
|
|
/* s: p0<<14 | p2 (masked) */
|
|
|
|
p++;
|
|
a = a<<14;
|
|
a |= *p;
|
|
/* a: p0<<28 | p2<<14 | p4 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
/* we can skip these cause they were (effectively) done above in calc'ing s */
|
|
/* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
|
|
/* b &= (0x7f<<14)|(0x7f); */
|
|
b = b<<7;
|
|
a |= b;
|
|
s = s>>18;
|
|
*v = ((u64)s)<<32 | a;
|
|
return 5;
|
|
}
|
|
|
|
/* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
|
|
s = s<<7;
|
|
s |= b;
|
|
/* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
|
|
|
|
p++;
|
|
b = b<<14;
|
|
b |= *p;
|
|
/* b: p1<<28 | p3<<14 | p5 (unmasked) */
|
|
if (!(b&0x80))
|
|
{
|
|
/* we can skip this cause it was (effectively) done above in calc'ing s */
|
|
/* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
|
|
a &= SLOT_2_0;
|
|
a = a<<7;
|
|
a |= b;
|
|
s = s>>18;
|
|
*v = ((u64)s)<<32 | a;
|
|
return 6;
|
|
}
|
|
|
|
p++;
|
|
a = a<<14;
|
|
a |= *p;
|
|
/* a: p2<<28 | p4<<14 | p6 (unmasked) */
|
|
if (!(a&0x80))
|
|
{
|
|
a &= SLOT_4_2_0;
|
|
b &= SLOT_2_0;
|
|
b = b<<7;
|
|
a |= b;
|
|
s = s>>11;
|
|
*v = ((u64)s)<<32 | a;
|
|
return 7;
|
|
}
|
|
|
|
/* CSE2 from below */
|
|
a &= SLOT_2_0;
|
|
p++;
|
|
b = b<<14;
|
|
b |= *p;
|
|
/* b: p3<<28 | p5<<14 | p7 (unmasked) */
|
|
if (!(b&0x80))
|
|
{
|
|
b &= SLOT_4_2_0;
|
|
/* moved CSE2 up */
|
|
/* a &= (0x7f<<14)|(0x7f); */
|
|
a = a<<7;
|
|
a |= b;
|
|
s = s>>4;
|
|
*v = ((u64)s)<<32 | a;
|
|
return 8;
|
|
}
|
|
|
|
p++;
|
|
a = a<<15;
|
|
a |= *p;
|
|
/* a: p4<<29 | p6<<15 | p8 (unmasked) */
|
|
|
|
/* moved CSE2 up */
|
|
/* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
|
|
b &= SLOT_2_0;
|
|
b = b<<8;
|
|
a |= b;
|
|
|
|
s = s<<4;
|
|
b = p[-4];
|
|
b &= 0x7f;
|
|
b = b>>3;
|
|
s |= b;
|
|
|
|
*v = ((u64)s)<<32 | a;
|
|
|
|
return 9;
|
|
}
|
|
|
|
/*
|
|
** The variable-length integer encoding is as follows:
|
|
**
|
|
** KEY:
|
|
** A = 0xxxxxxx 7 bits of data and one flag bit
|
|
** B = 1xxxxxxx 7 bits of data and one flag bit
|
|
** C = xxxxxxxx 8 bits of data
|
|
**
|
|
** 7 bits - A
|
|
** 14 bits - BA
|
|
** 21 bits - BBA
|
|
** 28 bits - BBBA
|
|
** 35 bits - BBBBA
|
|
** 42 bits - BBBBBA
|
|
** 49 bits - BBBBBBA
|
|
** 56 bits - BBBBBBBA
|
|
** 64 bits - BBBBBBBBC
|
|
*/
|
|
|
|
#ifdef SQLITE_NOINLINE
|
|
# define FTS5_NOINLINE SQLITE_NOINLINE
|
|
#else
|
|
# define FTS5_NOINLINE
|
|
#endif
|
|
|
|
/*
|
|
** Write a 64-bit variable-length integer to memory starting at p[0].
|
|
** The length of data write will be between 1 and 9 bytes. The number
|
|
** of bytes written is returned.
|
|
**
|
|
** A variable-length integer consists of the lower 7 bits of each byte
|
|
** for all bytes that have the 8th bit set and one byte with the 8th
|
|
** bit clear. Except, if we get to the 9th byte, it stores the full
|
|
** 8 bits and is the last byte.
|
|
*/
|
|
static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
|
|
int i, j, n;
|
|
u8 buf[10];
|
|
if( v & (((u64)0xff000000)<<32) ){
|
|
p[8] = (u8)v;
|
|
v >>= 8;
|
|
for(i=7; i>=0; i--){
|
|
p[i] = (u8)((v & 0x7f) | 0x80);
|
|
v >>= 7;
|
|
}
|
|
return 9;
|
|
}
|
|
n = 0;
|
|
do{
|
|
buf[n++] = (u8)((v & 0x7f) | 0x80);
|
|
v >>= 7;
|
|
}while( v!=0 );
|
|
buf[0] &= 0x7f;
|
|
assert( n<=9 );
|
|
for(i=0, j=n-1; j>=0; j--, i++){
|
|
p[i] = buf[j];
|
|
}
|
|
return n;
|
|
}
|
|
|
|
int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
|
|
if( v<=0x7f ){
|
|
p[0] = v&0x7f;
|
|
return 1;
|
|
}
|
|
if( v<=0x3fff ){
|
|
p[0] = ((v>>7)&0x7f)|0x80;
|
|
p[1] = v&0x7f;
|
|
return 2;
|
|
}
|
|
return fts5PutVarint64(p,v);
|
|
}
|
|
|
|
|
|
int sqlite3Fts5GetVarintLen(u32 iVal){
|
|
#if 0
|
|
if( iVal<(1 << 7 ) ) return 1;
|
|
#endif
|
|
assert( iVal>=(1 << 7) );
|
|
if( iVal<(1 << 14) ) return 2;
|
|
if( iVal<(1 << 21) ) return 3;
|
|
if( iVal<(1 << 28) ) return 4;
|
|
return 5;
|
|
}
|