xref: /sqlite-3.40.0/ext/fts5/fts5_varint.c (revision 0ad63e5e)
1 /*
2 ** 2015 May 30
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Routines for varint serialization and deserialization.
14 */
15 
16 
17 #include "fts5Int.h"
18 
19 /*
20 ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
21 ** Except, this version does handle the single byte case that the core
22 ** version depends on being handled before its function is called.
23 */
sqlite3Fts5GetVarint32(const unsigned char * p,u32 * v)24 int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
25   u32 a,b;
26 
27   /* The 1-byte case. Overwhelmingly the most common. */
28   a = *p;
29   /* a: p0 (unmasked) */
30   if (!(a&0x80))
31   {
32     /* Values between 0 and 127 */
33     *v = a;
34     return 1;
35   }
36 
37   /* The 2-byte case */
38   p++;
39   b = *p;
40   /* b: p1 (unmasked) */
41   if (!(b&0x80))
42   {
43     /* Values between 128 and 16383 */
44     a &= 0x7f;
45     a = a<<7;
46     *v = a | b;
47     return 2;
48   }
49 
50   /* The 3-byte case */
51   p++;
52   a = a<<14;
53   a |= *p;
54   /* a: p0<<14 | p2 (unmasked) */
55   if (!(a&0x80))
56   {
57     /* Values between 16384 and 2097151 */
58     a &= (0x7f<<14)|(0x7f);
59     b &= 0x7f;
60     b = b<<7;
61     *v = a | b;
62     return 3;
63   }
64 
65   /* A 32-bit varint is used to store size information in btrees.
66   ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
67   ** A 3-byte varint is sufficient, for example, to record the size
68   ** of a 1048569-byte BLOB or string.
69   **
70   ** We only unroll the first 1-, 2-, and 3- byte cases.  The very
71   ** rare larger cases can be handled by the slower 64-bit varint
72   ** routine.
73   */
74   {
75     u64 v64;
76     u8 n;
77     p -= 2;
78     n = sqlite3Fts5GetVarint(p, &v64);
79     *v = ((u32)v64) & 0x7FFFFFFF;
80     assert( n>3 && n<=9 );
81     return n;
82   }
83 }
84 
85 
86 /*
87 ** Bitmasks used by sqlite3GetVarint().  These precomputed constants
88 ** are defined here rather than simply putting the constant expressions
89 ** inline in order to work around bugs in the RVT compiler.
90 **
91 ** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
92 **
93 ** SLOT_4_2_0   A mask for  (0x7f<<28) | SLOT_2_0
94 */
95 #define SLOT_2_0     0x001fc07f
96 #define SLOT_4_2_0   0xf01fc07f
97 
98 /*
99 ** Read a 64-bit variable-length integer from memory starting at p[0].
100 ** Return the number of bytes read.  The value is stored in *v.
101 */
sqlite3Fts5GetVarint(const unsigned char * p,u64 * v)102 u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
103   u32 a,b,s;
104 
105   a = *p;
106   /* a: p0 (unmasked) */
107   if (!(a&0x80))
108   {
109     *v = a;
110     return 1;
111   }
112 
113   p++;
114   b = *p;
115   /* b: p1 (unmasked) */
116   if (!(b&0x80))
117   {
118     a &= 0x7f;
119     a = a<<7;
120     a |= b;
121     *v = a;
122     return 2;
123   }
124 
125   /* Verify that constants are precomputed correctly */
126   assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
127   assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
128 
129   p++;
130   a = a<<14;
131   a |= *p;
132   /* a: p0<<14 | p2 (unmasked) */
133   if (!(a&0x80))
134   {
135     a &= SLOT_2_0;
136     b &= 0x7f;
137     b = b<<7;
138     a |= b;
139     *v = a;
140     return 3;
141   }
142 
143   /* CSE1 from below */
144   a &= SLOT_2_0;
145   p++;
146   b = b<<14;
147   b |= *p;
148   /* b: p1<<14 | p3 (unmasked) */
149   if (!(b&0x80))
150   {
151     b &= SLOT_2_0;
152     /* moved CSE1 up */
153     /* a &= (0x7f<<14)|(0x7f); */
154     a = a<<7;
155     a |= b;
156     *v = a;
157     return 4;
158   }
159 
160   /* a: p0<<14 | p2 (masked) */
161   /* b: p1<<14 | p3 (unmasked) */
162   /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
163   /* moved CSE1 up */
164   /* a &= (0x7f<<14)|(0x7f); */
165   b &= SLOT_2_0;
166   s = a;
167   /* s: p0<<14 | p2 (masked) */
168 
169   p++;
170   a = a<<14;
171   a |= *p;
172   /* a: p0<<28 | p2<<14 | p4 (unmasked) */
173   if (!(a&0x80))
174   {
175     /* we can skip these cause they were (effectively) done above in calc'ing s */
176     /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
177     /* b &= (0x7f<<14)|(0x7f); */
178     b = b<<7;
179     a |= b;
180     s = s>>18;
181     *v = ((u64)s)<<32 | a;
182     return 5;
183   }
184 
185   /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
186   s = s<<7;
187   s |= b;
188   /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
189 
190   p++;
191   b = b<<14;
192   b |= *p;
193   /* b: p1<<28 | p3<<14 | p5 (unmasked) */
194   if (!(b&0x80))
195   {
196     /* we can skip this cause it was (effectively) done above in calc'ing s */
197     /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
198     a &= SLOT_2_0;
199     a = a<<7;
200     a |= b;
201     s = s>>18;
202     *v = ((u64)s)<<32 | a;
203     return 6;
204   }
205 
206   p++;
207   a = a<<14;
208   a |= *p;
209   /* a: p2<<28 | p4<<14 | p6 (unmasked) */
210   if (!(a&0x80))
211   {
212     a &= SLOT_4_2_0;
213     b &= SLOT_2_0;
214     b = b<<7;
215     a |= b;
216     s = s>>11;
217     *v = ((u64)s)<<32 | a;
218     return 7;
219   }
220 
221   /* CSE2 from below */
222   a &= SLOT_2_0;
223   p++;
224   b = b<<14;
225   b |= *p;
226   /* b: p3<<28 | p5<<14 | p7 (unmasked) */
227   if (!(b&0x80))
228   {
229     b &= SLOT_4_2_0;
230     /* moved CSE2 up */
231     /* a &= (0x7f<<14)|(0x7f); */
232     a = a<<7;
233     a |= b;
234     s = s>>4;
235     *v = ((u64)s)<<32 | a;
236     return 8;
237   }
238 
239   p++;
240   a = a<<15;
241   a |= *p;
242   /* a: p4<<29 | p6<<15 | p8 (unmasked) */
243 
244   /* moved CSE2 up */
245   /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
246   b &= SLOT_2_0;
247   b = b<<8;
248   a |= b;
249 
250   s = s<<4;
251   b = p[-4];
252   b &= 0x7f;
253   b = b>>3;
254   s |= b;
255 
256   *v = ((u64)s)<<32 | a;
257 
258   return 9;
259 }
260 
261 /*
262 ** The variable-length integer encoding is as follows:
263 **
264 ** KEY:
265 **         A = 0xxxxxxx    7 bits of data and one flag bit
266 **         B = 1xxxxxxx    7 bits of data and one flag bit
267 **         C = xxxxxxxx    8 bits of data
268 **
269 **  7 bits - A
270 ** 14 bits - BA
271 ** 21 bits - BBA
272 ** 28 bits - BBBA
273 ** 35 bits - BBBBA
274 ** 42 bits - BBBBBA
275 ** 49 bits - BBBBBBA
276 ** 56 bits - BBBBBBBA
277 ** 64 bits - BBBBBBBBC
278 */
279 
280 #ifdef SQLITE_NOINLINE
281 # define FTS5_NOINLINE SQLITE_NOINLINE
282 #else
283 # define FTS5_NOINLINE
284 #endif
285 
286 /*
287 ** Write a 64-bit variable-length integer to memory starting at p[0].
288 ** The length of data write will be between 1 and 9 bytes.  The number
289 ** of bytes written is returned.
290 **
291 ** A variable-length integer consists of the lower 7 bits of each byte
292 ** for all bytes that have the 8th bit set and one byte with the 8th
293 ** bit clear.  Except, if we get to the 9th byte, it stores the full
294 ** 8 bits and is the last byte.
295 */
fts5PutVarint64(unsigned char * p,u64 v)296 static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
297   int i, j, n;
298   u8 buf[10];
299   if( v & (((u64)0xff000000)<<32) ){
300     p[8] = (u8)v;
301     v >>= 8;
302     for(i=7; i>=0; i--){
303       p[i] = (u8)((v & 0x7f) | 0x80);
304       v >>= 7;
305     }
306     return 9;
307   }
308   n = 0;
309   do{
310     buf[n++] = (u8)((v & 0x7f) | 0x80);
311     v >>= 7;
312   }while( v!=0 );
313   buf[0] &= 0x7f;
314   assert( n<=9 );
315   for(i=0, j=n-1; j>=0; j--, i++){
316     p[i] = buf[j];
317   }
318   return n;
319 }
320 
sqlite3Fts5PutVarint(unsigned char * p,u64 v)321 int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
322   if( v<=0x7f ){
323     p[0] = v&0x7f;
324     return 1;
325   }
326   if( v<=0x3fff ){
327     p[0] = ((v>>7)&0x7f)|0x80;
328     p[1] = v&0x7f;
329     return 2;
330   }
331   return fts5PutVarint64(p,v);
332 }
333 
334 
sqlite3Fts5GetVarintLen(u32 iVal)335 int sqlite3Fts5GetVarintLen(u32 iVal){
336 #if 0
337   if( iVal<(1 << 7 ) ) return 1;
338 #endif
339   assert( iVal>=(1 << 7) );
340   if( iVal<(1 << 14) ) return 2;
341   if( iVal<(1 << 21) ) return 3;
342   if( iVal<(1 << 28) ) return 4;
343   return 5;
344 }
345