xref: /sqlite-3.40.0/ext/misc/fossildelta.c (revision 2e22579d)
1 /*
2 ** 2019-02-19
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This SQLite extension implements the delta functions used by the RBU
14 ** extension. Three scalar functions and one table-valued function are
15 ** implemented here:
16 **
17 **   delta_apply(X,D)     -- apply delta D to file X and return the result
18 **   delta_create(X,Y)    -- compute and return a delta that carries X into Y
19 **   delta_output_size(D) -- blob size in bytes output from applying delta D
20 **   delta_parse(D)       -- returns rows describing delta D
21 **
22 ** The delta format is the Fossil delta format, described in a comment
23 ** on the delete_create() function implementation below, and also at
24 **
25 **    https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki
26 **
27 ** This delta format is used by the RBU extension, which is the main
28 ** reason that these routines are included in the extension library.
29 ** RBU does not use this extension directly.  Rather, this extension is
30 ** provided as a convenience to developers who want to analyze RBU files
31 ** that contain deltas.
32 */
33 #include <string.h>
34 #include <assert.h>
35 #include <stdlib.h>
36 #include "sqlite3ext.h"
37 SQLITE_EXTENSION_INIT1
38 
39 #ifndef SQLITE_AMALGAMATION
40 /*
41 ** The "u32" type must be an unsigned 32-bit integer.  Adjust this
42 */
43 typedef unsigned int u32;
44 
45 /*
46 ** Must be a 16-bit value
47 */
48 typedef short int s16;
49 typedef unsigned short int u16;
50 
51 #endif /* SQLITE_AMALGAMATION */
52 
53 
54 /*
55 ** The width of a hash window in bytes.  The algorithm only works if this
56 ** is a power of 2.
57 */
58 #define NHASH 16
59 
60 /*
61 ** The current state of the rolling hash.
62 **
63 ** z[] holds the values that have been hashed.  z[] is a circular buffer.
64 ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
65 ** the window.
66 **
67 ** Hash.a is the sum of all elements of hash.z[].  Hash.b is a weighted
68 ** sum.  Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
69 ** (Each index for z[] should be module NHASH, of course.  The %NHASH operator
70 ** is omitted in the prior expression for brevity.)
71 */
72 typedef struct hash hash;
73 struct hash {
74   u16 a, b;         /* Hash values */
75   u16 i;            /* Start of the hash window */
76   char z[NHASH];    /* The values that have been hashed */
77 };
78 
79 /*
80 ** Initialize the rolling hash using the first NHASH characters of z[]
81 */
hash_init(hash * pHash,const char * z)82 static void hash_init(hash *pHash, const char *z){
83   u16 a, b, i;
84   a = b = z[0];
85   for(i=1; i<NHASH; i++){
86     a += z[i];
87     b += a;
88   }
89   memcpy(pHash->z, z, NHASH);
90   pHash->a = a & 0xffff;
91   pHash->b = b & 0xffff;
92   pHash->i = 0;
93 }
94 
95 /*
96 ** Advance the rolling hash by a single character "c"
97 */
hash_next(hash * pHash,int c)98 static void hash_next(hash *pHash, int c){
99   u16 old = pHash->z[pHash->i];
100   pHash->z[pHash->i] = c;
101   pHash->i = (pHash->i+1)&(NHASH-1);
102   pHash->a = pHash->a - old + c;
103   pHash->b = pHash->b - NHASH*old + pHash->a;
104 }
105 
106 /*
107 ** Return a 32-bit hash value
108 */
hash_32bit(hash * pHash)109 static u32 hash_32bit(hash *pHash){
110   return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
111 }
112 
113 /*
114 ** Compute a hash on NHASH bytes.
115 **
116 ** This routine is intended to be equivalent to:
117 **    hash h;
118 **    hash_init(&h, zInput);
119 **    return hash_32bit(&h);
120 */
hash_once(const char * z)121 static u32 hash_once(const char *z){
122   u16 a, b, i;
123   a = b = z[0];
124   for(i=1; i<NHASH; i++){
125     a += z[i];
126     b += a;
127   }
128   return a | (((u32)b)<<16);
129 }
130 
131 /*
132 ** Write an base-64 integer into the given buffer.
133 */
putInt(unsigned int v,char ** pz)134 static void putInt(unsigned int v, char **pz){
135   static const char zDigits[] =
136     "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
137   /*  123456789 123456789 123456789 123456789 123456789 123456789 123 */
138   int i, j;
139   char zBuf[20];
140   if( v==0 ){
141     *(*pz)++ = '0';
142     return;
143   }
144   for(i=0; v>0; i++, v>>=6){
145     zBuf[i] = zDigits[v&0x3f];
146   }
147   for(j=i-1; j>=0; j--){
148     *(*pz)++ = zBuf[j];
149   }
150 }
151 
152 /*
153 ** Read bytes from *pz and convert them into a positive integer.  When
154 ** finished, leave *pz pointing to the first character past the end of
155 ** the integer.  The *pLen parameter holds the length of the string
156 ** in *pz and is decremented once for each character in the integer.
157 */
deltaGetInt(const char ** pz,int * pLen)158 static unsigned int deltaGetInt(const char **pz, int *pLen){
159   static const signed char zValue[] = {
160     -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
161     -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
162     -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
163      0,  1,  2,  3,  4,  5,  6,  7,    8,  9, -1, -1, -1, -1, -1, -1,
164     -1, 10, 11, 12, 13, 14, 15, 16,   17, 18, 19, 20, 21, 22, 23, 24,
165     25, 26, 27, 28, 29, 30, 31, 32,   33, 34, 35, -1, -1, -1, -1, 36,
166     -1, 37, 38, 39, 40, 41, 42, 43,   44, 45, 46, 47, 48, 49, 50, 51,
167     52, 53, 54, 55, 56, 57, 58, 59,   60, 61, 62, -1, -1, -1, 63, -1,
168   };
169   unsigned int v = 0;
170   int c;
171   unsigned char *z = (unsigned char*)*pz;
172   unsigned char *zStart = z;
173   while( (c = zValue[0x7f&*(z++)])>=0 ){
174      v = (v<<6) + c;
175   }
176   z--;
177   *pLen -= z - zStart;
178   *pz = (char*)z;
179   return v;
180 }
181 
182 /*
183 ** Return the number digits in the base-64 representation of a positive integer
184 */
digit_count(int v)185 static int digit_count(int v){
186   unsigned int i, x;
187   for(i=1, x=64; v>=x; i++, x <<= 6){}
188   return i;
189 }
190 
191 #ifdef __GNUC__
192 # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
193 #else
194 # define GCC_VERSION 0
195 #endif
196 
197 /*
198 ** Compute a 32-bit big-endian checksum on the N-byte buffer.  If the
199 ** buffer is not a multiple of 4 bytes length, compute the sum that would
200 ** have occurred if the buffer was padded with zeros to the next multiple
201 ** of four bytes.
202 */
checksum(const char * zIn,size_t N)203 static unsigned int checksum(const char *zIn, size_t N){
204   static const int byteOrderTest = 1;
205   const unsigned char *z = (const unsigned char *)zIn;
206   const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
207   unsigned sum = 0;
208   assert( (z - (const unsigned char*)0)%4==0 );  /* Four-byte alignment */
209   if( 0==*(char*)&byteOrderTest ){
210     /* This is a big-endian machine */
211     while( z<zEnd ){
212       sum += *(unsigned*)z;
213       z += 4;
214     }
215   }else{
216     /* A little-endian machine */
217 #if GCC_VERSION>=4003000
218     while( z<zEnd ){
219       sum += __builtin_bswap32(*(unsigned*)z);
220       z += 4;
221     }
222 #elif defined(_MSC_VER) && _MSC_VER>=1300
223     while( z<zEnd ){
224       sum += _byteswap_ulong(*(unsigned*)z);
225       z += 4;
226     }
227 #else
228     unsigned sum0 = 0;
229     unsigned sum1 = 0;
230     unsigned sum2 = 0;
231     while(N >= 16){
232       sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
233       sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
234       sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
235       sum  += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
236       z += 16;
237       N -= 16;
238     }
239     while(N >= 4){
240       sum0 += z[0];
241       sum1 += z[1];
242       sum2 += z[2];
243       sum  += z[3];
244       z += 4;
245       N -= 4;
246     }
247     sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
248 #endif
249   }
250   switch(N&3){
251     case 3:   sum += (z[2] << 8);
252     case 2:   sum += (z[1] << 16);
253     case 1:   sum += (z[0] << 24);
254     default:  ;
255   }
256   return sum;
257 }
258 
259 /*
260 ** Create a new delta.
261 **
262 ** The delta is written into a preallocated buffer, zDelta, which
263 ** should be at least 60 bytes longer than the target file, zOut.
264 ** The delta string will be NUL-terminated, but it might also contain
265 ** embedded NUL characters if either the zSrc or zOut files are
266 ** binary.  This function returns the length of the delta string
267 ** in bytes, excluding the final NUL terminator character.
268 **
269 ** Output Format:
270 **
271 ** The delta begins with a base64 number followed by a newline.  This
272 ** number is the number of bytes in the TARGET file.  Thus, given a
273 ** delta file z, a program can compute the size of the output file
274 ** simply by reading the first line and decoding the base-64 number
275 ** found there.  The delta_output_size() routine does exactly this.
276 **
277 ** After the initial size number, the delta consists of a series of
278 ** literal text segments and commands to copy from the SOURCE file.
279 ** A copy command looks like this:
280 **
281 **     NNN@MMM,
282 **
283 ** where NNN is the number of bytes to be copied and MMM is the offset
284 ** into the source file of the first byte (both base-64).   If NNN is 0
285 ** it means copy the rest of the input file.  Literal text is like this:
286 **
287 **     NNN:TTTTT
288 **
289 ** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
290 **
291 ** The last term is of the form
292 **
293 **     NNN;
294 **
295 ** In this case, NNN is a 32-bit bigendian checksum of the output file
296 ** that can be used to verify that the delta applied correctly.  All
297 ** numbers are in base-64.
298 **
299 ** Pure text files generate a pure text delta.  Binary files generate a
300 ** delta that may contain some binary data.
301 **
302 ** Algorithm:
303 **
304 ** The encoder first builds a hash table to help it find matching
305 ** patterns in the source file.  16-byte chunks of the source file
306 ** sampled at evenly spaced intervals are used to populate the hash
307 ** table.
308 **
309 ** Next we begin scanning the target file using a sliding 16-byte
310 ** window.  The hash of the 16-byte window in the target is used to
311 ** search for a matching section in the source file.  When a match
312 ** is found, a copy command is added to the delta.  An effort is
313 ** made to extend the matching section to regions that come before
314 ** and after the 16-byte hash window.  A copy command is only issued
315 ** if the result would use less space that just quoting the text
316 ** literally. Literal text is added to the delta for sections that
317 ** do not match or which can not be encoded efficiently using copy
318 ** commands.
319 */
delta_create(const char * zSrc,unsigned int lenSrc,const char * zOut,unsigned int lenOut,char * zDelta)320 static int delta_create(
321   const char *zSrc,      /* The source or pattern file */
322   unsigned int lenSrc,   /* Length of the source file */
323   const char *zOut,      /* The target file */
324   unsigned int lenOut,   /* Length of the target file */
325   char *zDelta           /* Write the delta into this buffer */
326 ){
327   int i, base;
328   char *zOrigDelta = zDelta;
329   hash h;
330   int nHash;                 /* Number of hash table entries */
331   int *landmark;             /* Primary hash table */
332   int *collide;              /* Collision chain */
333   int lastRead = -1;         /* Last byte of zSrc read by a COPY command */
334 
335   /* Add the target file size to the beginning of the delta
336   */
337   putInt(lenOut, &zDelta);
338   *(zDelta++) = '\n';
339 
340   /* If the source file is very small, it means that we have no
341   ** chance of ever doing a copy command.  Just output a single
342   ** literal segment for the entire target and exit.
343   */
344   if( lenSrc<=NHASH ){
345     putInt(lenOut, &zDelta);
346     *(zDelta++) = ':';
347     memcpy(zDelta, zOut, lenOut);
348     zDelta += lenOut;
349     putInt(checksum(zOut, lenOut), &zDelta);
350     *(zDelta++) = ';';
351     return zDelta - zOrigDelta;
352   }
353 
354   /* Compute the hash table used to locate matching sections in the
355   ** source file.
356   */
357   nHash = lenSrc/NHASH;
358   collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
359   memset(collide, -1, nHash*2*sizeof(int));
360   landmark = &collide[nHash];
361   for(i=0; i<lenSrc-NHASH; i+=NHASH){
362     int hv = hash_once(&zSrc[i]) % nHash;
363     collide[i/NHASH] = landmark[hv];
364     landmark[hv] = i/NHASH;
365   }
366 
367   /* Begin scanning the target file and generating copy commands and
368   ** literal sections of the delta.
369   */
370   base = 0;    /* We have already generated everything before zOut[base] */
371   while( base+NHASH<lenOut ){
372     int iSrc, iBlock;
373     unsigned int bestCnt, bestOfst=0, bestLitsz=0;
374     hash_init(&h, &zOut[base]);
375     i = 0;     /* Trying to match a landmark against zOut[base+i] */
376     bestCnt = 0;
377     while( 1 ){
378       int hv;
379       int limit = 250;
380 
381       hv = hash_32bit(&h) % nHash;
382       iBlock = landmark[hv];
383       while( iBlock>=0 && (limit--)>0 ){
384         /*
385         ** The hash window has identified a potential match against
386         ** landmark block iBlock.  But we need to investigate further.
387         **
388         ** Look for a region in zOut that matches zSrc. Anchor the search
389         ** at zSrc[iSrc] and zOut[base+i].  Do not include anything prior to
390         ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
391         **
392         ** Set cnt equal to the length of the match and set ofst so that
393         ** zSrc[ofst] is the first element of the match.  litsz is the number
394         ** of characters between zOut[base] and the beginning of the match.
395         ** sz will be the overhead (in bytes) needed to encode the copy
396         ** command.  Only generate copy command if the overhead of the
397         ** copy command is less than the amount of literal text to be copied.
398         */
399         int cnt, ofst, litsz;
400         int j, k, x, y;
401         int sz;
402         int limitX;
403 
404         /* Beginning at iSrc, match forwards as far as we can.  j counts
405         ** the number of characters that match */
406         iSrc = iBlock*NHASH;
407         y = base+i;
408         limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
409         for(x=iSrc; x<limitX; x++, y++){
410           if( zSrc[x]!=zOut[y] ) break;
411         }
412         j = x - iSrc - 1;
413 
414         /* Beginning at iSrc-1, match backwards as far as we can.  k counts
415         ** the number of characters that match */
416         for(k=1; k<iSrc && k<=i; k++){
417           if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
418         }
419         k--;
420 
421         /* Compute the offset and size of the matching region */
422         ofst = iSrc-k;
423         cnt = j+k+1;
424         litsz = i-k;  /* Number of bytes of literal text before the copy */
425         /* sz will hold the number of bytes needed to encode the "insert"
426         ** command and the copy command, not counting the "insert" text */
427         sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
428         if( cnt>=sz && cnt>bestCnt ){
429           /* Remember this match only if it is the best so far and it
430           ** does not increase the file size */
431           bestCnt = cnt;
432           bestOfst = iSrc-k;
433           bestLitsz = litsz;
434         }
435 
436         /* Check the next matching block */
437         iBlock = collide[iBlock];
438       }
439 
440       /* We have a copy command that does not cause the delta to be larger
441       ** than a literal insert.  So add the copy command to the delta.
442       */
443       if( bestCnt>0 ){
444         if( bestLitsz>0 ){
445           /* Add an insert command before the copy */
446           putInt(bestLitsz,&zDelta);
447           *(zDelta++) = ':';
448           memcpy(zDelta, &zOut[base], bestLitsz);
449           zDelta += bestLitsz;
450           base += bestLitsz;
451         }
452         base += bestCnt;
453         putInt(bestCnt, &zDelta);
454         *(zDelta++) = '@';
455         putInt(bestOfst, &zDelta);
456         *(zDelta++) = ',';
457         if( bestOfst + bestCnt -1 > lastRead ){
458           lastRead = bestOfst + bestCnt - 1;
459         }
460         bestCnt = 0;
461         break;
462       }
463 
464       /* If we reach this point, it means no match is found so far */
465       if( base+i+NHASH>=lenOut ){
466         /* We have reached the end of the file and have not found any
467         ** matches.  Do an "insert" for everything that does not match */
468         putInt(lenOut-base, &zDelta);
469         *(zDelta++) = ':';
470         memcpy(zDelta, &zOut[base], lenOut-base);
471         zDelta += lenOut-base;
472         base = lenOut;
473         break;
474       }
475 
476       /* Advance the hash by one character.  Keep looking for a match */
477       hash_next(&h, zOut[base+i+NHASH]);
478       i++;
479     }
480   }
481   /* Output a final "insert" record to get all the text at the end of
482   ** the file that does not match anything in the source file.
483   */
484   if( base<lenOut ){
485     putInt(lenOut-base, &zDelta);
486     *(zDelta++) = ':';
487     memcpy(zDelta, &zOut[base], lenOut-base);
488     zDelta += lenOut-base;
489   }
490   /* Output the final checksum record. */
491   putInt(checksum(zOut, lenOut), &zDelta);
492   *(zDelta++) = ';';
493   sqlite3_free(collide);
494   return zDelta - zOrigDelta;
495 }
496 
497 /*
498 ** Return the size (in bytes) of the output from applying
499 ** a delta.
500 **
501 ** This routine is provided so that an procedure that is able
502 ** to call delta_apply() can learn how much space is required
503 ** for the output and hence allocate nor more space that is really
504 ** needed.
505 */
delta_output_size(const char * zDelta,int lenDelta)506 static int delta_output_size(const char *zDelta, int lenDelta){
507   int size;
508   size = deltaGetInt(&zDelta, &lenDelta);
509   if( *zDelta!='\n' ){
510     /* ERROR: size integer not terminated by "\n" */
511     return -1;
512   }
513   return size;
514 }
515 
516 
517 /*
518 ** Apply a delta.
519 **
520 ** The output buffer should be big enough to hold the whole output
521 ** file and a NUL terminator at the end.  The delta_output_size()
522 ** routine will determine this size for you.
523 **
524 ** The delta string should be null-terminated.  But the delta string
525 ** may contain embedded NUL characters (if the input and output are
526 ** binary files) so we also have to pass in the length of the delta in
527 ** the lenDelta parameter.
528 **
529 ** This function returns the size of the output file in bytes (excluding
530 ** the final NUL terminator character).  Except, if the delta string is
531 ** malformed or intended for use with a source file other than zSrc,
532 ** then this routine returns -1.
533 **
534 ** Refer to the delta_create() documentation above for a description
535 ** of the delta file format.
536 */
delta_apply(const char * zSrc,int lenSrc,const char * zDelta,int lenDelta,char * zOut)537 static int delta_apply(
538   const char *zSrc,      /* The source or pattern file */
539   int lenSrc,            /* Length of the source file */
540   const char *zDelta,    /* Delta to apply to the pattern */
541   int lenDelta,          /* Length of the delta */
542   char *zOut             /* Write the output into this preallocated buffer */
543 ){
544   unsigned int limit;
545   unsigned int total = 0;
546 #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
547   char *zOrigOut = zOut;
548 #endif
549 
550   limit = deltaGetInt(&zDelta, &lenDelta);
551   if( *zDelta!='\n' ){
552     /* ERROR: size integer not terminated by "\n" */
553     return -1;
554   }
555   zDelta++; lenDelta--;
556   while( *zDelta && lenDelta>0 ){
557     unsigned int cnt, ofst;
558     cnt = deltaGetInt(&zDelta, &lenDelta);
559     switch( zDelta[0] ){
560       case '@': {
561         zDelta++; lenDelta--;
562         ofst = deltaGetInt(&zDelta, &lenDelta);
563         if( lenDelta>0 && zDelta[0]!=',' ){
564           /* ERROR: copy command not terminated by ',' */
565           return -1;
566         }
567         zDelta++; lenDelta--;
568         total += cnt;
569         if( total>limit ){
570           /* ERROR: copy exceeds output file size */
571           return -1;
572         }
573         if( ofst+cnt > lenSrc ){
574           /* ERROR: copy extends past end of input */
575           return -1;
576         }
577         memcpy(zOut, &zSrc[ofst], cnt);
578         zOut += cnt;
579         break;
580       }
581       case ':': {
582         zDelta++; lenDelta--;
583         total += cnt;
584         if( total>limit ){
585           /* ERROR:  insert command gives an output larger than predicted */
586           return -1;
587         }
588         if( cnt>lenDelta ){
589           /* ERROR: insert count exceeds size of delta */
590           return -1;
591         }
592         memcpy(zOut, zDelta, cnt);
593         zOut += cnt;
594         zDelta += cnt;
595         lenDelta -= cnt;
596         break;
597       }
598       case ';': {
599         zDelta++; lenDelta--;
600         zOut[0] = 0;
601 #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
602         if( cnt!=checksum(zOrigOut, total) ){
603           /* ERROR:  bad checksum */
604           return -1;
605         }
606 #endif
607         if( total!=limit ){
608           /* ERROR: generated size does not match predicted size */
609           return -1;
610         }
611         return total;
612       }
613       default: {
614         /* ERROR: unknown delta operator */
615         return -1;
616       }
617     }
618   }
619   /* ERROR: unterminated delta */
620   return -1;
621 }
622 
623 /*
624 ** SQL functions:  delta_create(X,Y)
625 **
626 ** Return a delta for carrying X into Y.
627 */
deltaCreateFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)628 static void deltaCreateFunc(
629   sqlite3_context *context,
630   int argc,
631   sqlite3_value **argv
632 ){
633   const char *aOrig; int nOrig;  /* old blob */
634   const char *aNew;  int nNew;   /* new blob */
635   char *aOut;        int nOut;   /* output delta */
636 
637   assert( argc==2 );
638   if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
639   if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
640   nOrig = sqlite3_value_bytes(argv[0]);
641   aOrig = (const char*)sqlite3_value_blob(argv[0]);
642   nNew = sqlite3_value_bytes(argv[1]);
643   aNew = (const char*)sqlite3_value_blob(argv[1]);
644   aOut = sqlite3_malloc64(nNew+70);
645   if( aOut==0 ){
646     sqlite3_result_error_nomem(context);
647   }else{
648     nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
649     if( nOut<0 ){
650       sqlite3_free(aOut);
651       sqlite3_result_error(context, "cannot create fossil delta", -1);
652     }else{
653       sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
654     }
655   }
656 }
657 
658 /*
659 ** SQL functions:  delta_apply(X,D)
660 **
661 ** Return the result of applying delta D to input X.
662 */
deltaApplyFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)663 static void deltaApplyFunc(
664   sqlite3_context *context,
665   int argc,
666   sqlite3_value **argv
667 ){
668   const char *aOrig;   int nOrig;        /* The X input */
669   const char *aDelta;  int nDelta;       /* The input delta (D) */
670   char *aOut;          int nOut, nOut2;  /* The output */
671 
672   assert( argc==2 );
673   if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
674   if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
675   nOrig = sqlite3_value_bytes(argv[0]);
676   aOrig = (const char*)sqlite3_value_blob(argv[0]);
677   nDelta = sqlite3_value_bytes(argv[1]);
678   aDelta = (const char*)sqlite3_value_blob(argv[1]);
679 
680   /* Figure out the size of the output */
681   nOut = delta_output_size(aDelta, nDelta);
682   if( nOut<0 ){
683     sqlite3_result_error(context, "corrupt fossil delta", -1);
684     return;
685   }
686   aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
687   if( aOut==0 ){
688     sqlite3_result_error_nomem(context);
689   }else{
690     nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
691     if( nOut2!=nOut ){
692       sqlite3_free(aOut);
693       sqlite3_result_error(context, "corrupt fossil delta", -1);
694     }else{
695       sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
696     }
697   }
698 }
699 
700 
701 /*
702 ** SQL functions:  delta_output_size(D)
703 **
704 ** Return the size of the output that results from applying delta D.
705 */
deltaOutputSizeFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)706 static void deltaOutputSizeFunc(
707   sqlite3_context *context,
708   int argc,
709   sqlite3_value **argv
710 ){
711   const char *aDelta;  int nDelta;       /* The input delta (D) */
712   int nOut;                              /* Size of output */
713   assert( argc==1 );
714   if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
715   nDelta = sqlite3_value_bytes(argv[0]);
716   aDelta = (const char*)sqlite3_value_blob(argv[0]);
717 
718   /* Figure out the size of the output */
719   nOut = delta_output_size(aDelta, nDelta);
720   if( nOut<0 ){
721     sqlite3_result_error(context, "corrupt fossil delta", -1);
722     return;
723   }else{
724     sqlite3_result_int(context, nOut);
725   }
726 }
727 
728 /*****************************************************************************
729 ** Table-valued SQL function:   delta_parse(DELTA)
730 **
731 ** Schema:
732 **
733 **     CREATE TABLE delta_parse(
734 **       op TEXT,
735 **       a1 INT,
736 **       a2 ANY,
737 **       delta HIDDEN BLOB
738 **     );
739 **
740 ** Given an input DELTA, this function parses the delta and returns
741 ** rows for each entry in the delta.  The op column has one of the
742 ** values SIZE, COPY, INSERT, CHECKSUM, ERROR.
743 **
744 ** Assuming no errors, the first row has op='SIZE'.  a1 is the size of
745 ** the output in bytes and a2 is NULL.
746 **
747 ** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT'
748 ** rows.  A COPY row means content is copied from the source into the
749 ** output.  Column a1 is the number of bytes to copy and a2 is the offset
750 ** into source from which to begin copying.  An INSERT row means to
751 ** insert text into the output stream.  Column a1 is the number of bytes
752 ** to insert and column is a BLOB that contains the text to be inserted.
753 **
754 ** The last row of a well-formed delta will have an op value of 'CHECKSUM'.
755 ** The a1 column will be the value of the checksum and a2 will be NULL.
756 **
757 ** If the input delta is not well-formed, then a row with an op value
758 ** of 'ERROR' is returned.  The a1 value of the ERROR row is the offset
759 ** into the delta where the error was encountered and a2 is NULL.
760 */
761 typedef struct deltaparsevtab_vtab deltaparsevtab_vtab;
762 typedef struct deltaparsevtab_cursor deltaparsevtab_cursor;
763 struct deltaparsevtab_vtab {
764   sqlite3_vtab base;  /* Base class - must be first */
765   /* No additional information needed */
766 };
767 struct deltaparsevtab_cursor {
768   sqlite3_vtab_cursor base;  /* Base class - must be first */
769   char *aDelta;              /* The delta being parsed */
770   int nDelta;                /* Number of bytes in the delta */
771   int iCursor;               /* Current cursor location */
772   int eOp;                   /* Name of current operator */
773   unsigned int a1, a2;       /* Arguments to current operator */
774   int iNext;                 /* Next cursor value */
775 };
776 
777 /* Operator names:
778 */
779 static const char *azOp[] = {
780   "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF"
781 };
782 #define DELTAPARSE_OP_SIZE         0
783 #define DELTAPARSE_OP_COPY         1
784 #define DELTAPARSE_OP_INSERT       2
785 #define DELTAPARSE_OP_CHECKSUM     3
786 #define DELTAPARSE_OP_ERROR        4
787 #define DELTAPARSE_OP_EOF          5
788 
789 /*
790 ** The deltaparsevtabConnect() method is invoked to create a new
791 ** deltaparse virtual table.
792 **
793 ** Think of this routine as the constructor for deltaparsevtab_vtab objects.
794 **
795 ** All this routine needs to do is:
796 **
797 **    (1) Allocate the deltaparsevtab_vtab object and initialize all fields.
798 **
799 **    (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
800 **        result set of queries against the virtual table will look like.
801 */
deltaparsevtabConnect(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)802 static int deltaparsevtabConnect(
803   sqlite3 *db,
804   void *pAux,
805   int argc, const char *const*argv,
806   sqlite3_vtab **ppVtab,
807   char **pzErr
808 ){
809   deltaparsevtab_vtab *pNew;
810   int rc;
811 
812   rc = sqlite3_declare_vtab(db,
813            "CREATE TABLE x(op,a1,a2,delta HIDDEN)"
814        );
815   /* For convenience, define symbolic names for the index to each column. */
816 #define DELTAPARSEVTAB_OP     0
817 #define DELTAPARSEVTAB_A1     1
818 #define DELTAPARSEVTAB_A2     2
819 #define DELTAPARSEVTAB_DELTA  3
820   if( rc==SQLITE_OK ){
821     pNew = sqlite3_malloc64( sizeof(*pNew) );
822     *ppVtab = (sqlite3_vtab*)pNew;
823     if( pNew==0 ) return SQLITE_NOMEM;
824     memset(pNew, 0, sizeof(*pNew));
825     sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
826   }
827   return rc;
828 }
829 
830 /*
831 ** This method is the destructor for deltaparsevtab_vtab objects.
832 */
deltaparsevtabDisconnect(sqlite3_vtab * pVtab)833 static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){
834   deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab;
835   sqlite3_free(p);
836   return SQLITE_OK;
837 }
838 
839 /*
840 ** Constructor for a new deltaparsevtab_cursor object.
841 */
deltaparsevtabOpen(sqlite3_vtab * p,sqlite3_vtab_cursor ** ppCursor)842 static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
843   deltaparsevtab_cursor *pCur;
844   pCur = sqlite3_malloc( sizeof(*pCur) );
845   if( pCur==0 ) return SQLITE_NOMEM;
846   memset(pCur, 0, sizeof(*pCur));
847   *ppCursor = &pCur->base;
848   return SQLITE_OK;
849 }
850 
851 /*
852 ** Destructor for a deltaparsevtab_cursor.
853 */
deltaparsevtabClose(sqlite3_vtab_cursor * cur)854 static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){
855   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
856   sqlite3_free(pCur->aDelta);
857   sqlite3_free(pCur);
858   return SQLITE_OK;
859 }
860 
861 
862 /*
863 ** Advance a deltaparsevtab_cursor to its next row of output.
864 */
deltaparsevtabNext(sqlite3_vtab_cursor * cur)865 static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){
866   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
867   const char *z;
868   int i = 0;
869 
870   pCur->iCursor = pCur->iNext;
871   z = pCur->aDelta + pCur->iCursor;
872   pCur->a1 = deltaGetInt(&z, &i);
873   switch( z[0] ){
874     case '@': {
875       z++;
876       pCur->a2 = deltaGetInt(&z, &i);
877       pCur->eOp = DELTAPARSE_OP_COPY;
878       pCur->iNext = (int)(&z[1] - pCur->aDelta);
879       break;
880     }
881     case ':': {
882       z++;
883       pCur->a2 = (unsigned int)(z - pCur->aDelta);
884       pCur->eOp = DELTAPARSE_OP_INSERT;
885       pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta);
886       break;
887     }
888     case ';': {
889       pCur->eOp = DELTAPARSE_OP_CHECKSUM;
890       pCur->iNext = pCur->nDelta;
891       break;
892     }
893     default: {
894       if( pCur->iNext==pCur->nDelta ){
895         pCur->eOp = DELTAPARSE_OP_EOF;
896       }else{
897         pCur->eOp = DELTAPARSE_OP_ERROR;
898         pCur->iNext = pCur->nDelta;
899       }
900       break;
901     }
902   }
903   return SQLITE_OK;
904 }
905 
906 /*
907 ** Return values of columns for the row at which the deltaparsevtab_cursor
908 ** is currently pointing.
909 */
deltaparsevtabColumn(sqlite3_vtab_cursor * cur,sqlite3_context * ctx,int i)910 static int deltaparsevtabColumn(
911   sqlite3_vtab_cursor *cur,   /* The cursor */
912   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
913   int i                       /* Which column to return */
914 ){
915   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
916   switch( i ){
917     case DELTAPARSEVTAB_OP: {
918       sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC);
919       break;
920     }
921     case DELTAPARSEVTAB_A1: {
922       sqlite3_result_int(ctx, pCur->a1);
923       break;
924     }
925     case DELTAPARSEVTAB_A2: {
926       if( pCur->eOp==DELTAPARSE_OP_COPY ){
927         sqlite3_result_int(ctx, pCur->a2);
928       }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){
929         sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1,
930                             SQLITE_TRANSIENT);
931       }
932       break;
933     }
934     case DELTAPARSEVTAB_DELTA: {
935       sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT);
936       break;
937     }
938   }
939   return SQLITE_OK;
940 }
941 
942 /*
943 ** Return the rowid for the current row.  In this implementation, the
944 ** rowid is the same as the output value.
945 */
deltaparsevtabRowid(sqlite3_vtab_cursor * cur,sqlite_int64 * pRowid)946 static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
947   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
948   *pRowid = pCur->iCursor;
949   return SQLITE_OK;
950 }
951 
952 /*
953 ** Return TRUE if the cursor has been moved off of the last
954 ** row of output.
955 */
deltaparsevtabEof(sqlite3_vtab_cursor * cur)956 static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){
957   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
958   return pCur->eOp==DELTAPARSE_OP_EOF;
959 }
960 
961 /*
962 ** This method is called to "rewind" the deltaparsevtab_cursor object back
963 ** to the first row of output.  This method is always called at least
964 ** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or
965 ** deltaparsevtabEof().
966 */
deltaparsevtabFilter(sqlite3_vtab_cursor * pVtabCursor,int idxNum,const char * idxStr,int argc,sqlite3_value ** argv)967 static int deltaparsevtabFilter(
968   sqlite3_vtab_cursor *pVtabCursor,
969   int idxNum, const char *idxStr,
970   int argc, sqlite3_value **argv
971 ){
972   deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor;
973   const char *a;
974   int i = 0;
975   pCur->eOp = DELTAPARSE_OP_ERROR;
976   if( idxNum!=1 ){
977     return SQLITE_OK;
978   }
979   pCur->nDelta = sqlite3_value_bytes(argv[0]);
980   a = (const char*)sqlite3_value_blob(argv[0]);
981   if( pCur->nDelta==0 || a==0 ){
982     return SQLITE_OK;
983   }
984   pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 );
985   if( pCur->aDelta==0 ){
986     pCur->nDelta = 0;
987     return SQLITE_NOMEM;
988   }
989   memcpy(pCur->aDelta, a, pCur->nDelta);
990   pCur->aDelta[pCur->nDelta] = 0;
991   a = pCur->aDelta;
992   pCur->eOp = DELTAPARSE_OP_SIZE;
993   pCur->a1 = deltaGetInt(&a, &i);
994   if( a[0]!='\n' ){
995     pCur->eOp = DELTAPARSE_OP_ERROR;
996     pCur->a1 = pCur->a2 = 0;
997     pCur->iNext = pCur->nDelta;
998     return SQLITE_OK;
999   }
1000   a++;
1001   pCur->iNext = (unsigned int)(a - pCur->aDelta);
1002   return SQLITE_OK;
1003 }
1004 
1005 /*
1006 ** SQLite will invoke this method one or more times while planning a query
1007 ** that uses the virtual table.  This routine needs to create
1008 ** a query plan for each invocation and compute an estimated cost for that
1009 ** plan.
1010 */
deltaparsevtabBestIndex(sqlite3_vtab * tab,sqlite3_index_info * pIdxInfo)1011 static int deltaparsevtabBestIndex(
1012   sqlite3_vtab *tab,
1013   sqlite3_index_info *pIdxInfo
1014 ){
1015   int i;
1016   for(i=0; i<pIdxInfo->nConstraint; i++){
1017     if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue;
1018     if( pIdxInfo->aConstraint[i].usable==0 ) continue;
1019     if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
1020     pIdxInfo->aConstraintUsage[i].argvIndex = 1;
1021     pIdxInfo->aConstraintUsage[i].omit = 1;
1022     pIdxInfo->estimatedCost = (double)1;
1023     pIdxInfo->estimatedRows = 10;
1024     pIdxInfo->idxNum = 1;
1025     return SQLITE_OK;
1026   }
1027   pIdxInfo->idxNum = 0;
1028   pIdxInfo->estimatedCost = (double)0x7fffffff;
1029   pIdxInfo->estimatedRows = 0x7fffffff;
1030   return SQLITE_CONSTRAINT;
1031 }
1032 
1033 /*
1034 ** This following structure defines all the methods for the
1035 ** virtual table.
1036 */
1037 static sqlite3_module deltaparsevtabModule = {
1038   /* iVersion    */ 0,
1039   /* xCreate     */ 0,
1040   /* xConnect    */ deltaparsevtabConnect,
1041   /* xBestIndex  */ deltaparsevtabBestIndex,
1042   /* xDisconnect */ deltaparsevtabDisconnect,
1043   /* xDestroy    */ 0,
1044   /* xOpen       */ deltaparsevtabOpen,
1045   /* xClose      */ deltaparsevtabClose,
1046   /* xFilter     */ deltaparsevtabFilter,
1047   /* xNext       */ deltaparsevtabNext,
1048   /* xEof        */ deltaparsevtabEof,
1049   /* xColumn     */ deltaparsevtabColumn,
1050   /* xRowid      */ deltaparsevtabRowid,
1051   /* xUpdate     */ 0,
1052   /* xBegin      */ 0,
1053   /* xSync       */ 0,
1054   /* xCommit     */ 0,
1055   /* xRollback   */ 0,
1056   /* xFindMethod */ 0,
1057   /* xRename     */ 0,
1058   /* xSavepoint  */ 0,
1059   /* xRelease    */ 0,
1060   /* xRollbackTo */ 0,
1061   /* xShadowName */ 0
1062 };
1063 
1064 
1065 
1066 #ifdef _WIN32
1067 __declspec(dllexport)
1068 #endif
sqlite3_fossildelta_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)1069 int sqlite3_fossildelta_init(
1070   sqlite3 *db,
1071   char **pzErrMsg,
1072   const sqlite3_api_routines *pApi
1073 ){
1074   static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
1075   int rc = SQLITE_OK;
1076   SQLITE_EXTENSION_INIT2(pApi);
1077   (void)pzErrMsg;  /* Unused parameter */
1078   rc = sqlite3_create_function(db, "delta_create", 2, enc, 0,
1079                                deltaCreateFunc, 0, 0);
1080   if( rc==SQLITE_OK ){
1081     rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0,
1082                                  deltaApplyFunc, 0, 0);
1083   }
1084   if( rc==SQLITE_OK ){
1085     rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0,
1086                                  deltaOutputSizeFunc, 0, 0);
1087   }
1088   if( rc==SQLITE_OK ){
1089     rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0);
1090   }
1091   return rc;
1092 }
1093