xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision dfe4e6bb)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # include <string.h>
22 # include <stdio.h>
23 # include <stdlib.h>
24 # include <assert.h>
25 # define ALWAYS(X)  1
26 # define NEVER(X)   0
27   typedef unsigned char u8;
28   typedef unsigned short u16;
29 #endif
30 #include <ctype.h>
31 
32 #ifndef SQLITE_OMIT_VIRTUALTABLE
33 
34 /*
35 ** Character classes for ASCII characters:
36 **
37 **   0   ''        Silent letters:   H W
38 **   1   'A'       Any vowel:   A E I O U (Y)
39 **   2   'B'       A bilabeal stop or fricative:  B F P V W
40 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
41 **   4   'D'       Alveolar stops:  D T
42 **   5   'H'       Letter H at the beginning of a word
43 **   6   'L'       Glide:  L
44 **   7   'R'       Semivowel:  R
45 **   8   'M'       Nasals:  M N
46 **   9   'Y'       Letter Y at the beginning of a word.
47 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
48 **   11  ' '       White space
49 **   12  '?'       Other.
50 */
51 #define CCLASS_SILENT         0
52 #define CCLASS_VOWEL          1
53 #define CCLASS_B              2
54 #define CCLASS_C              3
55 #define CCLASS_D              4
56 #define CCLASS_H              5
57 #define CCLASS_L              6
58 #define CCLASS_R              7
59 #define CCLASS_M              8
60 #define CCLASS_Y              9
61 #define CCLASS_DIGIT         10
62 #define CCLASS_SPACE         11
63 #define CCLASS_OTHER         12
64 
65 /*
66 ** The following table gives the character class for non-initial ASCII
67 ** characters.
68 */
69 static const unsigned char midClass[] = {
70  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
71  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
72  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
73  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
74  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
75  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
81  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
82  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
83  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
84  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
85  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
86  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
87  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
88  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
89  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
90  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
91  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
92  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
93  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
94  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
95  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
96  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
97  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
98  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
99  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
100  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
101  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
102  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
103  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
104  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
105  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
106  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
107  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
108  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
109  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
110  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
111  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
112  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
113 };
114 /*
115 ** This tables gives the character class for ASCII characters that form the
116 ** initial character of a word.  The only difference from midClass is with
117 ** the letters H, W, and Y.
118 */
119 static const unsigned char initClass[] = {
120  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
121  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
122  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
123  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
124  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
125  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
131  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
132  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
133  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
134  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
135  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
136  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
137  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
138  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
139  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
140  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
141  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
142  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
143  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
144  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
145  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
146  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
147  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
148  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
149  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
150  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
151  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
152  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
153  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
154  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
155  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
156  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
157  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
158  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
159  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
160  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
161  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
162  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
163 };
164 
165 /*
166 ** Mapping from the character class number (0-13) to a symbol for each
167 ** character class.  Note that initClass[] can be used to map the class
168 ** symbol back into the class number.
169 */
170 static const unsigned char className[] = ".ABCDHLRMY9 ?";
171 
172 /*
173 ** Generate a "phonetic hash" from a string of ASCII characters
174 ** in zIn[0..nIn-1].
175 **
176 **   * Map characters by character class as defined above.
177 **   * Omit double-letters
178 **   * Omit vowels beside R and L
179 **   * Omit T when followed by CH
180 **   * Omit W when followed by R
181 **   * Omit D when followed by J or G
182 **   * Omit K in KN or G in GN at the beginning of a word
183 **
184 ** Space to hold the result is obtained from sqlite3_malloc()
185 **
186 ** Return NULL if memory allocation fails.
187 */
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
189   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
190   int i;
191   int nOut = 0;
192   char cPrev = 0x77;
193   char cPrevX = 0x77;
194   const unsigned char *aClass = initClass;
195 
196   if( zOut==0 ) return 0;
197   if( nIn>2 ){
198     switch( zIn[0] ){
199       case 'g':
200       case 'k': {
201         if( zIn[1]=='n' ){ zIn++; nIn--; }
202         break;
203       }
204     }
205   }
206   for(i=0; i<nIn; i++){
207     unsigned char c = zIn[i];
208     if( i+1<nIn ){
209       if( c=='w' && zIn[i+1]=='r' ) continue;
210       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
211       if( i+2<nIn ){
212         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
213       }
214     }
215     c = aClass[c&0x7f];
216     if( c==CCLASS_SPACE ) continue;
217     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
218     aClass = midClass;
219     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
220        continue; /* No vowels beside L or R */
221     }
222     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
223        nOut--;   /* No vowels beside L or R */
224     }
225     cPrev = c;
226     if( c==CCLASS_SILENT ) continue;
227     cPrevX = c;
228     c = className[c];
229     assert( nOut>=0 );
230     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
231   }
232   zOut[nOut] = 0;
233   return zOut;
234 }
235 
236 /*
237 ** This is an SQL function wrapper around phoneticHash().  See
238 ** the description of phoneticHash() for additional information.
239 */
240 static void phoneticHashSqlFunc(
241   sqlite3_context *context,
242   int argc,
243   sqlite3_value **argv
244 ){
245   const unsigned char *zIn;
246   unsigned char *zOut;
247 
248   zIn = sqlite3_value_text(argv[0]);
249   if( zIn==0 ) return;
250   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
251   if( zOut==0 ){
252     sqlite3_result_error_nomem(context);
253   }else{
254     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
255   }
256 }
257 
258 /*
259 ** Return the character class number for a character given its
260 ** context.
261 */
262 static char characterClass(char cPrev, char c){
263   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
264 }
265 
266 /*
267 ** Return the cost of inserting or deleting character c immediately
268 ** following character cPrev.  If cPrev==0, that means c is the first
269 ** character of the word.
270 */
271 static int insertOrDeleteCost(char cPrev, char c, char cNext){
272   char classC = characterClass(cPrev, c);
273   char classCprev;
274 
275   if( classC==CCLASS_SILENT ){
276     /* Insert or delete "silent" characters such as H or W */
277     return 1;
278   }
279   if( cPrev==c ){
280     /* Repeated characters, or miss a repeat */
281     return 10;
282   }
283   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
284     return 20;  /* Insert a vowel before or after 'r' */
285   }
286   classCprev = characterClass(cPrev, cPrev);
287   if( classC==classCprev ){
288     if( classC==CCLASS_VOWEL ){
289       /* Remove or add a new vowel to a vowel cluster */
290       return 15;
291     }else{
292       /* Remove or add a consonant not in the same class */
293       return 50;
294     }
295   }
296 
297   /* any other character insertion or deletion */
298   return 100;
299 }
300 
301 /*
302 ** Divide the insertion cost by this factor when appending to the
303 ** end of the word.
304 */
305 #define FINAL_INS_COST_DIV  4
306 
307 /*
308 ** Return the cost of substituting cTo in place of cFrom assuming
309 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
310 ** character of the word.
311 */
312 static int substituteCost(char cPrev, char cFrom, char cTo){
313   char classFrom, classTo;
314   if( cFrom==cTo ){
315     /* Exact match */
316     return 0;
317   }
318   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
319     /* differ only in case */
320     return 0;
321   }
322   classFrom = characterClass(cPrev, cFrom);
323   classTo = characterClass(cPrev, cTo);
324   if( classFrom==classTo ){
325     /* Same character class */
326     return 40;
327   }
328   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
329       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
330     /* Convert from one consonant to another, but in a different class */
331     return 75;
332   }
333   /* Any other subsitution */
334   return 100;
335 }
336 
337 /*
338 ** Given two strings zA and zB which are pure ASCII, return the cost
339 ** of transforming zA into zB.  If zA ends with '*' assume that it is
340 ** a prefix of zB and give only minimal penalty for extra characters
341 ** on the end of zB.
342 **
343 ** Smaller numbers mean a closer match.
344 **
345 ** Negative values indicate an error:
346 **    -1  One of the inputs is NULL
347 **    -2  Non-ASCII characters on input
348 **    -3  Unable to allocate memory
349 **
350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
351 ** of zB that matched the pattern in zA. If zA does not end with a '*',
352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
353 ** If zA does end in a '*', then it is the number of bytes in the prefix
354 ** of zB that was deemed to match zA.
355 */
356 static int editdist1(const char *zA, const char *zB, int *pnMatch){
357   int nA, nB;            /* Number of characters in zA[] and zB[] */
358   int xA, xB;            /* Loop counters for zA[] and zB[] */
359   char cA = 0, cB;       /* Current character of zA and zB */
360   char cAprev, cBprev;   /* Previous character of zA and zB */
361   char cAnext, cBnext;   /* Next character in zA and zB */
362   int d;                 /* North-west cost value */
363   int dc = 0;            /* North-west character value */
364   int res;               /* Final result */
365   int *m;                /* The cost matrix */
366   char *cx;              /* Corresponding character values */
367   int *toFree = 0;       /* Malloced space */
368   int nMatch = 0;
369   int mStack[60+15];     /* Stack space to use if not too much is needed */
370 
371   /* Early out if either input is NULL */
372   if( zA==0 || zB==0 ) return -1;
373 
374   /* Skip any common prefix */
375   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
376   if( pnMatch ) *pnMatch = nMatch;
377   if( zA[0]==0 && zB[0]==0 ) return 0;
378 
379 #if 0
380   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
381 #endif
382 
383   /* Verify input strings and measure their lengths */
384   for(nA=0; zA[nA]; nA++){
385     if( zA[nA]&0x80 ) return -2;
386   }
387   for(nB=0; zB[nB]; nB++){
388     if( zB[nB]&0x80 ) return -2;
389   }
390 
391   /* Special processing if either string is empty */
392   if( nA==0 ){
393     cBprev = (char)dc;
394     for(xB=res=0; (cB = zB[xB])!=0; xB++){
395       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
396       cBprev = cB;
397     }
398     return res;
399   }
400   if( nB==0 ){
401     cAprev = (char)dc;
402     for(xA=res=0; (cA = zA[xA])!=0; xA++){
403       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
404       cAprev = cA;
405     }
406     return res;
407   }
408 
409   /* A is a prefix of B */
410   if( zA[0]=='*' && zA[1]==0 ) return 0;
411 
412   /* Allocate and initialize the Wagner matrix */
413   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
414     m = mStack;
415   }else{
416     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
417     if( m==0 ) return -3;
418   }
419   cx = (char*)&m[nB+1];
420 
421   /* Compute the Wagner edit distance */
422   m[0] = 0;
423   cx[0] = (char)dc;
424   cBprev = (char)dc;
425   for(xB=1; xB<=nB; xB++){
426     cBnext = zB[xB];
427     cB = zB[xB-1];
428     cx[xB] = cB;
429     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
430     cBprev = cB;
431   }
432   cAprev = (char)dc;
433   for(xA=1; xA<=nA; xA++){
434     int lastA = (xA==nA);
435     cA = zA[xA-1];
436     cAnext = zA[xA];
437     if( cA=='*' && lastA ) break;
438     d = m[0];
439     dc = cx[0];
440     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
441     cBprev = 0;
442     for(xB=1; xB<=nB; xB++){
443       int totalCost, insCost, delCost, subCost, ncx;
444       cB = zB[xB-1];
445       cBnext = zB[xB];
446 
447       /* Cost to insert cB */
448       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
449       if( lastA ) insCost /= FINAL_INS_COST_DIV;
450 
451       /* Cost to delete cA */
452       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
453 
454       /* Cost to substitute cA->cB */
455       subCost = substituteCost(cx[xB-1], cA, cB);
456 
457       /* Best cost */
458       totalCost = insCost + m[xB-1];
459       ncx = cB;
460       if( (delCost + m[xB])<totalCost ){
461         totalCost = delCost + m[xB];
462         ncx = cA;
463       }
464       if( (subCost + d)<totalCost ){
465         totalCost = subCost + d;
466       }
467 
468 #if 0
469       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
470              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
471              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
472              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
473 #endif
474 
475       /* Update the matrix */
476       d = m[xB];
477       dc = cx[xB];
478       m[xB] = totalCost;
479       cx[xB] = (char)ncx;
480       cBprev = cB;
481     }
482     cAprev = cA;
483   }
484 
485   /* Free the wagner matrix and return the result */
486   if( cA=='*' ){
487     res = m[1];
488     for(xB=1; xB<=nB; xB++){
489       if( m[xB]<res ){
490         res = m[xB];
491         if( pnMatch ) *pnMatch = xB+nMatch;
492       }
493     }
494   }else{
495     res = m[nB];
496     /* In the current implementation, pnMatch is always NULL if zA does
497     ** not end in "*" */
498     assert( pnMatch==0 );
499   }
500   sqlite3_free(toFree);
501   return res;
502 }
503 
504 /*
505 ** Function:    editdist(A,B)
506 **
507 ** Return the cost of transforming string A into string B.  Both strings
508 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
509 ** a prefix of B and extra characters on the end of B have minimal additional
510 ** cost.
511 */
512 static void editdistSqlFunc(
513   sqlite3_context *context,
514   int argc,
515   sqlite3_value **argv
516 ){
517   int res = editdist1(
518                     (const char*)sqlite3_value_text(argv[0]),
519                     (const char*)sqlite3_value_text(argv[1]),
520                     0);
521   if( res<0 ){
522     if( res==(-3) ){
523       sqlite3_result_error_nomem(context);
524     }else if( res==(-2) ){
525       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
526     }else{
527       sqlite3_result_error(context, "NULL input to editdist()", -1);
528     }
529   }else{
530     sqlite3_result_int(context, res);
531   }
532 }
533 
534 /* End of the fixed-cost edit distance implementation
535 ******************************************************************************
536 *****************************************************************************
537 ** Begin: Configurable cost unicode edit distance routines
538 */
539 /* Forward declaration of structures */
540 typedef struct EditDist3Cost EditDist3Cost;
541 typedef struct EditDist3Config EditDist3Config;
542 typedef struct EditDist3Point EditDist3Point;
543 typedef struct EditDist3From EditDist3From;
544 typedef struct EditDist3FromString EditDist3FromString;
545 typedef struct EditDist3To EditDist3To;
546 typedef struct EditDist3ToString EditDist3ToString;
547 typedef struct EditDist3Lang EditDist3Lang;
548 
549 
550 /*
551 ** An entry in the edit cost table
552 */
553 struct EditDist3Cost {
554   EditDist3Cost *pNext;     /* Next cost element */
555   u8 nFrom;                 /* Number of bytes in aFrom */
556   u8 nTo;                   /* Number of bytes in aTo */
557   u16 iCost;                /* Cost of this transformation */
558   char a[4]    ;            /* FROM string followed by TO string */
559   /* Additional TO and FROM string bytes appended as necessary */
560 };
561 
562 /*
563 ** Edit costs for a particular language ID
564 */
565 struct EditDist3Lang {
566   int iLang;             /* Language ID */
567   int iInsCost;          /* Default insertion cost */
568   int iDelCost;          /* Default deletion cost */
569   int iSubCost;          /* Default substitution cost */
570   EditDist3Cost *pCost;  /* Costs */
571 };
572 
573 
574 /*
575 ** The default EditDist3Lang object, with default costs.
576 */
577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
578 
579 /*
580 ** Complete configuration
581 */
582 struct EditDist3Config {
583   int nLang;             /* Number of language IDs.  Size of a[] */
584   EditDist3Lang *a;      /* One for each distinct language ID */
585 };
586 
587 /*
588 ** Extra information about each character in the FROM string.
589 */
590 struct EditDist3From {
591   int nSubst;              /* Number of substitution cost entries */
592   int nDel;                /* Number of deletion cost entries */
593   int nByte;               /* Number of bytes in this character */
594   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
595   EditDist3Cost **apDel;   /* Array of deletion cost entries */
596 };
597 
598 /*
599 ** A precompiled FROM string.
600 *
601 ** In the common case we expect the FROM string to be reused multiple times.
602 ** In other words, the common case will be to measure the edit distance
603 ** from a single origin string to multiple target strings.
604 */
605 struct EditDist3FromString {
606   char *z;                 /* The complete text of the FROM string */
607   int n;                   /* Number of characters in the FROM string */
608   int isPrefix;            /* True if ends with '*' character */
609   EditDist3From *a;        /* Extra info about each char of the FROM string */
610 };
611 
612 /*
613 ** Extra information about each character in the TO string.
614 */
615 struct EditDist3To {
616   int nIns;                /* Number of insertion cost entries */
617   int nByte;               /* Number of bytes in this character */
618   EditDist3Cost **apIns;   /* Array of deletion cost entries */
619 };
620 
621 /*
622 ** A precompiled FROM string
623 */
624 struct EditDist3ToString {
625   char *z;                 /* The complete text of the TO string */
626   int n;                   /* Number of characters in the TO string */
627   EditDist3To *a;          /* Extra info about each char of the TO string */
628 };
629 
630 /*
631 ** Clear or delete an instance of the object that records all edit-distance
632 ** weights.
633 */
634 static void editDist3ConfigClear(EditDist3Config *p){
635   int i;
636   if( p==0 ) return;
637   for(i=0; i<p->nLang; i++){
638     EditDist3Cost *pCost, *pNext;
639     pCost = p->a[i].pCost;
640     while( pCost ){
641       pNext = pCost->pNext;
642       sqlite3_free(pCost);
643       pCost = pNext;
644     }
645   }
646   sqlite3_free(p->a);
647   memset(p, 0, sizeof(*p));
648 }
649 static void editDist3ConfigDelete(void *pIn){
650   EditDist3Config *p = (EditDist3Config*)pIn;
651   editDist3ConfigClear(p);
652   sqlite3_free(p);
653 }
654 
655 /*
656 ** Load all edit-distance weights from a table.
657 */
658 static int editDist3ConfigLoad(
659   EditDist3Config *p,      /* The edit distance configuration to load */
660   sqlite3 *db,            /* Load from this database */
661   const char *zTable      /* Name of the table from which to load */
662 ){
663   sqlite3_stmt *pStmt;
664   int rc, rc2;
665   char *zSql;
666   int iLangPrev = -9999;
667   EditDist3Lang *pLang = 0;
668 
669   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
670                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
671   if( zSql==0 ) return SQLITE_NOMEM;
672   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
673   sqlite3_free(zSql);
674   if( rc ) return rc;
675   editDist3ConfigClear(p);
676   while( sqlite3_step(pStmt)==SQLITE_ROW ){
677     int iLang = sqlite3_column_int(pStmt, 0);
678     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
679     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
680     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
681     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
682     int iCost = sqlite3_column_int(pStmt, 3);
683 
684     assert( zFrom!=0 || nFrom==0 );
685     assert( zTo!=0 || nTo==0 );
686     if( nFrom>100 || nTo>100 ) continue;
687     if( iCost<0 ) continue;
688     if( pLang==0 || iLang!=iLangPrev ){
689       EditDist3Lang *pNew;
690       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
691       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
692       p->a = pNew;
693       pLang = &p->a[p->nLang];
694       p->nLang++;
695       pLang->iLang = iLang;
696       pLang->iInsCost = 100;
697       pLang->iDelCost = 100;
698       pLang->iSubCost = 150;
699       pLang->pCost = 0;
700       iLangPrev = iLang;
701     }
702     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
703       pLang->iDelCost = iCost;
704     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
705       pLang->iInsCost = iCost;
706     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
707       pLang->iSubCost = iCost;
708     }else{
709       EditDist3Cost *pCost;
710       int nExtra = nFrom + nTo - 4;
711       if( nExtra<0 ) nExtra = 0;
712       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
713       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
714       pCost->nFrom = (u8)nFrom;
715       pCost->nTo = (u8)nTo;
716       pCost->iCost = (u16)iCost;
717       memcpy(pCost->a, zFrom, nFrom);
718       memcpy(pCost->a + nFrom, zTo, nTo);
719       pCost->pNext = pLang->pCost;
720       pLang->pCost = pCost;
721     }
722   }
723   rc2 = sqlite3_finalize(pStmt);
724   if( rc==SQLITE_OK ) rc = rc2;
725   return rc;
726 }
727 
728 /*
729 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
730 ** of N.
731 */
732 static int utf8Len(unsigned char c, int N){
733   int len = 1;
734   if( c>0x7f ){
735     if( (c&0xe0)==0xc0 ){
736       len = 2;
737     }else if( (c&0xf0)==0xe0 ){
738       len = 3;
739     }else{
740       len = 4;
741     }
742   }
743   if( len>N ) len = N;
744   return len;
745 }
746 
747 /*
748 ** Return TRUE (non-zero) if the To side of the given cost matches
749 ** the given string.
750 */
751 static int matchTo(EditDist3Cost *p, const char *z, int n){
752   if( p->nTo>n ) return 0;
753   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
754   return 1;
755 }
756 
757 /*
758 ** Return TRUE (non-zero) if the From side of the given cost matches
759 ** the given string.
760 */
761 static int matchFrom(EditDist3Cost *p, const char *z, int n){
762   assert( p->nFrom<=n );
763   if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
764   return 1;
765 }
766 
767 /*
768 ** Return TRUE (non-zero) of the next FROM character and the next TO
769 ** character are the same.
770 */
771 static int matchFromTo(
772   EditDist3FromString *pStr,  /* Left hand string */
773   int n1,                     /* Index of comparison character on the left */
774   const char *z2,             /* Right-handl comparison character */
775   int n2                      /* Bytes remaining in z2[] */
776 ){
777   int b1 = pStr->a[n1].nByte;
778   if( b1>n2 ) return 0;
779   if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
780   return 1;
781 }
782 
783 /*
784 ** Delete an EditDist3FromString objecct
785 */
786 static void editDist3FromStringDelete(EditDist3FromString *p){
787   int i;
788   if( p ){
789     for(i=0; i<p->n; i++){
790       sqlite3_free(p->a[i].apDel);
791       sqlite3_free(p->a[i].apSubst);
792     }
793     sqlite3_free(p);
794   }
795 }
796 
797 /*
798 ** Create a EditDist3FromString object.
799 */
800 static EditDist3FromString *editDist3FromStringNew(
801   const EditDist3Lang *pLang,
802   const char *z,
803   int n
804 ){
805   EditDist3FromString *pStr;
806   EditDist3Cost *p;
807   int i;
808 
809   if( z==0 ) return 0;
810   if( n<0 ) n = (int)strlen(z);
811   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
812   if( pStr==0 ) return 0;
813   pStr->a = (EditDist3From*)&pStr[1];
814   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
815   pStr->n = n;
816   pStr->z = (char*)&pStr->a[n];
817   memcpy(pStr->z, z, n+1);
818   if( n && z[n-1]=='*' ){
819     pStr->isPrefix = 1;
820     n--;
821     pStr->n--;
822     pStr->z[n] = 0;
823   }else{
824     pStr->isPrefix = 0;
825   }
826 
827   for(i=0; i<n; i++){
828     EditDist3From *pFrom = &pStr->a[i];
829     memset(pFrom, 0, sizeof(*pFrom));
830     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
831     for(p=pLang->pCost; p; p=p->pNext){
832       EditDist3Cost **apNew;
833       if( i+p->nFrom>n ) continue;
834       if( matchFrom(p, z+i, n-i)==0 ) continue;
835       if( p->nTo==0 ){
836         apNew = sqlite3_realloc64(pFrom->apDel,
837                                 sizeof(*apNew)*(pFrom->nDel+1));
838         if( apNew==0 ) break;
839         pFrom->apDel = apNew;
840         apNew[pFrom->nDel++] = p;
841       }else{
842         apNew = sqlite3_realloc64(pFrom->apSubst,
843                                 sizeof(*apNew)*(pFrom->nSubst+1));
844         if( apNew==0 ) break;
845         pFrom->apSubst = apNew;
846         apNew[pFrom->nSubst++] = p;
847       }
848     }
849     if( p ){
850       editDist3FromStringDelete(pStr);
851       pStr = 0;
852       break;
853     }
854   }
855   return pStr;
856 }
857 
858 /*
859 ** Update entry m[i] such that it is the minimum of its current value
860 ** and m[j]+iCost.
861 **
862 ** If the iCost is 1,000,000 or greater, then consider the cost to be
863 ** infinite and skip the update.
864 */
865 static void updateCost(
866   unsigned int *m,
867   int i,
868   int j,
869   int iCost
870 ){
871   assert( iCost>=0 );
872   if( iCost<10000 ){
873     unsigned int b = m[j] + iCost;
874     if( b<m[i] ) m[i] = b;
875   }
876 }
877 
878 /*
879 ** How much stack space (int bytes) to use for Wagner matrix in
880 ** editDist3Core().  If more space than this is required, the entire
881 ** matrix is taken from the heap.  To reduce the load on the memory
882 ** allocator, make this value as large as practical for the
883 ** architecture in use.
884 */
885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
886 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
887 #endif
888 
889 /* Compute the edit distance between two strings.
890 **
891 ** If an error occurs, return a negative number which is the error code.
892 **
893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
895 ** not contain the pattern for a prefix-search, then this is always the number
896 ** of characters in z2. If pFrom does contain a prefix search pattern, then
897 ** it is the number of characters in the prefix of z2 that was deemed to
898 ** match pFrom.
899 */
900 static int editDist3Core(
901   EditDist3FromString *pFrom,  /* The FROM string */
902   const char *z2,              /* The TO string */
903   int n2,                      /* Length of the TO string */
904   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
905   int *pnMatch                 /* OUT: Characters in matched prefix */
906 ){
907   int k, n;
908   int i1, b1;
909   int i2, b2;
910   EditDist3FromString f = *pFrom;
911   EditDist3To *a2;
912   unsigned int *m;
913   unsigned int *pToFree;
914   int szRow;
915   EditDist3Cost *p;
916   int res;
917   sqlite3_uint64 nByte;
918   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
919 
920   /* allocate the Wagner matrix and the aTo[] array for the TO string */
921   n = (f.n+1)*(n2+1);
922   n = (n+1)&~1;
923   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
924   if( nByte<=sizeof(stackSpace) ){
925     m = stackSpace;
926     pToFree = 0;
927   }else{
928     m = pToFree = sqlite3_malloc64( nByte );
929     if( m==0 ) return -1;            /* Out of memory */
930   }
931   a2 = (EditDist3To*)&m[n];
932   memset(a2, 0, sizeof(a2[0])*n2);
933 
934   /* Fill in the a1[] matrix for all characters of the TO string */
935   for(i2=0; i2<n2; i2++){
936     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
937     for(p=pLang->pCost; p; p=p->pNext){
938       EditDist3Cost **apNew;
939       if( p->nFrom>0 ) continue;
940       if( i2+p->nTo>n2 ) continue;
941       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
942       a2[i2].nIns++;
943       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
944       if( apNew==0 ){
945         res = -1;  /* Out of memory */
946         goto editDist3Abort;
947       }
948       a2[i2].apIns = apNew;
949       a2[i2].apIns[a2[i2].nIns-1] = p;
950     }
951   }
952 
953   /* Prepare to compute the minimum edit distance */
954   szRow = f.n+1;
955   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
956   m[0] = 0;
957 
958   /* First fill in the top-row of the matrix with FROM deletion costs */
959   for(i1=0; i1<f.n; i1 += b1){
960     b1 = f.a[i1].nByte;
961     updateCost(m, i1+b1, i1, pLang->iDelCost);
962     for(k=0; k<f.a[i1].nDel; k++){
963       p = f.a[i1].apDel[k];
964       updateCost(m, i1+p->nFrom, i1, p->iCost);
965     }
966   }
967 
968   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
969   for(i2=0; i2<n2; i2 += b2){
970     int rx;      /* Starting index for current row */
971     int rxp;     /* Starting index for previous row */
972     b2 = a2[i2].nByte;
973     rx = szRow*(i2+b2);
974     rxp = szRow*i2;
975     updateCost(m, rx, rxp, pLang->iInsCost);
976     for(k=0; k<a2[i2].nIns; k++){
977       p = a2[i2].apIns[k];
978       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
979     }
980     for(i1=0; i1<f.n; i1+=b1){
981       int cx;    /* Index of current cell */
982       int cxp;   /* Index of cell immediately to the left */
983       int cxd;   /* Index of cell to the left and one row above */
984       int cxu;   /* Index of cell immediately above */
985       b1 = f.a[i1].nByte;
986       cxp = rx + i1;
987       cx = cxp + b1;
988       cxd = rxp + i1;
989       cxu = cxd + b1;
990       updateCost(m, cx, cxp, pLang->iDelCost);
991       for(k=0; k<f.a[i1].nDel; k++){
992         p = f.a[i1].apDel[k];
993         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
994       }
995       updateCost(m, cx, cxu, pLang->iInsCost);
996       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
997         updateCost(m, cx, cxd, 0);
998       }
999       updateCost(m, cx, cxd, pLang->iSubCost);
1000       for(k=0; k<f.a[i1].nSubst; k++){
1001         p = f.a[i1].apSubst[k];
1002         if( matchTo(p, z2+i2, n2-i2) ){
1003           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1004         }
1005       }
1006     }
1007   }
1008 
1009 #if 0  /* Enable for debugging */
1010   printf("         ^");
1011   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1012   printf("\n   ^:");
1013   for(i1=0; i1<szRow; i1++){
1014     int v = m[i1];
1015     if( v>9999 ) printf(" ****");
1016     else         printf(" %4d", v);
1017   }
1018   printf("\n");
1019   for(i2=0; i2<n2; i2++){
1020     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1021     for(i1=0; i1<szRow; i1++){
1022       int v = m[(i2+1)*szRow+i1];
1023       if( v>9999 ) printf(" ****");
1024       else         printf(" %4d", v);
1025     }
1026     printf("\n");
1027   }
1028 #endif
1029 
1030   /* Free memory allocations and return the result */
1031   res = (int)m[szRow*(n2+1)-1];
1032   n = n2;
1033   if( f.isPrefix ){
1034     for(i2=1; i2<=n2; i2++){
1035       int b = m[szRow*i2-1];
1036       if( b<=res ){
1037         res = b;
1038         n = i2 - 1;
1039       }
1040     }
1041   }
1042   if( pnMatch ){
1043     int nExtra = 0;
1044     for(k=0; k<n; k++){
1045       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1046     }
1047     *pnMatch = n - nExtra;
1048   }
1049 
1050 editDist3Abort:
1051   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1052   sqlite3_free(pToFree);
1053   return res;
1054 }
1055 
1056 /*
1057 ** Get an appropriate EditDist3Lang object.
1058 */
1059 static const EditDist3Lang *editDist3FindLang(
1060   EditDist3Config *pConfig,
1061   int iLang
1062 ){
1063   int i;
1064   for(i=0; i<pConfig->nLang; i++){
1065     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1066   }
1067   return &editDist3Lang;
1068 }
1069 
1070 /*
1071 ** Function:    editdist3(A,B,iLang)
1072 **              editdist3(tablename)
1073 **
1074 ** Return the cost of transforming string A into string B using edit
1075 ** weights for iLang.
1076 **
1077 ** The second form loads edit weights into memory from a table.
1078 */
1079 static void editDist3SqlFunc(
1080   sqlite3_context *context,
1081   int argc,
1082   sqlite3_value **argv
1083 ){
1084   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1085   sqlite3 *db = sqlite3_context_db_handle(context);
1086   int rc;
1087   if( argc==1 ){
1088     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1089     rc = editDist3ConfigLoad(pConfig, db, zTable);
1090     if( rc ) sqlite3_result_error_code(context, rc);
1091   }else{
1092     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1093     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1094     int nA = sqlite3_value_bytes(argv[0]);
1095     int nB = sqlite3_value_bytes(argv[1]);
1096     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1097     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1098     EditDist3FromString *pFrom;
1099     int dist;
1100 
1101     pFrom = editDist3FromStringNew(pLang, zA, nA);
1102     if( pFrom==0 ){
1103       sqlite3_result_error_nomem(context);
1104       return;
1105     }
1106     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1107     editDist3FromStringDelete(pFrom);
1108     if( dist==(-1) ){
1109       sqlite3_result_error_nomem(context);
1110     }else{
1111       sqlite3_result_int(context, dist);
1112     }
1113   }
1114 }
1115 
1116 /*
1117 ** Register the editDist3 function with SQLite
1118 */
1119 static int editDist3Install(sqlite3 *db){
1120   int rc;
1121   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1122   if( pConfig==0 ) return SQLITE_NOMEM;
1123   memset(pConfig, 0, sizeof(*pConfig));
1124   rc = sqlite3_create_function_v2(db, "editdist3",
1125               2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1126   if( rc==SQLITE_OK ){
1127     rc = sqlite3_create_function_v2(db, "editdist3",
1128                 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1129   }
1130   if( rc==SQLITE_OK ){
1131     rc = sqlite3_create_function_v2(db, "editdist3",
1132                 1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0,
1133                 editDist3ConfigDelete);
1134   }else{
1135     sqlite3_free(pConfig);
1136   }
1137   return rc;
1138 }
1139 /* End configurable cost unicode edit distance routines
1140 ******************************************************************************
1141 ******************************************************************************
1142 ** Begin transliterate unicode-to-ascii implementation
1143 */
1144 
1145 #if !SQLITE_AMALGAMATION
1146 /*
1147 ** This lookup table is used to help decode the first byte of
1148 ** a multi-byte UTF8 character.
1149 */
1150 static const unsigned char sqlite3Utf8Trans1[] = {
1151   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1152   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1153   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1154   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1155   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1156   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1157   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1158   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1159 };
1160 #endif
1161 
1162 /*
1163 ** Return the value of the first UTF-8 character in the string.
1164 */
1165 static int utf8Read(const unsigned char *z, int n, int *pSize){
1166   int c, i;
1167 
1168   /* All callers to this routine (in the current implementation)
1169   ** always have n>0. */
1170   if( NEVER(n==0) ){
1171     c = i = 0;
1172   }else{
1173     c = z[0];
1174     i = 1;
1175     if( c>=0xc0 ){
1176       c = sqlite3Utf8Trans1[c-0xc0];
1177       while( i<n && (z[i] & 0xc0)==0x80 ){
1178         c = (c<<6) + (0x3f & z[i++]);
1179       }
1180     }
1181   }
1182   *pSize = i;
1183   return c;
1184 }
1185 
1186 /*
1187 ** Return the number of characters in the utf-8 string in the nIn byte
1188 ** buffer pointed to by zIn.
1189 */
1190 static int utf8Charlen(const char *zIn, int nIn){
1191   int i;
1192   int nChar = 0;
1193   for(i=0; i<nIn; nChar++){
1194     int sz;
1195     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1196     i += sz;
1197   }
1198   return nChar;
1199 }
1200 
1201 /*
1202 ** Table of translations from unicode characters into ASCII.
1203 */
1204 static const struct {
1205  unsigned short int cFrom;
1206  unsigned char cTo0, cTo1;
1207 } translit[] = {
1208   { 0x00A0,  0x20, 0x00 },  /*   to   */
1209   { 0x00B5,  0x75, 0x00 },  /* µ to u */
1210   { 0x00C0,  0x41, 0x00 },  /* À to A */
1211   { 0x00C1,  0x41, 0x00 },  /* Á to A */
1212   { 0x00C2,  0x41, 0x00 },  /* Â to A */
1213   { 0x00C3,  0x41, 0x00 },  /* Ã to A */
1214   { 0x00C4,  0x41, 0x65 },  /* Ä to Ae */
1215   { 0x00C5,  0x41, 0x61 },  /* Å to Aa */
1216   { 0x00C6,  0x41, 0x45 },  /* Æ to AE */
1217   { 0x00C7,  0x43, 0x00 },  /* Ç to C */
1218   { 0x00C8,  0x45, 0x00 },  /* È to E */
1219   { 0x00C9,  0x45, 0x00 },  /* É to E */
1220   { 0x00CA,  0x45, 0x00 },  /* Ê to E */
1221   { 0x00CB,  0x45, 0x00 },  /* Ë to E */
1222   { 0x00CC,  0x49, 0x00 },  /* Ì to I */
1223   { 0x00CD,  0x49, 0x00 },  /* Í to I */
1224   { 0x00CE,  0x49, 0x00 },  /* Î to I */
1225   { 0x00CF,  0x49, 0x00 },  /* Ï to I */
1226   { 0x00D0,  0x44, 0x00 },  /* Ð to D */
1227   { 0x00D1,  0x4E, 0x00 },  /* Ñ to N */
1228   { 0x00D2,  0x4F, 0x00 },  /* Ò to O */
1229   { 0x00D3,  0x4F, 0x00 },  /* Ó to O */
1230   { 0x00D4,  0x4F, 0x00 },  /* Ô to O */
1231   { 0x00D5,  0x4F, 0x00 },  /* Õ to O */
1232   { 0x00D6,  0x4F, 0x65 },  /* Ö to Oe */
1233   { 0x00D7,  0x78, 0x00 },  /* × to x */
1234   { 0x00D8,  0x4F, 0x00 },  /* Ø to O */
1235   { 0x00D9,  0x55, 0x00 },  /* Ù to U */
1236   { 0x00DA,  0x55, 0x00 },  /* Ú to U */
1237   { 0x00DB,  0x55, 0x00 },  /* Û to U */
1238   { 0x00DC,  0x55, 0x65 },  /* Ü to Ue */
1239   { 0x00DD,  0x59, 0x00 },  /* Ý to Y */
1240   { 0x00DE,  0x54, 0x68 },  /* Þ to Th */
1241   { 0x00DF,  0x73, 0x73 },  /* ß to ss */
1242   { 0x00E0,  0x61, 0x00 },  /* à to a */
1243   { 0x00E1,  0x61, 0x00 },  /* á to a */
1244   { 0x00E2,  0x61, 0x00 },  /* â to a */
1245   { 0x00E3,  0x61, 0x00 },  /* ã to a */
1246   { 0x00E4,  0x61, 0x65 },  /* ä to ae */
1247   { 0x00E5,  0x61, 0x61 },  /* å to aa */
1248   { 0x00E6,  0x61, 0x65 },  /* æ to ae */
1249   { 0x00E7,  0x63, 0x00 },  /* ç to c */
1250   { 0x00E8,  0x65, 0x00 },  /* è to e */
1251   { 0x00E9,  0x65, 0x00 },  /* é to e */
1252   { 0x00EA,  0x65, 0x00 },  /* ê to e */
1253   { 0x00EB,  0x65, 0x00 },  /* ë to e */
1254   { 0x00EC,  0x69, 0x00 },  /* ì to i */
1255   { 0x00ED,  0x69, 0x00 },  /* í to i */
1256   { 0x00EE,  0x69, 0x00 },  /* î to i */
1257   { 0x00EF,  0x69, 0x00 },  /* ï to i */
1258   { 0x00F0,  0x64, 0x00 },  /* ð to d */
1259   { 0x00F1,  0x6E, 0x00 },  /* ñ to n */
1260   { 0x00F2,  0x6F, 0x00 },  /* ò to o */
1261   { 0x00F3,  0x6F, 0x00 },  /* ó to o */
1262   { 0x00F4,  0x6F, 0x00 },  /* ô to o */
1263   { 0x00F5,  0x6F, 0x00 },  /* õ to o */
1264   { 0x00F6,  0x6F, 0x65 },  /* ö to oe */
1265   { 0x00F7,  0x3A, 0x00 },  /* ÷ to : */
1266   { 0x00F8,  0x6F, 0x00 },  /* ø to o */
1267   { 0x00F9,  0x75, 0x00 },  /* ù to u */
1268   { 0x00FA,  0x75, 0x00 },  /* ú to u */
1269   { 0x00FB,  0x75, 0x00 },  /* û to u */
1270   { 0x00FC,  0x75, 0x65 },  /* ü to ue */
1271   { 0x00FD,  0x79, 0x00 },  /* ý to y */
1272   { 0x00FE,  0x74, 0x68 },  /* þ to th */
1273   { 0x00FF,  0x79, 0x00 },  /* ÿ to y */
1274   { 0x0100,  0x41, 0x00 },  /* Ā to A */
1275   { 0x0101,  0x61, 0x00 },  /* ā to a */
1276   { 0x0102,  0x41, 0x00 },  /* Ă to A */
1277   { 0x0103,  0x61, 0x00 },  /* ă to a */
1278   { 0x0104,  0x41, 0x00 },  /* Ą to A */
1279   { 0x0105,  0x61, 0x00 },  /* ą to a */
1280   { 0x0106,  0x43, 0x00 },  /* Ć to C */
1281   { 0x0107,  0x63, 0x00 },  /* ć to c */
1282   { 0x0108,  0x43, 0x68 },  /* Ĉ to Ch */
1283   { 0x0109,  0x63, 0x68 },  /* ĉ to ch */
1284   { 0x010A,  0x43, 0x00 },  /* Ċ to C */
1285   { 0x010B,  0x63, 0x00 },  /* ċ to c */
1286   { 0x010C,  0x43, 0x00 },  /* Č to C */
1287   { 0x010D,  0x63, 0x00 },  /* č to c */
1288   { 0x010E,  0x44, 0x00 },  /* Ď to D */
1289   { 0x010F,  0x64, 0x00 },  /* ď to d */
1290   { 0x0110,  0x44, 0x00 },  /* Đ to D */
1291   { 0x0111,  0x64, 0x00 },  /* đ to d */
1292   { 0x0112,  0x45, 0x00 },  /* Ē to E */
1293   { 0x0113,  0x65, 0x00 },  /* ē to e */
1294   { 0x0114,  0x45, 0x00 },  /* Ĕ to E */
1295   { 0x0115,  0x65, 0x00 },  /* ĕ to e */
1296   { 0x0116,  0x45, 0x00 },  /* Ė to E */
1297   { 0x0117,  0x65, 0x00 },  /* ė to e */
1298   { 0x0118,  0x45, 0x00 },  /* Ę to E */
1299   { 0x0119,  0x65, 0x00 },  /* ę to e */
1300   { 0x011A,  0x45, 0x00 },  /* Ě to E */
1301   { 0x011B,  0x65, 0x00 },  /* ě to e */
1302   { 0x011C,  0x47, 0x68 },  /* Ĝ to Gh */
1303   { 0x011D,  0x67, 0x68 },  /* ĝ to gh */
1304   { 0x011E,  0x47, 0x00 },  /* Ğ to G */
1305   { 0x011F,  0x67, 0x00 },  /* ğ to g */
1306   { 0x0120,  0x47, 0x00 },  /* Ġ to G */
1307   { 0x0121,  0x67, 0x00 },  /* ġ to g */
1308   { 0x0122,  0x47, 0x00 },  /* Ģ to G */
1309   { 0x0123,  0x67, 0x00 },  /* ģ to g */
1310   { 0x0124,  0x48, 0x68 },  /* Ĥ to Hh */
1311   { 0x0125,  0x68, 0x68 },  /* ĥ to hh */
1312   { 0x0126,  0x48, 0x00 },  /* Ħ to H */
1313   { 0x0127,  0x68, 0x00 },  /* ħ to h */
1314   { 0x0128,  0x49, 0x00 },  /* Ĩ to I */
1315   { 0x0129,  0x69, 0x00 },  /* ĩ to i */
1316   { 0x012A,  0x49, 0x00 },  /* Ī to I */
1317   { 0x012B,  0x69, 0x00 },  /* ī to i */
1318   { 0x012C,  0x49, 0x00 },  /* Ĭ to I */
1319   { 0x012D,  0x69, 0x00 },  /* ĭ to i */
1320   { 0x012E,  0x49, 0x00 },  /* Į to I */
1321   { 0x012F,  0x69, 0x00 },  /* į to i */
1322   { 0x0130,  0x49, 0x00 },  /* İ to I */
1323   { 0x0131,  0x69, 0x00 },  /* ı to i */
1324   { 0x0132,  0x49, 0x4A },  /* IJ to IJ */
1325   { 0x0133,  0x69, 0x6A },  /* ij to ij */
1326   { 0x0134,  0x4A, 0x68 },  /* Ĵ to Jh */
1327   { 0x0135,  0x6A, 0x68 },  /* ĵ to jh */
1328   { 0x0136,  0x4B, 0x00 },  /* Ķ to K */
1329   { 0x0137,  0x6B, 0x00 },  /* ķ to k */
1330   { 0x0138,  0x6B, 0x00 },  /* ĸ to k */
1331   { 0x0139,  0x4C, 0x00 },  /* Ĺ to L */
1332   { 0x013A,  0x6C, 0x00 },  /* ĺ to l */
1333   { 0x013B,  0x4C, 0x00 },  /* Ļ to L */
1334   { 0x013C,  0x6C, 0x00 },  /* ļ to l */
1335   { 0x013D,  0x4C, 0x00 },  /* Ľ to L */
1336   { 0x013E,  0x6C, 0x00 },  /* ľ to l */
1337   { 0x013F,  0x4C, 0x2E },  /* Ŀ to L. */
1338   { 0x0140,  0x6C, 0x2E },  /* ŀ to l. */
1339   { 0x0141,  0x4C, 0x00 },  /* Ł to L */
1340   { 0x0142,  0x6C, 0x00 },  /* ł to l */
1341   { 0x0143,  0x4E, 0x00 },  /* Ń to N */
1342   { 0x0144,  0x6E, 0x00 },  /* ń to n */
1343   { 0x0145,  0x4E, 0x00 },  /* Ņ to N */
1344   { 0x0146,  0x6E, 0x00 },  /* ņ to n */
1345   { 0x0147,  0x4E, 0x00 },  /* Ň to N */
1346   { 0x0148,  0x6E, 0x00 },  /* ň to n */
1347   { 0x0149,  0x27, 0x6E },  /* ʼn to 'n */
1348   { 0x014A,  0x4E, 0x47 },  /* Ŋ to NG */
1349   { 0x014B,  0x6E, 0x67 },  /* ŋ to ng */
1350   { 0x014C,  0x4F, 0x00 },  /* Ō to O */
1351   { 0x014D,  0x6F, 0x00 },  /* ō to o */
1352   { 0x014E,  0x4F, 0x00 },  /* Ŏ to O */
1353   { 0x014F,  0x6F, 0x00 },  /* ŏ to o */
1354   { 0x0150,  0x4F, 0x00 },  /* Ő to O */
1355   { 0x0151,  0x6F, 0x00 },  /* ő to o */
1356   { 0x0152,  0x4F, 0x45 },  /* Œ to OE */
1357   { 0x0153,  0x6F, 0x65 },  /* œ to oe */
1358   { 0x0154,  0x52, 0x00 },  /* Ŕ to R */
1359   { 0x0155,  0x72, 0x00 },  /* ŕ to r */
1360   { 0x0156,  0x52, 0x00 },  /* Ŗ to R */
1361   { 0x0157,  0x72, 0x00 },  /* ŗ to r */
1362   { 0x0158,  0x52, 0x00 },  /* Ř to R */
1363   { 0x0159,  0x72, 0x00 },  /* ř to r */
1364   { 0x015A,  0x53, 0x00 },  /* Ś to S */
1365   { 0x015B,  0x73, 0x00 },  /* ś to s */
1366   { 0x015C,  0x53, 0x68 },  /* Ŝ to Sh */
1367   { 0x015D,  0x73, 0x68 },  /* ŝ to sh */
1368   { 0x015E,  0x53, 0x00 },  /* Ş to S */
1369   { 0x015F,  0x73, 0x00 },  /* ş to s */
1370   { 0x0160,  0x53, 0x00 },  /* Š to S */
1371   { 0x0161,  0x73, 0x00 },  /* š to s */
1372   { 0x0162,  0x54, 0x00 },  /* Ţ to T */
1373   { 0x0163,  0x74, 0x00 },  /* ţ to t */
1374   { 0x0164,  0x54, 0x00 },  /* Ť to T */
1375   { 0x0165,  0x74, 0x00 },  /* ť to t */
1376   { 0x0166,  0x54, 0x00 },  /* Ŧ to T */
1377   { 0x0167,  0x74, 0x00 },  /* ŧ to t */
1378   { 0x0168,  0x55, 0x00 },  /* Ũ to U */
1379   { 0x0169,  0x75, 0x00 },  /* ũ to u */
1380   { 0x016A,  0x55, 0x00 },  /* Ū to U */
1381   { 0x016B,  0x75, 0x00 },  /* ū to u */
1382   { 0x016C,  0x55, 0x00 },  /* Ŭ to U */
1383   { 0x016D,  0x75, 0x00 },  /* ŭ to u */
1384   { 0x016E,  0x55, 0x00 },  /* Ů to U */
1385   { 0x016F,  0x75, 0x00 },  /* ů to u */
1386   { 0x0170,  0x55, 0x00 },  /* Ű to U */
1387   { 0x0171,  0x75, 0x00 },  /* ű to u */
1388   { 0x0172,  0x55, 0x00 },  /* Ų to U */
1389   { 0x0173,  0x75, 0x00 },  /* ų to u */
1390   { 0x0174,  0x57, 0x00 },  /* Ŵ to W */
1391   { 0x0175,  0x77, 0x00 },  /* ŵ to w */
1392   { 0x0176,  0x59, 0x00 },  /* Ŷ to Y */
1393   { 0x0177,  0x79, 0x00 },  /* ŷ to y */
1394   { 0x0178,  0x59, 0x00 },  /* Ÿ to Y */
1395   { 0x0179,  0x5A, 0x00 },  /* Ź to Z */
1396   { 0x017A,  0x7A, 0x00 },  /* ź to z */
1397   { 0x017B,  0x5A, 0x00 },  /* Ż to Z */
1398   { 0x017C,  0x7A, 0x00 },  /* ż to z */
1399   { 0x017D,  0x5A, 0x00 },  /* Ž to Z */
1400   { 0x017E,  0x7A, 0x00 },  /* ž to z */
1401   { 0x017F,  0x73, 0x00 },  /* ſ to s */
1402   { 0x0192,  0x66, 0x00 },  /* ƒ to f */
1403   { 0x0218,  0x53, 0x00 },  /* Ș to S */
1404   { 0x0219,  0x73, 0x00 },  /* ș to s */
1405   { 0x021A,  0x54, 0x00 },  /* Ț to T */
1406   { 0x021B,  0x74, 0x00 },  /* ț to t */
1407   { 0x0386,  0x41, 0x00 },  /* Ά to A */
1408   { 0x0388,  0x45, 0x00 },  /* Έ to E */
1409   { 0x0389,  0x49, 0x00 },  /* Ή to I */
1410   { 0x038A,  0x49, 0x00 },  /* Ί to I */
1411   { 0x038C,  0x4f, 0x00 },  /* Ό to O */
1412   { 0x038E,  0x59, 0x00 },  /* Ύ to Y */
1413   { 0x038F,  0x4f, 0x00 },  /* Ώ to O */
1414   { 0x0390,  0x69, 0x00 },  /* ΐ to i */
1415   { 0x0391,  0x41, 0x00 },  /* Α to A */
1416   { 0x0392,  0x42, 0x00 },  /* Β to B */
1417   { 0x0393,  0x47, 0x00 },  /* Γ to G */
1418   { 0x0394,  0x44, 0x00 },  /* Δ to D */
1419   { 0x0395,  0x45, 0x00 },  /* Ε to E */
1420   { 0x0396,  0x5a, 0x00 },  /* Ζ to Z */
1421   { 0x0397,  0x49, 0x00 },  /* Η to I */
1422   { 0x0398,  0x54, 0x68 },  /* Θ to Th */
1423   { 0x0399,  0x49, 0x00 },  /* Ι to I */
1424   { 0x039A,  0x4b, 0x00 },  /* Κ to K */
1425   { 0x039B,  0x4c, 0x00 },  /* Λ to L */
1426   { 0x039C,  0x4d, 0x00 },  /* Μ to M */
1427   { 0x039D,  0x4e, 0x00 },  /* Ν to N */
1428   { 0x039E,  0x58, 0x00 },  /* Ξ to X */
1429   { 0x039F,  0x4f, 0x00 },  /* Ο to O */
1430   { 0x03A0,  0x50, 0x00 },  /* Π to P */
1431   { 0x03A1,  0x52, 0x00 },  /* Ρ to R */
1432   { 0x03A3,  0x53, 0x00 },  /* Σ to S */
1433   { 0x03A4,  0x54, 0x00 },  /* Τ to T */
1434   { 0x03A5,  0x59, 0x00 },  /* Υ to Y */
1435   { 0x03A6,  0x46, 0x00 },  /* Φ to F */
1436   { 0x03A7,  0x43, 0x68 },  /* Χ to Ch */
1437   { 0x03A8,  0x50, 0x73 },  /* Ψ to Ps */
1438   { 0x03A9,  0x4f, 0x00 },  /* Ω to O */
1439   { 0x03AA,  0x49, 0x00 },  /* Ϊ to I */
1440   { 0x03AB,  0x59, 0x00 },  /* Ϋ to Y */
1441   { 0x03AC,  0x61, 0x00 },  /* ά to a */
1442   { 0x03AD,  0x65, 0x00 },  /* έ to e */
1443   { 0x03AE,  0x69, 0x00 },  /* ή to i */
1444   { 0x03AF,  0x69, 0x00 },  /* ί to i */
1445   { 0x03B1,  0x61, 0x00 },  /* α to a */
1446   { 0x03B2,  0x62, 0x00 },  /* β to b */
1447   { 0x03B3,  0x67, 0x00 },  /* γ to g */
1448   { 0x03B4,  0x64, 0x00 },  /* δ to d */
1449   { 0x03B5,  0x65, 0x00 },  /* ε to e */
1450   { 0x03B6,  0x7a, 0x00 },  /* ζ to z */
1451   { 0x03B7,  0x69, 0x00 },  /* η to i */
1452   { 0x03B8,  0x74, 0x68 },  /* θ to th */
1453   { 0x03B9,  0x69, 0x00 },  /* ι to i */
1454   { 0x03BA,  0x6b, 0x00 },  /* κ to k */
1455   { 0x03BB,  0x6c, 0x00 },  /* λ to l */
1456   { 0x03BC,  0x6d, 0x00 },  /* μ to m */
1457   { 0x03BD,  0x6e, 0x00 },  /* ν to n */
1458   { 0x03BE,  0x78, 0x00 },  /* ξ to x */
1459   { 0x03BF,  0x6f, 0x00 },  /* ο to o */
1460   { 0x03C0,  0x70, 0x00 },  /* π to p */
1461   { 0x03C1,  0x72, 0x00 },  /* ρ to r */
1462   { 0x03C3,  0x73, 0x00 },  /* σ to s */
1463   { 0x03C4,  0x74, 0x00 },  /* τ to t */
1464   { 0x03C5,  0x79, 0x00 },  /* υ to y */
1465   { 0x03C6,  0x66, 0x00 },  /* φ to f */
1466   { 0x03C7,  0x63, 0x68 },  /* χ to ch */
1467   { 0x03C8,  0x70, 0x73 },  /* ψ to ps */
1468   { 0x03C9,  0x6f, 0x00 },  /* ω to o */
1469   { 0x03CA,  0x69, 0x00 },  /* ϊ to i */
1470   { 0x03CB,  0x79, 0x00 },  /* ϋ to y */
1471   { 0x03CC,  0x6f, 0x00 },  /* ό to o */
1472   { 0x03CD,  0x79, 0x00 },  /* ύ to y */
1473   { 0x03CE,  0x69, 0x00 },  /* ώ to i */
1474   { 0x0400,  0x45, 0x00 },  /* Ѐ to E */
1475   { 0x0401,  0x45, 0x00 },  /* Ё to E */
1476   { 0x0402,  0x44, 0x00 },  /* Ђ to D */
1477   { 0x0403,  0x47, 0x00 },  /* Ѓ to G */
1478   { 0x0404,  0x45, 0x00 },  /* Є to E */
1479   { 0x0405,  0x5a, 0x00 },  /* Ѕ to Z */
1480   { 0x0406,  0x49, 0x00 },  /* І to I */
1481   { 0x0407,  0x49, 0x00 },  /* Ї to I */
1482   { 0x0408,  0x4a, 0x00 },  /* Ј to J */
1483   { 0x0409,  0x49, 0x00 },  /* Љ to I */
1484   { 0x040A,  0x4e, 0x00 },  /* Њ to N */
1485   { 0x040B,  0x44, 0x00 },  /* Ћ to D */
1486   { 0x040C,  0x4b, 0x00 },  /* Ќ to K */
1487   { 0x040D,  0x49, 0x00 },  /* Ѝ to I */
1488   { 0x040E,  0x55, 0x00 },  /* Ў to U */
1489   { 0x040F,  0x44, 0x00 },  /* Џ to D */
1490   { 0x0410,  0x41, 0x00 },  /* А to A */
1491   { 0x0411,  0x42, 0x00 },  /* Б to B */
1492   { 0x0412,  0x56, 0x00 },  /* В to V */
1493   { 0x0413,  0x47, 0x00 },  /* Г to G */
1494   { 0x0414,  0x44, 0x00 },  /* Д to D */
1495   { 0x0415,  0x45, 0x00 },  /* Е to E */
1496   { 0x0416,  0x5a, 0x68 },  /* Ж to Zh */
1497   { 0x0417,  0x5a, 0x00 },  /* З to Z */
1498   { 0x0418,  0x49, 0x00 },  /* И to I */
1499   { 0x0419,  0x49, 0x00 },  /* Й to I */
1500   { 0x041A,  0x4b, 0x00 },  /* К to K */
1501   { 0x041B,  0x4c, 0x00 },  /* Л to L */
1502   { 0x041C,  0x4d, 0x00 },  /* М to M */
1503   { 0x041D,  0x4e, 0x00 },  /* Н to N */
1504   { 0x041E,  0x4f, 0x00 },  /* О to O */
1505   { 0x041F,  0x50, 0x00 },  /* П to P */
1506   { 0x0420,  0x52, 0x00 },  /* Р to R */
1507   { 0x0421,  0x53, 0x00 },  /* С to S */
1508   { 0x0422,  0x54, 0x00 },  /* Т to T */
1509   { 0x0423,  0x55, 0x00 },  /* У to U */
1510   { 0x0424,  0x46, 0x00 },  /* Ф to F */
1511   { 0x0425,  0x4b, 0x68 },  /* Х to Kh */
1512   { 0x0426,  0x54, 0x63 },  /* Ц to Tc */
1513   { 0x0427,  0x43, 0x68 },  /* Ч to Ch */
1514   { 0x0428,  0x53, 0x68 },  /* Ш to Sh */
1515   { 0x0429,  0x53, 0x68 },  /* Щ to Shch */
1516   { 0x042A,  0x61, 0x00 },  /*  to A */
1517   { 0x042B,  0x59, 0x00 },  /* Ы to Y */
1518   { 0x042C,  0x59, 0x00 },  /*  to Y */
1519   { 0x042D,  0x45, 0x00 },  /* Э to E */
1520   { 0x042E,  0x49, 0x75 },  /* Ю to Iu */
1521   { 0x042F,  0x49, 0x61 },  /* Я to Ia */
1522   { 0x0430,  0x61, 0x00 },  /* а to a */
1523   { 0x0431,  0x62, 0x00 },  /* б to b */
1524   { 0x0432,  0x76, 0x00 },  /* в to v */
1525   { 0x0433,  0x67, 0x00 },  /* г to g */
1526   { 0x0434,  0x64, 0x00 },  /* д to d */
1527   { 0x0435,  0x65, 0x00 },  /* е to e */
1528   { 0x0436,  0x7a, 0x68 },  /* ж to zh */
1529   { 0x0437,  0x7a, 0x00 },  /* з to z */
1530   { 0x0438,  0x69, 0x00 },  /* и to i */
1531   { 0x0439,  0x69, 0x00 },  /* й to i */
1532   { 0x043A,  0x6b, 0x00 },  /* к to k */
1533   { 0x043B,  0x6c, 0x00 },  /* л to l */
1534   { 0x043C,  0x6d, 0x00 },  /* м to m */
1535   { 0x043D,  0x6e, 0x00 },  /* н to n */
1536   { 0x043E,  0x6f, 0x00 },  /* о to o */
1537   { 0x043F,  0x70, 0x00 },  /* п to p */
1538   { 0x0440,  0x72, 0x00 },  /* р to r */
1539   { 0x0441,  0x73, 0x00 },  /* с to s */
1540   { 0x0442,  0x74, 0x00 },  /* т to t */
1541   { 0x0443,  0x75, 0x00 },  /* у to u */
1542   { 0x0444,  0x66, 0x00 },  /* ф to f */
1543   { 0x0445,  0x6b, 0x68 },  /* х to kh */
1544   { 0x0446,  0x74, 0x63 },  /* ц to tc */
1545   { 0x0447,  0x63, 0x68 },  /* ч to ch */
1546   { 0x0448,  0x73, 0x68 },  /* ш to sh */
1547   { 0x0449,  0x73, 0x68 },  /* щ to shch */
1548   { 0x044A,  0x61, 0x00 },  /*  to a */
1549   { 0x044B,  0x79, 0x00 },  /* ы to y */
1550   { 0x044C,  0x79, 0x00 },  /*  to y */
1551   { 0x044D,  0x65, 0x00 },  /* э to e */
1552   { 0x044E,  0x69, 0x75 },  /* ю to iu */
1553   { 0x044F,  0x69, 0x61 },  /* я to ia */
1554   { 0x0450,  0x65, 0x00 },  /* ѐ to e */
1555   { 0x0451,  0x65, 0x00 },  /* ё to e */
1556   { 0x0452,  0x64, 0x00 },  /* ђ to d */
1557   { 0x0453,  0x67, 0x00 },  /* ѓ to g */
1558   { 0x0454,  0x65, 0x00 },  /* є to e */
1559   { 0x0455,  0x7a, 0x00 },  /* ѕ to z */
1560   { 0x0456,  0x69, 0x00 },  /* і to i */
1561   { 0x0457,  0x69, 0x00 },  /* ї to i */
1562   { 0x0458,  0x6a, 0x00 },  /* ј to j */
1563   { 0x0459,  0x69, 0x00 },  /* љ to i */
1564   { 0x045A,  0x6e, 0x00 },  /* њ to n */
1565   { 0x045B,  0x64, 0x00 },  /* ћ to d */
1566   { 0x045C,  0x6b, 0x00 },  /* ќ to k */
1567   { 0x045D,  0x69, 0x00 },  /* ѝ to i */
1568   { 0x045E,  0x75, 0x00 },  /* ў to u */
1569   { 0x045F,  0x64, 0x00 },  /* џ to d */
1570   { 0x1E02,  0x42, 0x00 },  /* Ḃ to B */
1571   { 0x1E03,  0x62, 0x00 },  /* ḃ to b */
1572   { 0x1E0A,  0x44, 0x00 },  /* Ḋ to D */
1573   { 0x1E0B,  0x64, 0x00 },  /* ḋ to d */
1574   { 0x1E1E,  0x46, 0x00 },  /* Ḟ to F */
1575   { 0x1E1F,  0x66, 0x00 },  /* ḟ to f */
1576   { 0x1E40,  0x4D, 0x00 },  /* Ṁ to M */
1577   { 0x1E41,  0x6D, 0x00 },  /* ṁ to m */
1578   { 0x1E56,  0x50, 0x00 },  /* Ṗ to P */
1579   { 0x1E57,  0x70, 0x00 },  /* ṗ to p */
1580   { 0x1E60,  0x53, 0x00 },  /* Ṡ to S */
1581   { 0x1E61,  0x73, 0x00 },  /* ṡ to s */
1582   { 0x1E6A,  0x54, 0x00 },  /* Ṫ to T */
1583   { 0x1E6B,  0x74, 0x00 },  /* ṫ to t */
1584   { 0x1E80,  0x57, 0x00 },  /* Ẁ to W */
1585   { 0x1E81,  0x77, 0x00 },  /* ẁ to w */
1586   { 0x1E82,  0x57, 0x00 },  /* Ẃ to W */
1587   { 0x1E83,  0x77, 0x00 },  /* ẃ to w */
1588   { 0x1E84,  0x57, 0x00 },  /* Ẅ to W */
1589   { 0x1E85,  0x77, 0x00 },  /* ẅ to w */
1590   { 0x1EF2,  0x59, 0x00 },  /* Ỳ to Y */
1591   { 0x1EF3,  0x79, 0x00 },  /* ỳ to y */
1592   { 0xFB00,  0x66, 0x66 },  /* ff to ff */
1593   { 0xFB01,  0x66, 0x69 },  /* fi to fi */
1594   { 0xFB02,  0x66, 0x6C },  /* fl to fl */
1595   { 0xFB05,  0x73, 0x74 },  /* ſt to st */
1596   { 0xFB06,  0x73, 0x74 },  /* st to st */
1597 };
1598 
1599 /*
1600 ** Convert the input string from UTF-8 into pure ASCII by converting
1601 ** all non-ASCII characters to some combination of characters in the
1602 ** ASCII subset.
1603 **
1604 ** The returned string might contain more characters than the input.
1605 **
1606 ** Space to hold the returned string comes from sqlite3_malloc() and
1607 ** should be freed by the caller.
1608 */
1609 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1610   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1611   int c, sz, nOut;
1612   if( zOut==0 ) return 0;
1613   nOut = 0;
1614   while( nIn>0 ){
1615     c = utf8Read(zIn, nIn, &sz);
1616     zIn += sz;
1617     nIn -= sz;
1618     if( c<=127 ){
1619       zOut[nOut++] = (unsigned char)c;
1620     }else{
1621       int xTop, xBtm, x;
1622       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1623       xBtm = 0;
1624       while( xTop>=xBtm ){
1625         x = (xTop + xBtm)/2;
1626         if( translit[x].cFrom==c ){
1627           zOut[nOut++] = translit[x].cTo0;
1628           if( translit[x].cTo1 ){
1629             zOut[nOut++] = translit[x].cTo1;
1630             /* Add an extra "ch" after the "sh" for Щ and щ */
1631             if( c==0x0429 || c== 0x0449 ){
1632               zOut[nOut++] = 'c';
1633               zOut[nOut++] = 'h';
1634             }
1635           }
1636           c = 0;
1637           break;
1638         }else if( translit[x].cFrom>c ){
1639           xTop = x-1;
1640         }else{
1641           xBtm = x+1;
1642         }
1643       }
1644       if( c ) zOut[nOut++] = '?';
1645     }
1646   }
1647   zOut[nOut] = 0;
1648   return zOut;
1649 }
1650 
1651 /*
1652 ** Return the number of characters in the shortest prefix of the input
1653 ** string that transliterates to an ASCII string nTrans bytes or longer.
1654 ** Or, if the transliteration of the input string is less than nTrans
1655 ** bytes in size, return the number of characters in the input string.
1656 */
1657 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1658   int i, c, sz, nOut;
1659   int nChar;
1660 
1661   i = nOut = 0;
1662   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1663     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1664     i += sz;
1665 
1666     nOut++;
1667     if( c>=128 ){
1668       int xTop, xBtm, x;
1669       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1670       xBtm = 0;
1671       while( xTop>=xBtm ){
1672         x = (xTop + xBtm)/2;
1673         if( translit[x].cFrom==c ){
1674           if( translit[x].cTo1 ) nOut++;
1675           if( c==0x0429 || c== 0x0449 ) nOut += 2;
1676           break;
1677         }else if( translit[x].cFrom>c ){
1678           xTop = x-1;
1679         }else{
1680           xBtm = x+1;
1681         }
1682       }
1683     }
1684   }
1685 
1686   return nChar;
1687 }
1688 
1689 
1690 /*
1691 **    spellfix1_translit(X)
1692 **
1693 ** Convert a string that contains non-ASCII Roman characters into
1694 ** pure ASCII.
1695 */
1696 static void transliterateSqlFunc(
1697   sqlite3_context *context,
1698   int argc,
1699   sqlite3_value **argv
1700 ){
1701   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1702   int nIn = sqlite3_value_bytes(argv[0]);
1703   unsigned char *zOut = transliterate(zIn, nIn);
1704   if( zOut==0 ){
1705     sqlite3_result_error_nomem(context);
1706   }else{
1707     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1708   }
1709 }
1710 
1711 /*
1712 **    spellfix1_scriptcode(X)
1713 **
1714 ** Try to determine the dominant script used by the word X and return
1715 ** its ISO 15924 numeric code.
1716 **
1717 ** The current implementation only understands the following scripts:
1718 **
1719 **    215  (Latin)
1720 **    220  (Cyrillic)
1721 **    200  (Greek)
1722 **
1723 ** This routine will return 998 if the input X contains characters from
1724 ** two or more of the above scripts or 999 if X contains no characters
1725 ** from any of the above scripts.
1726 */
1727 static void scriptCodeSqlFunc(
1728   sqlite3_context *context,
1729   int argc,
1730   sqlite3_value **argv
1731 ){
1732   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1733   int nIn = sqlite3_value_bytes(argv[0]);
1734   int c, sz;
1735   int scriptMask = 0;
1736   int res;
1737   int seenDigit = 0;
1738 # define SCRIPT_LATIN       0x0001
1739 # define SCRIPT_CYRILLIC    0x0002
1740 # define SCRIPT_GREEK       0x0004
1741 # define SCRIPT_HEBREW      0x0008
1742 # define SCRIPT_ARABIC      0x0010
1743 
1744   while( nIn>0 ){
1745     c = utf8Read(zIn, nIn, &sz);
1746     zIn += sz;
1747     nIn -= sz;
1748     if( c<0x02af ){
1749       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1750         scriptMask |= SCRIPT_LATIN;
1751       }else if( c>='0' && c<='9' ){
1752         seenDigit = 1;
1753       }
1754     }else if( c>=0x0400 && c<=0x04ff ){
1755       scriptMask |= SCRIPT_CYRILLIC;
1756     }else if( c>=0x0386 && c<=0x03ce ){
1757       scriptMask |= SCRIPT_GREEK;
1758     }else if( c>=0x0590 && c<=0x05ff ){
1759       scriptMask |= SCRIPT_HEBREW;
1760     }else if( c>=0x0600 && c<=0x06ff ){
1761       scriptMask |= SCRIPT_ARABIC;
1762     }
1763   }
1764   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1765   switch( scriptMask ){
1766     case 0:                res = 999; break;
1767     case SCRIPT_LATIN:     res = 215; break;
1768     case SCRIPT_CYRILLIC:  res = 220; break;
1769     case SCRIPT_GREEK:     res = 200; break;
1770     case SCRIPT_HEBREW:    res = 125; break;
1771     case SCRIPT_ARABIC:    res = 160; break;
1772     default:               res = 998; break;
1773   }
1774   sqlite3_result_int(context, res);
1775 }
1776 
1777 /* End transliterate
1778 ******************************************************************************
1779 ******************************************************************************
1780 ** Begin spellfix1 virtual table.
1781 */
1782 
1783 /* Maximum length of a phonehash used for querying the shadow table */
1784 #define SPELLFIX_MX_HASH  32
1785 
1786 /* Maximum number of hash strings to examine per query */
1787 #define SPELLFIX_MX_RUN   1
1788 
1789 typedef struct spellfix1_vtab spellfix1_vtab;
1790 typedef struct spellfix1_cursor spellfix1_cursor;
1791 
1792 /* Fuzzy-search virtual table object */
1793 struct spellfix1_vtab {
1794   sqlite3_vtab base;         /* Base class - must be first */
1795   sqlite3 *db;               /* Database connection */
1796   char *zDbName;             /* Name of database holding this table */
1797   char *zTableName;          /* Name of the virtual table */
1798   char *zCostTable;          /* Table holding edit-distance cost numbers */
1799   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1800 };
1801 
1802 /* Fuzzy-search cursor object */
1803 struct spellfix1_cursor {
1804   sqlite3_vtab_cursor base;    /* Base class - must be first */
1805   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1806   char *zPattern;              /* rhs of MATCH clause */
1807   int idxNum;                  /* idxNum value passed to xFilter() */
1808   int nRow;                    /* Number of rows of content */
1809   int nAlloc;                  /* Number of allocated rows */
1810   int iRow;                    /* Current row of content */
1811   int iLang;                   /* Value of the langid= constraint */
1812   int iTop;                    /* Value of the top= constraint */
1813   int iScope;                  /* Value of the scope= constraint */
1814   int nSearch;                 /* Number of vocabulary items checked */
1815   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1816   struct spellfix1_row {       /* For each row of content */
1817     sqlite3_int64 iRowid;         /* Rowid for this row */
1818     char *zWord;                  /* Text for this row */
1819     int iRank;                    /* Rank for this row */
1820     int iDistance;                /* Distance from pattern for this row */
1821     int iScore;                   /* Score for sorting */
1822     int iMatchlen;                /* Value of matchlen column (or -1) */
1823     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1824   } *a;
1825 };
1826 
1827 /*
1828 ** Construct one or more SQL statements from the format string given
1829 ** and then evaluate those statements. The success code is written
1830 ** into *pRc.
1831 **
1832 ** If *pRc is initially non-zero then this routine is a no-op.
1833 */
1834 static void spellfix1DbExec(
1835   int *pRc,              /* Success code */
1836   sqlite3 *db,           /* Database in which to run SQL */
1837   const char *zFormat,   /* Format string for SQL */
1838   ...                    /* Arguments to the format string */
1839 ){
1840   va_list ap;
1841   char *zSql;
1842   if( *pRc ) return;
1843   va_start(ap, zFormat);
1844   zSql = sqlite3_vmprintf(zFormat, ap);
1845   va_end(ap);
1846   if( zSql==0 ){
1847     *pRc = SQLITE_NOMEM;
1848   }else{
1849     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1850     sqlite3_free(zSql);
1851   }
1852 }
1853 
1854 /*
1855 ** xDisconnect/xDestroy method for the fuzzy-search module.
1856 */
1857 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1858   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1859   int rc = SQLITE_OK;
1860   if( isDestroy ){
1861     sqlite3 *db = p->db;
1862     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1863                   p->zDbName, p->zTableName);
1864   }
1865   if( rc==SQLITE_OK ){
1866     sqlite3_free(p->zTableName);
1867     editDist3ConfigDelete(p->pConfig3);
1868     sqlite3_free(p->zCostTable);
1869     sqlite3_free(p);
1870   }
1871   return rc;
1872 }
1873 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1874   return spellfix1Uninit(0, pVTab);
1875 }
1876 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1877   return spellfix1Uninit(1, pVTab);
1878 }
1879 
1880 /*
1881 ** Make a copy of a string.  Remove leading and trailing whitespace
1882 ** and dequote it.
1883 */
1884 static char *spellfix1Dequote(const char *zIn){
1885   char *zOut;
1886   int i, j;
1887   char c;
1888   while( isspace((unsigned char)zIn[0]) ) zIn++;
1889   zOut = sqlite3_mprintf("%s", zIn);
1890   if( zOut==0 ) return 0;
1891   i = (int)strlen(zOut);
1892 #if 0  /* The parser will never leave spaces at the end */
1893   while( i>0 && isspace(zOut[i-1]) ){ i--; }
1894 #endif
1895   zOut[i] = 0;
1896   c = zOut[0];
1897   if( c=='\'' || c=='"' ){
1898     for(i=1, j=0; ALWAYS(zOut[i]); i++){
1899       zOut[j++] = zOut[i];
1900       if( zOut[i]==c ){
1901         if( zOut[i+1]==c ){
1902           i++;
1903         }else{
1904           zOut[j-1] = 0;
1905           break;
1906         }
1907       }
1908     }
1909   }
1910   return zOut;
1911 }
1912 
1913 
1914 /*
1915 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
1916 **
1917 **   argv[0]   -> module name  ("spellfix1")
1918 **   argv[1]   -> database name
1919 **   argv[2]   -> table name
1920 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
1921 */
1922 static int spellfix1Init(
1923   int isCreate,
1924   sqlite3 *db,
1925   void *pAux,
1926   int argc, const char *const*argv,
1927   sqlite3_vtab **ppVTab,
1928   char **pzErr
1929 ){
1930   spellfix1_vtab *pNew = 0;
1931   /* const char *zModule = argv[0]; // not used */
1932   const char *zDbName = argv[1];
1933   const char *zTableName = argv[2];
1934   int nDbName;
1935   int rc = SQLITE_OK;
1936   int i;
1937 
1938   nDbName = (int)strlen(zDbName);
1939   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
1940   if( pNew==0 ){
1941     rc = SQLITE_NOMEM;
1942   }else{
1943     memset(pNew, 0, sizeof(*pNew));
1944     pNew->zDbName = (char*)&pNew[1];
1945     memcpy(pNew->zDbName, zDbName, nDbName+1);
1946     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
1947     pNew->db = db;
1948     if( pNew->zTableName==0 ){
1949       rc = SQLITE_NOMEM;
1950     }else{
1951       rc = sqlite3_declare_vtab(db,
1952            "CREATE TABLE x(word,rank,distance,langid, "
1953            "score, matchlen, phonehash HIDDEN, "
1954            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
1955            "soundslike HIDDEN, command HIDDEN)"
1956       );
1957 #define SPELLFIX_COL_WORD            0
1958 #define SPELLFIX_COL_RANK            1
1959 #define SPELLFIX_COL_DISTANCE        2
1960 #define SPELLFIX_COL_LANGID          3
1961 #define SPELLFIX_COL_SCORE           4
1962 #define SPELLFIX_COL_MATCHLEN        5
1963 #define SPELLFIX_COL_PHONEHASH       6
1964 #define SPELLFIX_COL_TOP             7
1965 #define SPELLFIX_COL_SCOPE           8
1966 #define SPELLFIX_COL_SRCHCNT         9
1967 #define SPELLFIX_COL_SOUNDSLIKE     10
1968 #define SPELLFIX_COL_COMMAND        11
1969     }
1970     if( rc==SQLITE_OK && isCreate ){
1971       spellfix1DbExec(&rc, db,
1972          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
1973          "  id INTEGER PRIMARY KEY,\n"
1974          "  rank INT,\n"
1975          "  langid INT,\n"
1976          "  word TEXT,\n"
1977          "  k1 TEXT,\n"
1978          "  k2 TEXT\n"
1979          ");\n",
1980          zDbName, zTableName
1981       );
1982       spellfix1DbExec(&rc, db,
1983          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
1984             "ON \"%w_vocab\"(langid,k2);",
1985          zDbName, zTableName, zTableName
1986       );
1987     }
1988     for(i=3; rc==SQLITE_OK && i<argc; i++){
1989       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
1990         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
1991         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
1992         continue;
1993       }
1994       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
1995       rc = SQLITE_ERROR;
1996     }
1997   }
1998 
1999   if( rc && pNew ){
2000     *ppVTab = 0;
2001     spellfix1Uninit(0, &pNew->base);
2002   }else{
2003     *ppVTab = (sqlite3_vtab *)pNew;
2004   }
2005   return rc;
2006 }
2007 
2008 /*
2009 ** The xConnect and xCreate methods
2010 */
2011 static int spellfix1Connect(
2012   sqlite3 *db,
2013   void *pAux,
2014   int argc, const char *const*argv,
2015   sqlite3_vtab **ppVTab,
2016   char **pzErr
2017 ){
2018   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2019 }
2020 static int spellfix1Create(
2021   sqlite3 *db,
2022   void *pAux,
2023   int argc, const char *const*argv,
2024   sqlite3_vtab **ppVTab,
2025   char **pzErr
2026 ){
2027   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2028 }
2029 
2030 /*
2031 ** Clear all of the content from a cursor.
2032 */
2033 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2034   int i;
2035   for(i=0; i<pCur->nRow; i++){
2036     sqlite3_free(pCur->a[i].zWord);
2037   }
2038   pCur->nRow = 0;
2039   pCur->iRow = 0;
2040   pCur->nSearch = 0;
2041   if( pCur->pFullScan ){
2042     sqlite3_finalize(pCur->pFullScan);
2043     pCur->pFullScan = 0;
2044   }
2045 }
2046 
2047 /*
2048 ** Resize the cursor to hold up to N rows of content
2049 */
2050 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2051   struct spellfix1_row *aNew;
2052   assert( N>=pCur->nRow );
2053   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2054   if( aNew==0 && N>0 ){
2055     spellfix1ResetCursor(pCur);
2056     sqlite3_free(pCur->a);
2057     pCur->nAlloc = 0;
2058     pCur->a = 0;
2059   }else{
2060     pCur->nAlloc = N;
2061     pCur->a = aNew;
2062   }
2063 }
2064 
2065 
2066 /*
2067 ** Close a fuzzy-search cursor.
2068 */
2069 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2070   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2071   spellfix1ResetCursor(pCur);
2072   spellfix1ResizeCursor(pCur, 0);
2073   sqlite3_free(pCur->zPattern);
2074   sqlite3_free(pCur);
2075   return SQLITE_OK;
2076 }
2077 
2078 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2079 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2080 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2081 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2082 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2083 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2084 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2085 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2086 
2087 /*
2088 **
2089 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2090 ** above.
2091 **
2092 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2093 ** if specified and in that order.
2094 */
2095 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2096   int iPlan = 0;
2097   int iLangTerm = -1;
2098   int iTopTerm = -1;
2099   int iScopeTerm = -1;
2100   int iDistTerm = -1;
2101   int iRowidTerm = -1;
2102   int i;
2103   const struct sqlite3_index_constraint *pConstraint;
2104   pConstraint = pIdxInfo->aConstraint;
2105   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2106     if( pConstraint->usable==0 ) continue;
2107 
2108     /* Terms of the form:  word MATCH $str */
2109     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2110      && pConstraint->iColumn==SPELLFIX_COL_WORD
2111      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2112     ){
2113       iPlan |= SPELLFIX_IDXNUM_MATCH;
2114       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2115       pIdxInfo->aConstraintUsage[i].omit = 1;
2116     }
2117 
2118     /* Terms of the form:  langid = $langid  */
2119     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2120      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2121      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2122     ){
2123       iPlan |= SPELLFIX_IDXNUM_LANGID;
2124       iLangTerm = i;
2125     }
2126 
2127     /* Terms of the form:  top = $top */
2128     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2129      && pConstraint->iColumn==SPELLFIX_COL_TOP
2130      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2131     ){
2132       iPlan |= SPELLFIX_IDXNUM_TOP;
2133       iTopTerm = i;
2134     }
2135 
2136     /* Terms of the form:  scope = $scope */
2137     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2138      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2139      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2140     ){
2141       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2142       iScopeTerm = i;
2143     }
2144 
2145     /* Terms of the form:  distance < $dist or distance <= $dist */
2146     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2147      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2148      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2149           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2150     ){
2151       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2152         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2153       }else{
2154         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2155       }
2156       iDistTerm = i;
2157     }
2158 
2159     /* Terms of the form:  distance < $dist or distance <= $dist */
2160     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2161      && pConstraint->iColumn<0
2162      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2163     ){
2164       iPlan |= SPELLFIX_IDXNUM_ROWID;
2165       iRowidTerm = i;
2166     }
2167   }
2168   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2169     int idx = 2;
2170     pIdxInfo->idxNum = iPlan;
2171     if( pIdxInfo->nOrderBy==1
2172      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2173      && pIdxInfo->aOrderBy[0].desc==0
2174     ){
2175       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2176     }
2177     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2178       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2179       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2180     }
2181     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2182       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2183       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2184     }
2185     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2186       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2187       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2188     }
2189     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2190       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2191       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2192     }
2193     pIdxInfo->estimatedCost = 1e5;
2194   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2195     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2196     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2197     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2198     pIdxInfo->estimatedCost = 5;
2199   }else{
2200     pIdxInfo->idxNum = 0;
2201     pIdxInfo->estimatedCost = 1e50;
2202   }
2203   return SQLITE_OK;
2204 }
2205 
2206 /*
2207 ** Open a new fuzzy-search cursor.
2208 */
2209 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2210   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2211   spellfix1_cursor *pCur;
2212   pCur = sqlite3_malloc64( sizeof(*pCur) );
2213   if( pCur==0 ) return SQLITE_NOMEM;
2214   memset(pCur, 0, sizeof(*pCur));
2215   pCur->pVTab = p;
2216   *ppCursor = &pCur->base;
2217   return SQLITE_OK;
2218 }
2219 
2220 /*
2221 ** Adjust a distance measurement by the words rank in order to show
2222 ** preference to common words.
2223 */
2224 static int spellfix1Score(int iDistance, int iRank){
2225   int iLog2;
2226   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2227   return iDistance + 32 - iLog2;
2228 }
2229 
2230 /*
2231 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2232 ** that they sort in order of increasing distance.
2233 */
2234 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2235   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2236   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2237   return a->iScore - b->iScore;
2238 }
2239 
2240 /*
2241 ** A structure used to pass information from spellfix1FilterForMatch()
2242 ** into spellfix1RunQuery().
2243 */
2244 typedef struct MatchQuery {
2245   spellfix1_cursor *pCur;          /* The cursor being queried */
2246   sqlite3_stmt *pStmt;             /* shadow table query statment */
2247   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2248   const char *zPattern;            /* Transliterated input string */
2249   int nPattern;                    /* Length of zPattern */
2250   EditDist3FromString *pMatchStr3; /* Original unicode string */
2251   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2252   const EditDist3Lang *pLang;      /* The selected language coefficients */
2253   int iLang;                       /* The language id */
2254   int iScope;                      /* Default scope */
2255   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2256   int rc;                          /* Error code */
2257   int nRun;                  /* Number of prior runs for the same zPattern */
2258   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2259 } MatchQuery;
2260 
2261 /*
2262 ** Run a query looking for the best matches against zPattern using
2263 ** zHash as the character class seed hash.
2264 */
2265 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2266   const char *zK1;
2267   const char *zWord;
2268   int iDist;
2269   int iRank;
2270   int iScore;
2271   int iWorst = 0;
2272   int idx;
2273   int idxWorst = -1;
2274   int i;
2275   int iScope = p->iScope;
2276   spellfix1_cursor *pCur = p->pCur;
2277   sqlite3_stmt *pStmt = p->pStmt;
2278   char zHash1[SPELLFIX_MX_HASH];
2279   char zHash2[SPELLFIX_MX_HASH];
2280   char *zClass;
2281   int nClass;
2282   int rc;
2283 
2284   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2285   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2286   if( zClass==0 ){
2287     p->rc = SQLITE_NOMEM;
2288     return;
2289   }
2290   nClass = (int)strlen(zClass);
2291   if( nClass>SPELLFIX_MX_HASH-2 ){
2292     nClass = SPELLFIX_MX_HASH-2;
2293     zClass[nClass] = 0;
2294   }
2295   if( nClass<=iScope ){
2296     if( nClass>2 ){
2297       iScope = nClass-1;
2298     }else{
2299       iScope = nClass;
2300     }
2301   }
2302   memcpy(zHash1, zClass, iScope);
2303   sqlite3_free(zClass);
2304   zHash1[iScope] = 0;
2305   memcpy(zHash2, zHash1, iScope);
2306   zHash2[iScope] = 'Z';
2307   zHash2[iScope+1] = 0;
2308 #if SPELLFIX_MX_RUN>1
2309   for(i=0; i<p->nRun; i++){
2310     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2311   }
2312 #endif
2313   assert( p->nRun<SPELLFIX_MX_RUN );
2314   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2315   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2316    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2317   ){
2318     p->rc = SQLITE_NOMEM;
2319     return;
2320   }
2321 #if SPELLFIX_MX_RUN>1
2322   for(i=0; i<pCur->nRow; i++){
2323     if( pCur->a[i].iScore>iWorst ){
2324       iWorst = pCur->a[i].iScore;
2325       idxWorst = i;
2326     }
2327   }
2328 #endif
2329   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2330     int iMatchlen = -1;
2331     iRank = sqlite3_column_int(pStmt, 2);
2332     if( p->pMatchStr3 ){
2333       int nWord = sqlite3_column_bytes(pStmt, 1);
2334       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2335       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2336     }else{
2337       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2338       if( zK1==0 ) continue;
2339       iDist = editdist1(p->zPattern, zK1, 0);
2340     }
2341     if( iDist<0 ){
2342       p->rc = SQLITE_NOMEM;
2343       break;
2344     }
2345     pCur->nSearch++;
2346 
2347     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2348     ** check if this row meets it. If not, jump back up to the top of the
2349     ** loop to process the next row. Otherwise, if the row does match the
2350     ** distance constraint, check if the pCur->a[] array is already full.
2351     ** If it is and no explicit "top = ?" constraint was present in the
2352     ** query, grow the array to ensure there is room for the new entry. */
2353     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2354     if( p->iMaxDist>=0 ){
2355       if( iDist>p->iMaxDist ) continue;
2356       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2357         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2358         if( pCur->a==0 ) break;
2359       }
2360     }
2361 
2362     iScore = spellfix1Score(iDist,iRank);
2363     if( pCur->nRow<pCur->nAlloc ){
2364       idx = pCur->nRow;
2365     }else if( iScore<iWorst ){
2366       idx = idxWorst;
2367       sqlite3_free(pCur->a[idx].zWord);
2368     }else{
2369       continue;
2370     }
2371 
2372     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2373     if( pCur->a[idx].zWord==0 ){
2374       p->rc = SQLITE_NOMEM;
2375       break;
2376     }
2377     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2378     pCur->a[idx].iRank = iRank;
2379     pCur->a[idx].iDistance = iDist;
2380     pCur->a[idx].iScore = iScore;
2381     pCur->a[idx].iMatchlen = iMatchlen;
2382     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2383     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2384     if( pCur->nRow==pCur->nAlloc ){
2385       iWorst = pCur->a[0].iScore;
2386       idxWorst = 0;
2387       for(i=1; i<pCur->nRow; i++){
2388         iScore = pCur->a[i].iScore;
2389         if( iWorst<iScore ){
2390           iWorst = iScore;
2391           idxWorst = i;
2392         }
2393       }
2394     }
2395   }
2396   rc = sqlite3_reset(pStmt);
2397   if( rc ) p->rc = rc;
2398 }
2399 
2400 /*
2401 ** This version of the xFilter method work if the MATCH term is present
2402 ** and we are doing a scan.
2403 */
2404 static int spellfix1FilterForMatch(
2405   spellfix1_cursor *pCur,
2406   int argc,
2407   sqlite3_value **argv
2408 ){
2409   int idxNum = pCur->idxNum;
2410   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2411   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2412   char *zPattern;                    /* Transliteration of zMatchThis */
2413   int nPattern;                      /* Length of zPattern */
2414   int iLimit = 20;                   /* Max number of rows of output */
2415   int iScope = 3;                    /* Use this many characters of zClass */
2416   int iLang = 0;                     /* Language code */
2417   char *zSql;                        /* SQL of shadow table query */
2418   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2419   int rc;                            /* Result code */
2420   int idx = 1;                       /* Next available filter parameter */
2421   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2422   MatchQuery x;                      /* For passing info to RunQuery() */
2423 
2424   /* Load the cost table if we have not already done so */
2425   if( p->zCostTable!=0 && p->pConfig3==0 ){
2426     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2427     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2428     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2429     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2430     if( rc ) return rc;
2431   }
2432   memset(&x, 0, sizeof(x));
2433   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2434   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2435 
2436   if( idxNum&2 ){
2437     iLang = sqlite3_value_int(argv[idx++]);
2438   }
2439   if( idxNum&4 ){
2440     iLimit = sqlite3_value_int(argv[idx++]);
2441     if( iLimit<1 ) iLimit = 1;
2442   }
2443   if( idxNum&8 ){
2444     x.iScope = sqlite3_value_int(argv[idx++]);
2445     if( x.iScope<1 ) x.iScope = 1;
2446     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2447   }
2448   if( idxNum&(16|32) ){
2449     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2450     if( idxNum&16 ) x.iMaxDist--;
2451     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2452   }
2453   spellfix1ResetCursor(pCur);
2454   spellfix1ResizeCursor(pCur, iLimit);
2455   zMatchThis = sqlite3_value_text(argv[0]);
2456   if( zMatchThis==0 ) return SQLITE_OK;
2457   if( p->pConfig3 ){
2458     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2459     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2460     if( pMatchStr3==0 ){
2461       x.rc = SQLITE_NOMEM;
2462       goto filter_exit;
2463     }
2464   }else{
2465     x.pLang = 0;
2466   }
2467   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2468   sqlite3_free(pCur->zPattern);
2469   pCur->zPattern = zPattern;
2470   if( zPattern==0 ){
2471     x.rc = SQLITE_NOMEM;
2472     goto filter_exit;
2473   }
2474   nPattern = (int)strlen(zPattern);
2475   if( zPattern[nPattern-1]=='*' ) nPattern--;
2476   zSql = sqlite3_mprintf(
2477      "SELECT id, word, rank, k1"
2478      "  FROM \"%w\".\"%w_vocab\""
2479      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2480      p->zDbName, p->zTableName, iLang
2481   );
2482   if( zSql==0 ){
2483     x.rc = SQLITE_NOMEM;
2484     pStmt = 0;
2485     goto filter_exit;
2486   }
2487   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2488   sqlite3_free(zSql);
2489   pCur->iLang = iLang;
2490   x.pCur = pCur;
2491   x.pStmt = pStmt;
2492   x.zPattern = zPattern;
2493   x.nPattern = nPattern;
2494   x.pMatchStr3 = pMatchStr3;
2495   x.iLang = iLang;
2496   x.rc = rc;
2497   x.pConfig3 = p->pConfig3;
2498   if( x.rc==SQLITE_OK ){
2499     spellfix1RunQuery(&x, zPattern, nPattern);
2500   }
2501 
2502   if( pCur->a ){
2503     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2504     pCur->iTop = iLimit;
2505     pCur->iScope = iScope;
2506   }else{
2507     x.rc = SQLITE_NOMEM;
2508   }
2509 
2510 filter_exit:
2511   sqlite3_finalize(pStmt);
2512   editDist3FromStringDelete(pMatchStr3);
2513   return x.rc;
2514 }
2515 
2516 /*
2517 ** This version of xFilter handles a full-table scan case
2518 */
2519 static int spellfix1FilterForFullScan(
2520   spellfix1_cursor *pCur,
2521   int argc,
2522   sqlite3_value **argv
2523 ){
2524   int rc = SQLITE_OK;
2525   int idxNum = pCur->idxNum;
2526   char *zSql;
2527   spellfix1_vtab *pVTab = pCur->pVTab;
2528   spellfix1ResetCursor(pCur);
2529   assert( idxNum==0 || idxNum==64 );
2530   zSql = sqlite3_mprintf(
2531      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2532      pVTab->zDbName, pVTab->zTableName,
2533      ((idxNum & 64) ? " WHERE rowid=?" : "")
2534   );
2535   if( zSql==0 ) return SQLITE_NOMEM;
2536   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2537   sqlite3_free(zSql);
2538   if( rc==SQLITE_OK && (idxNum & 64) ){
2539     assert( argc==1 );
2540     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2541   }
2542   pCur->nRow = pCur->iRow = 0;
2543   if( rc==SQLITE_OK ){
2544     rc = sqlite3_step(pCur->pFullScan);
2545     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2546     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2547   }else{
2548     pCur->iRow = 0;
2549   }
2550   return rc;
2551 }
2552 
2553 
2554 /*
2555 ** Called to "rewind" a cursor back to the beginning so that
2556 ** it starts its output over again.  Always called at least once
2557 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2558 */
2559 static int spellfix1Filter(
2560   sqlite3_vtab_cursor *cur,
2561   int idxNum, const char *idxStr,
2562   int argc, sqlite3_value **argv
2563 ){
2564   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2565   int rc;
2566   pCur->idxNum = idxNum;
2567   if( idxNum & 1 ){
2568     rc = spellfix1FilterForMatch(pCur, argc, argv);
2569   }else{
2570     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2571   }
2572   return rc;
2573 }
2574 
2575 
2576 /*
2577 ** Advance a cursor to its next row of output
2578 */
2579 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2580   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2581   int rc = SQLITE_OK;
2582   if( pCur->iRow < pCur->nRow ){
2583     if( pCur->pFullScan ){
2584       rc = sqlite3_step(pCur->pFullScan);
2585       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2586       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2587     }else{
2588       pCur->iRow++;
2589     }
2590   }
2591   return rc;
2592 }
2593 
2594 /*
2595 ** Return TRUE if we are at the end-of-file
2596 */
2597 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2598   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2599   return pCur->iRow>=pCur->nRow;
2600 }
2601 
2602 /*
2603 ** Return columns from the current row.
2604 */
2605 static int spellfix1Column(
2606   sqlite3_vtab_cursor *cur,
2607   sqlite3_context *ctx,
2608   int i
2609 ){
2610   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2611   if( pCur->pFullScan ){
2612     if( i<=SPELLFIX_COL_LANGID ){
2613       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2614     }else{
2615       sqlite3_result_null(ctx);
2616     }
2617     return SQLITE_OK;
2618   }
2619   switch( i ){
2620     case SPELLFIX_COL_WORD: {
2621       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2622       break;
2623     }
2624     case SPELLFIX_COL_RANK: {
2625       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2626       break;
2627     }
2628     case SPELLFIX_COL_DISTANCE: {
2629       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2630       break;
2631     }
2632     case SPELLFIX_COL_LANGID: {
2633       sqlite3_result_int(ctx, pCur->iLang);
2634       break;
2635     }
2636     case SPELLFIX_COL_SCORE: {
2637       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2638       break;
2639     }
2640     case SPELLFIX_COL_MATCHLEN: {
2641       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2642       if( iMatchlen<0 ){
2643         int nPattern = (int)strlen(pCur->zPattern);
2644         char *zWord = pCur->a[pCur->iRow].zWord;
2645         int nWord = (int)strlen(zWord);
2646 
2647         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2648           char *zTranslit;
2649           int res;
2650           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2651           if( !zTranslit ) return SQLITE_NOMEM;
2652           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2653           sqlite3_free(zTranslit);
2654           if( res<0 ) return SQLITE_NOMEM;
2655           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2656         }else{
2657           iMatchlen = utf8Charlen(zWord, nWord);
2658         }
2659       }
2660 
2661       sqlite3_result_int(ctx, iMatchlen);
2662       break;
2663     }
2664     case SPELLFIX_COL_PHONEHASH: {
2665       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2666       break;
2667     }
2668     case SPELLFIX_COL_TOP: {
2669       sqlite3_result_int(ctx, pCur->iTop);
2670       break;
2671     }
2672     case SPELLFIX_COL_SCOPE: {
2673       sqlite3_result_int(ctx, pCur->iScope);
2674       break;
2675     }
2676     case SPELLFIX_COL_SRCHCNT: {
2677       sqlite3_result_int(ctx, pCur->nSearch);
2678       break;
2679     }
2680     default: {
2681       sqlite3_result_null(ctx);
2682       break;
2683     }
2684   }
2685   return SQLITE_OK;
2686 }
2687 
2688 /*
2689 ** The rowid.
2690 */
2691 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2692   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2693   if( pCur->pFullScan ){
2694     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2695   }else{
2696     *pRowid = pCur->a[pCur->iRow].iRowid;
2697   }
2698   return SQLITE_OK;
2699 }
2700 
2701 /*
2702 ** This function is called by the xUpdate() method. It returns a string
2703 ** containing the conflict mode that xUpdate() should use for the current
2704 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2705 */
2706 static const char *spellfix1GetConflict(sqlite3 *db){
2707   static const char *azConflict[] = {
2708     /* Note: Instead of "FAIL" - "ABORT". */
2709     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2710   };
2711   int eConflict = sqlite3_vtab_on_conflict(db);
2712 
2713   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2714        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2715        || eConflict==SQLITE_REPLACE
2716   );
2717   assert( SQLITE_ROLLBACK==1 );
2718   assert( SQLITE_IGNORE==2 );
2719   assert( SQLITE_FAIL==3 );
2720   assert( SQLITE_ABORT==4 );
2721   assert( SQLITE_REPLACE==5 );
2722 
2723   return azConflict[eConflict-1];
2724 }
2725 
2726 /*
2727 ** The xUpdate() method.
2728 */
2729 static int spellfix1Update(
2730   sqlite3_vtab *pVTab,
2731   int argc,
2732   sqlite3_value **argv,
2733   sqlite_int64 *pRowid
2734 ){
2735   int rc = SQLITE_OK;
2736   sqlite3_int64 rowid, newRowid;
2737   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2738   sqlite3 *db = p->db;
2739 
2740   if( argc==1 ){
2741     /* A delete operation on the rowid given by argv[0] */
2742     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2743     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2744                            " WHERE id=%lld",
2745                   p->zDbName, p->zTableName, rowid);
2746   }else{
2747     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2748     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2749     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2750     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2751     const unsigned char *zSoundslike =
2752            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2753     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2754     char *zK1, *zK2;
2755     int i;
2756     char c;
2757     const char *zConflict = spellfix1GetConflict(db);
2758 
2759     if( zWord==0 ){
2760       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2761       ** cause zWord to be NULL, so we look at the "command" column to see
2762       ** what special actions to take */
2763       const char *zCmd =
2764          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2765       if( zCmd==0 ){
2766         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2767                                          p->zTableName);
2768         return SQLITE_CONSTRAINT_NOTNULL;
2769       }
2770       if( strcmp(zCmd,"reset")==0 ){
2771         /* Reset the  edit cost table (if there is one). */
2772         editDist3ConfigDelete(p->pConfig3);
2773         p->pConfig3 = 0;
2774         return SQLITE_OK;
2775       }
2776       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2777         editDist3ConfigDelete(p->pConfig3);
2778         p->pConfig3 = 0;
2779         sqlite3_free(p->zCostTable);
2780         p->zCostTable = spellfix1Dequote(zCmd+16);
2781         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2782         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2783           sqlite3_free(p->zCostTable);
2784           p->zCostTable = 0;
2785         }
2786         return SQLITE_OK;
2787       }
2788       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2789                                        p->zTableName, zCmd);
2790       return SQLITE_ERROR;
2791     }
2792     if( iRank<1 ) iRank = 1;
2793     if( zSoundslike ){
2794       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2795     }else{
2796       zK1 = (char*)transliterate(zWord, nWord);
2797     }
2798     if( zK1==0 ) return SQLITE_NOMEM;
2799     for(i=0; (c = zK1[i])!=0; i++){
2800        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2801     }
2802     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2803     if( zK2==0 ){
2804       sqlite3_free(zK1);
2805       return SQLITE_NOMEM;
2806     }
2807     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2808       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2809         spellfix1DbExec(&rc, db,
2810                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2811                "VALUES(%d,%d,%Q,%Q,%Q)",
2812                p->zDbName, p->zTableName,
2813                iRank, iLang, zWord, zK1, zK2
2814         );
2815       }else{
2816         newRowid = sqlite3_value_int64(argv[1]);
2817         spellfix1DbExec(&rc, db,
2818             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2819             "VALUES(%lld,%d,%d,%Q,%Q,%Q)",
2820             zConflict, p->zDbName, p->zTableName,
2821             newRowid, iRank, iLang, zWord, zK1, zK2
2822         );
2823       }
2824       *pRowid = sqlite3_last_insert_rowid(db);
2825     }else{
2826       rowid = sqlite3_value_int64(argv[0]);
2827       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2828       spellfix1DbExec(&rc, db,
2829              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2830              " word=%Q, k1=%Q, k2=%Q WHERE id=%lld",
2831              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2832              zWord, zK1, zK2, rowid
2833       );
2834     }
2835     sqlite3_free(zK1);
2836     sqlite3_free(zK2);
2837   }
2838   return rc;
2839 }
2840 
2841 /*
2842 ** Rename the spellfix1 table.
2843 */
2844 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2845   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2846   sqlite3 *db = p->db;
2847   int rc = SQLITE_OK;
2848   char *zNewName = sqlite3_mprintf("%s", zNew);
2849   if( zNewName==0 ){
2850     return SQLITE_NOMEM;
2851   }
2852   spellfix1DbExec(&rc, db,
2853      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2854      p->zDbName, p->zTableName, zNewName
2855   );
2856   if( rc==SQLITE_OK ){
2857     sqlite3_free(p->zTableName);
2858     p->zTableName = zNewName;
2859   }else{
2860     sqlite3_free(zNewName);
2861   }
2862   return rc;
2863 }
2864 
2865 
2866 /*
2867 ** A virtual table module that provides fuzzy search.
2868 */
2869 static sqlite3_module spellfix1Module = {
2870   0,                       /* iVersion */
2871   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2872   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2873   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2874   spellfix1Disconnect,     /* xDisconnect - close a connection */
2875   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2876   spellfix1Open,           /* xOpen - open a cursor */
2877   spellfix1Close,          /* xClose - close a cursor */
2878   spellfix1Filter,         /* xFilter - configure scan constraints */
2879   spellfix1Next,           /* xNext - advance a cursor */
2880   spellfix1Eof,            /* xEof - check for end of scan */
2881   spellfix1Column,         /* xColumn - read data */
2882   spellfix1Rowid,          /* xRowid - read data */
2883   spellfix1Update,         /* xUpdate */
2884   0,                       /* xBegin */
2885   0,                       /* xSync */
2886   0,                       /* xCommit */
2887   0,                       /* xRollback */
2888   0,                       /* xFindMethod */
2889   spellfix1Rename,         /* xRename */
2890 };
2891 
2892 /*
2893 ** Register the various functions and the virtual table.
2894 */
2895 static int spellfix1Register(sqlite3 *db){
2896   int rc = SQLITE_OK;
2897   int i;
2898   rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0,
2899                                   transliterateSqlFunc, 0, 0);
2900   if( rc==SQLITE_OK ){
2901     rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0,
2902                                   editdistSqlFunc, 0, 0);
2903   }
2904   if( rc==SQLITE_OK ){
2905     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0,
2906                                   phoneticHashSqlFunc, 0, 0);
2907   }
2908   if( rc==SQLITE_OK ){
2909     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0,
2910                                   scriptCodeSqlFunc, 0, 0);
2911   }
2912   if( rc==SQLITE_OK ){
2913     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
2914   }
2915   if( rc==SQLITE_OK ){
2916     rc = editDist3Install(db);
2917   }
2918 
2919   /* Verify sanity of the translit[] table */
2920   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
2921     assert( translit[i].cFrom<translit[i+1].cFrom );
2922   }
2923 
2924   return rc;
2925 }
2926 
2927 #endif /* SQLITE_OMIT_VIRTUALTABLE */
2928 
2929 /*
2930 ** Extension load function.
2931 */
2932 #ifdef _WIN32
2933 __declspec(dllexport)
2934 #endif
2935 int sqlite3_spellfix_init(
2936   sqlite3 *db,
2937   char **pzErrMsg,
2938   const sqlite3_api_routines *pApi
2939 ){
2940   SQLITE_EXTENSION_INIT2(pApi);
2941 #ifndef SQLITE_OMIT_VIRTUALTABLE
2942   return spellfix1Register(db);
2943 #endif
2944   return SQLITE_OK;
2945 }
2946