xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision a3bc8425)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # include <string.h>
22 # include <stdio.h>
23 # include <stdlib.h>
24 # include <assert.h>
25 # define ALWAYS(X)  1
26 # define NEVER(X)   0
27   typedef unsigned char u8;
28   typedef unsigned short u16;
29 #endif
30 #include <ctype.h>
31 
32 #ifndef SQLITE_OMIT_VIRTUALTABLE
33 
34 /*
35 ** Character classes for ASCII characters:
36 **
37 **   0   ''        Silent letters:   H W
38 **   1   'A'       Any vowel:   A E I O U (Y)
39 **   2   'B'       A bilabeal stop or fricative:  B F P V W
40 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
41 **   4   'D'       Alveolar stops:  D T
42 **   5   'H'       Letter H at the beginning of a word
43 **   6   'L'       Glide:  L
44 **   7   'R'       Semivowel:  R
45 **   8   'M'       Nasals:  M N
46 **   9   'Y'       Letter Y at the beginning of a word.
47 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
48 **   11  ' '       White space
49 **   12  '?'       Other.
50 */
51 #define CCLASS_SILENT         0
52 #define CCLASS_VOWEL          1
53 #define CCLASS_B              2
54 #define CCLASS_C              3
55 #define CCLASS_D              4
56 #define CCLASS_H              5
57 #define CCLASS_L              6
58 #define CCLASS_R              7
59 #define CCLASS_M              8
60 #define CCLASS_Y              9
61 #define CCLASS_DIGIT         10
62 #define CCLASS_SPACE         11
63 #define CCLASS_OTHER         12
64 
65 /*
66 ** The following table gives the character class for non-initial ASCII
67 ** characters.
68 */
69 static const unsigned char midClass[] = {
70  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
71  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
72  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
73  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
74  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
75  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
81  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
82  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
83  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
84  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
85  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
86  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
87  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
88  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
89  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
90  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
91  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
92  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
93  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
94  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
95  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
96  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
97  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
98  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
99  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
100  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
101  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
102  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
103  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
104  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
105  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
106  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
107  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
108  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
109  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
110  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
111  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
112  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
113 };
114 /*
115 ** This tables gives the character class for ASCII characters that form the
116 ** initial character of a word.  The only difference from midClass is with
117 ** the letters H, W, and Y.
118 */
119 static const unsigned char initClass[] = {
120  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
121  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
122  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
123  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
124  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
125  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
131  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
132  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
133  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
134  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
135  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
136  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
137  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
138  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
139  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
140  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
141  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
142  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
143  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
144  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
145  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
146  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
147  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
148  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
149  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
150  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
151  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
152  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
153  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
154  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
155  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
156  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
157  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
158  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
159  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
160  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
161  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
162  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
163 };
164 
165 /*
166 ** Mapping from the character class number (0-13) to a symbol for each
167 ** character class.  Note that initClass[] can be used to map the class
168 ** symbol back into the class number.
169 */
170 static const unsigned char className[] = ".ABCDHLRMY9 ?";
171 
172 /*
173 ** Generate a "phonetic hash" from a string of ASCII characters
174 ** in zIn[0..nIn-1].
175 **
176 **   * Map characters by character class as defined above.
177 **   * Omit double-letters
178 **   * Omit vowels beside R and L
179 **   * Omit T when followed by CH
180 **   * Omit W when followed by R
181 **   * Omit D when followed by J or G
182 **   * Omit K in KN or G in GN at the beginning of a word
183 **
184 ** Space to hold the result is obtained from sqlite3_malloc()
185 **
186 ** Return NULL if memory allocation fails.
187 */
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
189   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
190   int i;
191   int nOut = 0;
192   char cPrev = 0x77;
193   char cPrevX = 0x77;
194   const unsigned char *aClass = initClass;
195 
196   if( zOut==0 ) return 0;
197   if( nIn>2 ){
198     switch( zIn[0] ){
199       case 'g':
200       case 'k': {
201         if( zIn[1]=='n' ){ zIn++; nIn--; }
202         break;
203       }
204     }
205   }
206   for(i=0; i<nIn; i++){
207     unsigned char c = zIn[i];
208     if( i+1<nIn ){
209       if( c=='w' && zIn[i+1]=='r' ) continue;
210       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
211       if( i+2<nIn ){
212         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
213       }
214     }
215     c = aClass[c&0x7f];
216     if( c==CCLASS_SPACE ) continue;
217     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
218     aClass = midClass;
219     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
220        continue; /* No vowels beside L or R */
221     }
222     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
223        nOut--;   /* No vowels beside L or R */
224     }
225     cPrev = c;
226     if( c==CCLASS_SILENT ) continue;
227     cPrevX = c;
228     c = className[c];
229     assert( nOut>=0 );
230     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
231   }
232   zOut[nOut] = 0;
233   return zOut;
234 }
235 
236 /*
237 ** This is an SQL function wrapper around phoneticHash().  See
238 ** the description of phoneticHash() for additional information.
239 */
240 static void phoneticHashSqlFunc(
241   sqlite3_context *context,
242   int argc,
243   sqlite3_value **argv
244 ){
245   const unsigned char *zIn;
246   unsigned char *zOut;
247 
248   zIn = sqlite3_value_text(argv[0]);
249   if( zIn==0 ) return;
250   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
251   if( zOut==0 ){
252     sqlite3_result_error_nomem(context);
253   }else{
254     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
255   }
256 }
257 
258 /*
259 ** Return the character class number for a character given its
260 ** context.
261 */
262 static char characterClass(char cPrev, char c){
263   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
264 }
265 
266 /*
267 ** Return the cost of inserting or deleting character c immediately
268 ** following character cPrev.  If cPrev==0, that means c is the first
269 ** character of the word.
270 */
271 static int insertOrDeleteCost(char cPrev, char c, char cNext){
272   char classC = characterClass(cPrev, c);
273   char classCprev;
274 
275   if( classC==CCLASS_SILENT ){
276     /* Insert or delete "silent" characters such as H or W */
277     return 1;
278   }
279   if( cPrev==c ){
280     /* Repeated characters, or miss a repeat */
281     return 10;
282   }
283   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
284     return 20;  /* Insert a vowel before or after 'r' */
285   }
286   classCprev = characterClass(cPrev, cPrev);
287   if( classC==classCprev ){
288     if( classC==CCLASS_VOWEL ){
289       /* Remove or add a new vowel to a vowel cluster */
290       return 15;
291     }else{
292       /* Remove or add a consonant not in the same class */
293       return 50;
294     }
295   }
296 
297   /* any other character insertion or deletion */
298   return 100;
299 }
300 
301 /*
302 ** Divide the insertion cost by this factor when appending to the
303 ** end of the word.
304 */
305 #define FINAL_INS_COST_DIV  4
306 
307 /*
308 ** Return the cost of substituting cTo in place of cFrom assuming
309 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
310 ** character of the word.
311 */
312 static int substituteCost(char cPrev, char cFrom, char cTo){
313   char classFrom, classTo;
314   if( cFrom==cTo ){
315     /* Exact match */
316     return 0;
317   }
318   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
319     /* differ only in case */
320     return 0;
321   }
322   classFrom = characterClass(cPrev, cFrom);
323   classTo = characterClass(cPrev, cTo);
324   if( classFrom==classTo ){
325     /* Same character class */
326     return 40;
327   }
328   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
329       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
330     /* Convert from one consonant to another, but in a different class */
331     return 75;
332   }
333   /* Any other subsitution */
334   return 100;
335 }
336 
337 /*
338 ** Given two strings zA and zB which are pure ASCII, return the cost
339 ** of transforming zA into zB.  If zA ends with '*' assume that it is
340 ** a prefix of zB and give only minimal penalty for extra characters
341 ** on the end of zB.
342 **
343 ** Smaller numbers mean a closer match.
344 **
345 ** Negative values indicate an error:
346 **    -1  One of the inputs is NULL
347 **    -2  Non-ASCII characters on input
348 **    -3  Unable to allocate memory
349 **
350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
351 ** of zB that matched the pattern in zA. If zA does not end with a '*',
352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
353 ** If zA does end in a '*', then it is the number of bytes in the prefix
354 ** of zB that was deemed to match zA.
355 */
356 static int editdist1(const char *zA, const char *zB, int *pnMatch){
357   int nA, nB;            /* Number of characters in zA[] and zB[] */
358   int xA, xB;            /* Loop counters for zA[] and zB[] */
359   char cA = 0, cB;       /* Current character of zA and zB */
360   char cAprev, cBprev;   /* Previous character of zA and zB */
361   char cAnext, cBnext;   /* Next character in zA and zB */
362   int d;                 /* North-west cost value */
363   int dc = 0;            /* North-west character value */
364   int res;               /* Final result */
365   int *m;                /* The cost matrix */
366   char *cx;              /* Corresponding character values */
367   int *toFree = 0;       /* Malloced space */
368   int nMatch = 0;
369   int mStack[60+15];     /* Stack space to use if not too much is needed */
370 
371   /* Early out if either input is NULL */
372   if( zA==0 || zB==0 ) return -1;
373 
374   /* Skip any common prefix */
375   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
376   if( pnMatch ) *pnMatch = nMatch;
377   if( zA[0]==0 && zB[0]==0 ) return 0;
378 
379 #if 0
380   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
381 #endif
382 
383   /* Verify input strings and measure their lengths */
384   for(nA=0; zA[nA]; nA++){
385     if( zA[nA]&0x80 ) return -2;
386   }
387   for(nB=0; zB[nB]; nB++){
388     if( zB[nB]&0x80 ) return -2;
389   }
390 
391   /* Special processing if either string is empty */
392   if( nA==0 ){
393     cBprev = (char)dc;
394     for(xB=res=0; (cB = zB[xB])!=0; xB++){
395       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
396       cBprev = cB;
397     }
398     return res;
399   }
400   if( nB==0 ){
401     cAprev = (char)dc;
402     for(xA=res=0; (cA = zA[xA])!=0; xA++){
403       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
404       cAprev = cA;
405     }
406     return res;
407   }
408 
409   /* A is a prefix of B */
410   if( zA[0]=='*' && zA[1]==0 ) return 0;
411 
412   /* Allocate and initialize the Wagner matrix */
413   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
414     m = mStack;
415   }else{
416     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
417     if( m==0 ) return -3;
418   }
419   cx = (char*)&m[nB+1];
420 
421   /* Compute the Wagner edit distance */
422   m[0] = 0;
423   cx[0] = (char)dc;
424   cBprev = (char)dc;
425   for(xB=1; xB<=nB; xB++){
426     cBnext = zB[xB];
427     cB = zB[xB-1];
428     cx[xB] = cB;
429     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
430     cBprev = cB;
431   }
432   cAprev = (char)dc;
433   for(xA=1; xA<=nA; xA++){
434     int lastA = (xA==nA);
435     cA = zA[xA-1];
436     cAnext = zA[xA];
437     if( cA=='*' && lastA ) break;
438     d = m[0];
439     dc = cx[0];
440     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
441     cBprev = 0;
442     for(xB=1; xB<=nB; xB++){
443       int totalCost, insCost, delCost, subCost, ncx;
444       cB = zB[xB-1];
445       cBnext = zB[xB];
446 
447       /* Cost to insert cB */
448       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
449       if( lastA ) insCost /= FINAL_INS_COST_DIV;
450 
451       /* Cost to delete cA */
452       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
453 
454       /* Cost to substitute cA->cB */
455       subCost = substituteCost(cx[xB-1], cA, cB);
456 
457       /* Best cost */
458       totalCost = insCost + m[xB-1];
459       ncx = cB;
460       if( (delCost + m[xB])<totalCost ){
461         totalCost = delCost + m[xB];
462         ncx = cA;
463       }
464       if( (subCost + d)<totalCost ){
465         totalCost = subCost + d;
466       }
467 
468 #if 0
469       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
470              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
471              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
472              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
473 #endif
474 
475       /* Update the matrix */
476       d = m[xB];
477       dc = cx[xB];
478       m[xB] = totalCost;
479       cx[xB] = (char)ncx;
480       cBprev = cB;
481     }
482     cAprev = cA;
483   }
484 
485   /* Free the wagner matrix and return the result */
486   if( cA=='*' ){
487     res = m[1];
488     for(xB=1; xB<=nB; xB++){
489       if( m[xB]<res ){
490         res = m[xB];
491         if( pnMatch ) *pnMatch = xB+nMatch;
492       }
493     }
494   }else{
495     res = m[nB];
496     /* In the current implementation, pnMatch is always NULL if zA does
497     ** not end in "*" */
498     assert( pnMatch==0 );
499   }
500   sqlite3_free(toFree);
501   return res;
502 }
503 
504 /*
505 ** Function:    editdist(A,B)
506 **
507 ** Return the cost of transforming string A into string B.  Both strings
508 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
509 ** a prefix of B and extra characters on the end of B have minimal additional
510 ** cost.
511 */
512 static void editdistSqlFunc(
513   sqlite3_context *context,
514   int argc,
515   sqlite3_value **argv
516 ){
517   int res = editdist1(
518                     (const char*)sqlite3_value_text(argv[0]),
519                     (const char*)sqlite3_value_text(argv[1]),
520                     0);
521   if( res<0 ){
522     if( res==(-3) ){
523       sqlite3_result_error_nomem(context);
524     }else if( res==(-2) ){
525       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
526     }else{
527       sqlite3_result_error(context, "NULL input to editdist()", -1);
528     }
529   }else{
530     sqlite3_result_int(context, res);
531   }
532 }
533 
534 /* End of the fixed-cost edit distance implementation
535 ******************************************************************************
536 *****************************************************************************
537 ** Begin: Configurable cost unicode edit distance routines
538 */
539 /* Forward declaration of structures */
540 typedef struct EditDist3Cost EditDist3Cost;
541 typedef struct EditDist3Config EditDist3Config;
542 typedef struct EditDist3Point EditDist3Point;
543 typedef struct EditDist3From EditDist3From;
544 typedef struct EditDist3FromString EditDist3FromString;
545 typedef struct EditDist3To EditDist3To;
546 typedef struct EditDist3ToString EditDist3ToString;
547 typedef struct EditDist3Lang EditDist3Lang;
548 
549 
550 /*
551 ** An entry in the edit cost table
552 */
553 struct EditDist3Cost {
554   EditDist3Cost *pNext;     /* Next cost element */
555   u8 nFrom;                 /* Number of bytes in aFrom */
556   u8 nTo;                   /* Number of bytes in aTo */
557   u16 iCost;                /* Cost of this transformation */
558   char a[4]    ;            /* FROM string followed by TO string */
559   /* Additional TO and FROM string bytes appended as necessary */
560 };
561 
562 /*
563 ** Edit costs for a particular language ID
564 */
565 struct EditDist3Lang {
566   int iLang;             /* Language ID */
567   int iInsCost;          /* Default insertion cost */
568   int iDelCost;          /* Default deletion cost */
569   int iSubCost;          /* Default substitution cost */
570   EditDist3Cost *pCost;  /* Costs */
571 };
572 
573 
574 /*
575 ** The default EditDist3Lang object, with default costs.
576 */
577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
578 
579 /*
580 ** Complete configuration
581 */
582 struct EditDist3Config {
583   int nLang;             /* Number of language IDs.  Size of a[] */
584   EditDist3Lang *a;      /* One for each distinct language ID */
585 };
586 
587 /*
588 ** Extra information about each character in the FROM string.
589 */
590 struct EditDist3From {
591   int nSubst;              /* Number of substitution cost entries */
592   int nDel;                /* Number of deletion cost entries */
593   int nByte;               /* Number of bytes in this character */
594   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
595   EditDist3Cost **apDel;   /* Array of deletion cost entries */
596 };
597 
598 /*
599 ** A precompiled FROM string.
600 *
601 ** In the common case we expect the FROM string to be reused multiple times.
602 ** In other words, the common case will be to measure the edit distance
603 ** from a single origin string to multiple target strings.
604 */
605 struct EditDist3FromString {
606   char *z;                 /* The complete text of the FROM string */
607   int n;                   /* Number of characters in the FROM string */
608   int isPrefix;            /* True if ends with '*' character */
609   EditDist3From *a;        /* Extra info about each char of the FROM string */
610 };
611 
612 /*
613 ** Extra information about each character in the TO string.
614 */
615 struct EditDist3To {
616   int nIns;                /* Number of insertion cost entries */
617   int nByte;               /* Number of bytes in this character */
618   EditDist3Cost **apIns;   /* Array of deletion cost entries */
619 };
620 
621 /*
622 ** A precompiled FROM string
623 */
624 struct EditDist3ToString {
625   char *z;                 /* The complete text of the TO string */
626   int n;                   /* Number of characters in the TO string */
627   EditDist3To *a;          /* Extra info about each char of the TO string */
628 };
629 
630 /*
631 ** Clear or delete an instance of the object that records all edit-distance
632 ** weights.
633 */
634 static void editDist3ConfigClear(EditDist3Config *p){
635   int i;
636   if( p==0 ) return;
637   for(i=0; i<p->nLang; i++){
638     EditDist3Cost *pCost, *pNext;
639     pCost = p->a[i].pCost;
640     while( pCost ){
641       pNext = pCost->pNext;
642       sqlite3_free(pCost);
643       pCost = pNext;
644     }
645   }
646   sqlite3_free(p->a);
647   memset(p, 0, sizeof(*p));
648 }
649 static void editDist3ConfigDelete(void *pIn){
650   EditDist3Config *p = (EditDist3Config*)pIn;
651   editDist3ConfigClear(p);
652   sqlite3_free(p);
653 }
654 
655 /*
656 ** Load all edit-distance weights from a table.
657 */
658 static int editDist3ConfigLoad(
659   EditDist3Config *p,      /* The edit distance configuration to load */
660   sqlite3 *db,            /* Load from this database */
661   const char *zTable      /* Name of the table from which to load */
662 ){
663   sqlite3_stmt *pStmt;
664   int rc, rc2;
665   char *zSql;
666   int iLangPrev = -9999;
667   EditDist3Lang *pLang = 0;
668 
669   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
670                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
671   if( zSql==0 ) return SQLITE_NOMEM;
672   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
673   sqlite3_free(zSql);
674   if( rc ) return rc;
675   editDist3ConfigClear(p);
676   while( sqlite3_step(pStmt)==SQLITE_ROW ){
677     int iLang = sqlite3_column_int(pStmt, 0);
678     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
679     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
680     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
681     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
682     int iCost = sqlite3_column_int(pStmt, 3);
683 
684     assert( zFrom!=0 || nFrom==0 );
685     assert( zTo!=0 || nTo==0 );
686     if( nFrom>100 || nTo>100 ) continue;
687     if( iCost<0 ) continue;
688     if( pLang==0 || iLang!=iLangPrev ){
689       EditDist3Lang *pNew;
690       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
691       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
692       p->a = pNew;
693       pLang = &p->a[p->nLang];
694       p->nLang++;
695       pLang->iLang = iLang;
696       pLang->iInsCost = 100;
697       pLang->iDelCost = 100;
698       pLang->iSubCost = 150;
699       pLang->pCost = 0;
700       iLangPrev = iLang;
701     }
702     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
703       pLang->iDelCost = iCost;
704     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
705       pLang->iInsCost = iCost;
706     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
707       pLang->iSubCost = iCost;
708     }else{
709       EditDist3Cost *pCost;
710       int nExtra = nFrom + nTo - 4;
711       if( nExtra<0 ) nExtra = 0;
712       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
713       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
714       pCost->nFrom = (u8)nFrom;
715       pCost->nTo = (u8)nTo;
716       pCost->iCost = (u16)iCost;
717       memcpy(pCost->a, zFrom, nFrom);
718       memcpy(pCost->a + nFrom, zTo, nTo);
719       pCost->pNext = pLang->pCost;
720       pLang->pCost = pCost;
721     }
722   }
723   rc2 = sqlite3_finalize(pStmt);
724   if( rc==SQLITE_OK ) rc = rc2;
725   return rc;
726 }
727 
728 /*
729 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
730 ** of N.
731 */
732 static int utf8Len(unsigned char c, int N){
733   int len = 1;
734   if( c>0x7f ){
735     if( (c&0xe0)==0xc0 ){
736       len = 2;
737     }else if( (c&0xf0)==0xe0 ){
738       len = 3;
739     }else{
740       len = 4;
741     }
742   }
743   if( len>N ) len = N;
744   return len;
745 }
746 
747 /*
748 ** Return TRUE (non-zero) if the To side of the given cost matches
749 ** the given string.
750 */
751 static int matchTo(EditDist3Cost *p, const char *z, int n){
752   if( p->nTo>n ) return 0;
753   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
754   return 1;
755 }
756 
757 /*
758 ** Return TRUE (non-zero) if the From side of the given cost matches
759 ** the given string.
760 */
761 static int matchFrom(EditDist3Cost *p, const char *z, int n){
762   assert( p->nFrom<=n );
763   if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
764   return 1;
765 }
766 
767 /*
768 ** Return TRUE (non-zero) of the next FROM character and the next TO
769 ** character are the same.
770 */
771 static int matchFromTo(
772   EditDist3FromString *pStr,  /* Left hand string */
773   int n1,                     /* Index of comparison character on the left */
774   const char *z2,             /* Right-handl comparison character */
775   int n2                      /* Bytes remaining in z2[] */
776 ){
777   int b1 = pStr->a[n1].nByte;
778   if( b1>n2 ) return 0;
779   if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
780   return 1;
781 }
782 
783 /*
784 ** Delete an EditDist3FromString objecct
785 */
786 static void editDist3FromStringDelete(EditDist3FromString *p){
787   int i;
788   if( p ){
789     for(i=0; i<p->n; i++){
790       sqlite3_free(p->a[i].apDel);
791       sqlite3_free(p->a[i].apSubst);
792     }
793     sqlite3_free(p);
794   }
795 }
796 
797 /*
798 ** Create a EditDist3FromString object.
799 */
800 static EditDist3FromString *editDist3FromStringNew(
801   const EditDist3Lang *pLang,
802   const char *z,
803   int n
804 ){
805   EditDist3FromString *pStr;
806   EditDist3Cost *p;
807   int i;
808 
809   if( z==0 ) return 0;
810   if( n<0 ) n = (int)strlen(z);
811   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
812   if( pStr==0 ) return 0;
813   pStr->a = (EditDist3From*)&pStr[1];
814   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
815   pStr->n = n;
816   pStr->z = (char*)&pStr->a[n];
817   memcpy(pStr->z, z, n+1);
818   if( n && z[n-1]=='*' ){
819     pStr->isPrefix = 1;
820     n--;
821     pStr->n--;
822     pStr->z[n] = 0;
823   }else{
824     pStr->isPrefix = 0;
825   }
826 
827   for(i=0; i<n; i++){
828     EditDist3From *pFrom = &pStr->a[i];
829     memset(pFrom, 0, sizeof(*pFrom));
830     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
831     for(p=pLang->pCost; p; p=p->pNext){
832       EditDist3Cost **apNew;
833       if( i+p->nFrom>n ) continue;
834       if( matchFrom(p, z+i, n-i)==0 ) continue;
835       if( p->nTo==0 ){
836         apNew = sqlite3_realloc64(pFrom->apDel,
837                                 sizeof(*apNew)*(pFrom->nDel+1));
838         if( apNew==0 ) break;
839         pFrom->apDel = apNew;
840         apNew[pFrom->nDel++] = p;
841       }else{
842         apNew = sqlite3_realloc64(pFrom->apSubst,
843                                 sizeof(*apNew)*(pFrom->nSubst+1));
844         if( apNew==0 ) break;
845         pFrom->apSubst = apNew;
846         apNew[pFrom->nSubst++] = p;
847       }
848     }
849     if( p ){
850       editDist3FromStringDelete(pStr);
851       pStr = 0;
852       break;
853     }
854   }
855   return pStr;
856 }
857 
858 /*
859 ** Update entry m[i] such that it is the minimum of its current value
860 ** and m[j]+iCost.
861 **
862 ** If the iCost is 1,000,000 or greater, then consider the cost to be
863 ** infinite and skip the update.
864 */
865 static void updateCost(
866   unsigned int *m,
867   int i,
868   int j,
869   int iCost
870 ){
871   assert( iCost>=0 );
872   if( iCost<10000 ){
873     unsigned int b = m[j] + iCost;
874     if( b<m[i] ) m[i] = b;
875   }
876 }
877 
878 /*
879 ** How much stack space (int bytes) to use for Wagner matrix in
880 ** editDist3Core().  If more space than this is required, the entire
881 ** matrix is taken from the heap.  To reduce the load on the memory
882 ** allocator, make this value as large as practical for the
883 ** architecture in use.
884 */
885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
886 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
887 #endif
888 
889 /* Compute the edit distance between two strings.
890 **
891 ** If an error occurs, return a negative number which is the error code.
892 **
893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
895 ** not contain the pattern for a prefix-search, then this is always the number
896 ** of characters in z2. If pFrom does contain a prefix search pattern, then
897 ** it is the number of characters in the prefix of z2 that was deemed to
898 ** match pFrom.
899 */
900 static int editDist3Core(
901   EditDist3FromString *pFrom,  /* The FROM string */
902   const char *z2,              /* The TO string */
903   int n2,                      /* Length of the TO string */
904   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
905   int *pnMatch                 /* OUT: Characters in matched prefix */
906 ){
907   int k, n;
908   int i1, b1;
909   int i2, b2;
910   EditDist3FromString f = *pFrom;
911   EditDist3To *a2;
912   unsigned int *m;
913   unsigned int *pToFree;
914   int szRow;
915   EditDist3Cost *p;
916   int res;
917   sqlite3_uint64 nByte;
918   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
919 
920   /* allocate the Wagner matrix and the aTo[] array for the TO string */
921   n = (f.n+1)*(n2+1);
922   n = (n+1)&~1;
923   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
924   if( nByte<=sizeof(stackSpace) ){
925     m = stackSpace;
926     pToFree = 0;
927   }else{
928     m = pToFree = sqlite3_malloc64( nByte );
929     if( m==0 ) return -1;            /* Out of memory */
930   }
931   a2 = (EditDist3To*)&m[n];
932   memset(a2, 0, sizeof(a2[0])*n2);
933 
934   /* Fill in the a1[] matrix for all characters of the TO string */
935   for(i2=0; i2<n2; i2++){
936     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
937     for(p=pLang->pCost; p; p=p->pNext){
938       EditDist3Cost **apNew;
939       if( p->nFrom>0 ) continue;
940       if( i2+p->nTo>n2 ) continue;
941       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
942       a2[i2].nIns++;
943       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
944       if( apNew==0 ){
945         res = -1;  /* Out of memory */
946         goto editDist3Abort;
947       }
948       a2[i2].apIns = apNew;
949       a2[i2].apIns[a2[i2].nIns-1] = p;
950     }
951   }
952 
953   /* Prepare to compute the minimum edit distance */
954   szRow = f.n+1;
955   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
956   m[0] = 0;
957 
958   /* First fill in the top-row of the matrix with FROM deletion costs */
959   for(i1=0; i1<f.n; i1 += b1){
960     b1 = f.a[i1].nByte;
961     updateCost(m, i1+b1, i1, pLang->iDelCost);
962     for(k=0; k<f.a[i1].nDel; k++){
963       p = f.a[i1].apDel[k];
964       updateCost(m, i1+p->nFrom, i1, p->iCost);
965     }
966   }
967 
968   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
969   for(i2=0; i2<n2; i2 += b2){
970     int rx;      /* Starting index for current row */
971     int rxp;     /* Starting index for previous row */
972     b2 = a2[i2].nByte;
973     rx = szRow*(i2+b2);
974     rxp = szRow*i2;
975     updateCost(m, rx, rxp, pLang->iInsCost);
976     for(k=0; k<a2[i2].nIns; k++){
977       p = a2[i2].apIns[k];
978       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
979     }
980     for(i1=0; i1<f.n; i1+=b1){
981       int cx;    /* Index of current cell */
982       int cxp;   /* Index of cell immediately to the left */
983       int cxd;   /* Index of cell to the left and one row above */
984       int cxu;   /* Index of cell immediately above */
985       b1 = f.a[i1].nByte;
986       cxp = rx + i1;
987       cx = cxp + b1;
988       cxd = rxp + i1;
989       cxu = cxd + b1;
990       updateCost(m, cx, cxp, pLang->iDelCost);
991       for(k=0; k<f.a[i1].nDel; k++){
992         p = f.a[i1].apDel[k];
993         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
994       }
995       updateCost(m, cx, cxu, pLang->iInsCost);
996       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
997         updateCost(m, cx, cxd, 0);
998       }
999       updateCost(m, cx, cxd, pLang->iSubCost);
1000       for(k=0; k<f.a[i1].nSubst; k++){
1001         p = f.a[i1].apSubst[k];
1002         if( matchTo(p, z2+i2, n2-i2) ){
1003           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1004         }
1005       }
1006     }
1007   }
1008 
1009 #if 0  /* Enable for debugging */
1010   printf("         ^");
1011   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1012   printf("\n   ^:");
1013   for(i1=0; i1<szRow; i1++){
1014     int v = m[i1];
1015     if( v>9999 ) printf(" ****");
1016     else         printf(" %4d", v);
1017   }
1018   printf("\n");
1019   for(i2=0; i2<n2; i2++){
1020     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1021     for(i1=0; i1<szRow; i1++){
1022       int v = m[(i2+1)*szRow+i1];
1023       if( v>9999 ) printf(" ****");
1024       else         printf(" %4d", v);
1025     }
1026     printf("\n");
1027   }
1028 #endif
1029 
1030   /* Free memory allocations and return the result */
1031   res = (int)m[szRow*(n2+1)-1];
1032   n = n2;
1033   if( f.isPrefix ){
1034     for(i2=1; i2<=n2; i2++){
1035       int b = m[szRow*i2-1];
1036       if( b<=res ){
1037         res = b;
1038         n = i2 - 1;
1039       }
1040     }
1041   }
1042   if( pnMatch ){
1043     int nExtra = 0;
1044     for(k=0; k<n; k++){
1045       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1046     }
1047     *pnMatch = n - nExtra;
1048   }
1049 
1050 editDist3Abort:
1051   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1052   sqlite3_free(pToFree);
1053   return res;
1054 }
1055 
1056 /*
1057 ** Get an appropriate EditDist3Lang object.
1058 */
1059 static const EditDist3Lang *editDist3FindLang(
1060   EditDist3Config *pConfig,
1061   int iLang
1062 ){
1063   int i;
1064   for(i=0; i<pConfig->nLang; i++){
1065     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1066   }
1067   return &editDist3Lang;
1068 }
1069 
1070 /*
1071 ** Function:    editdist3(A,B,iLang)
1072 **              editdist3(tablename)
1073 **
1074 ** Return the cost of transforming string A into string B using edit
1075 ** weights for iLang.
1076 **
1077 ** The second form loads edit weights into memory from a table.
1078 */
1079 static void editDist3SqlFunc(
1080   sqlite3_context *context,
1081   int argc,
1082   sqlite3_value **argv
1083 ){
1084   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1085   sqlite3 *db = sqlite3_context_db_handle(context);
1086   int rc;
1087   if( argc==1 ){
1088     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1089     rc = editDist3ConfigLoad(pConfig, db, zTable);
1090     if( rc ) sqlite3_result_error_code(context, rc);
1091   }else{
1092     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1093     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1094     int nA = sqlite3_value_bytes(argv[0]);
1095     int nB = sqlite3_value_bytes(argv[1]);
1096     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1097     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1098     EditDist3FromString *pFrom;
1099     int dist;
1100 
1101     pFrom = editDist3FromStringNew(pLang, zA, nA);
1102     if( pFrom==0 ){
1103       sqlite3_result_error_nomem(context);
1104       return;
1105     }
1106     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1107     editDist3FromStringDelete(pFrom);
1108     if( dist==(-1) ){
1109       sqlite3_result_error_nomem(context);
1110     }else{
1111       sqlite3_result_int(context, dist);
1112     }
1113   }
1114 }
1115 
1116 /*
1117 ** Register the editDist3 function with SQLite
1118 */
1119 static int editDist3Install(sqlite3 *db){
1120   int rc;
1121   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1122   if( pConfig==0 ) return SQLITE_NOMEM;
1123   memset(pConfig, 0, sizeof(*pConfig));
1124   rc = sqlite3_create_function_v2(db, "editdist3",
1125               2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1126               editDist3SqlFunc, 0, 0, 0);
1127   if( rc==SQLITE_OK ){
1128     rc = sqlite3_create_function_v2(db, "editdist3",
1129                 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1130                 editDist3SqlFunc, 0, 0, 0);
1131   }
1132   if( rc==SQLITE_OK ){
1133     rc = sqlite3_create_function_v2(db, "editdist3",
1134                 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1135                 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1136   }else{
1137     sqlite3_free(pConfig);
1138   }
1139   return rc;
1140 }
1141 /* End configurable cost unicode edit distance routines
1142 ******************************************************************************
1143 ******************************************************************************
1144 ** Begin transliterate unicode-to-ascii implementation
1145 */
1146 
1147 #if !SQLITE_AMALGAMATION
1148 /*
1149 ** This lookup table is used to help decode the first byte of
1150 ** a multi-byte UTF8 character.
1151 */
1152 static const unsigned char sqlite3Utf8Trans1[] = {
1153   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1154   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1155   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1156   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1157   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1158   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1159   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1160   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1161 };
1162 #endif
1163 
1164 /*
1165 ** Return the value of the first UTF-8 character in the string.
1166 */
1167 static int utf8Read(const unsigned char *z, int n, int *pSize){
1168   int c, i;
1169 
1170   /* All callers to this routine (in the current implementation)
1171   ** always have n>0. */
1172   if( NEVER(n==0) ){
1173     c = i = 0;
1174   }else{
1175     c = z[0];
1176     i = 1;
1177     if( c>=0xc0 ){
1178       c = sqlite3Utf8Trans1[c-0xc0];
1179       while( i<n && (z[i] & 0xc0)==0x80 ){
1180         c = (c<<6) + (0x3f & z[i++]);
1181       }
1182     }
1183   }
1184   *pSize = i;
1185   return c;
1186 }
1187 
1188 /*
1189 ** Return the number of characters in the utf-8 string in the nIn byte
1190 ** buffer pointed to by zIn.
1191 */
1192 static int utf8Charlen(const char *zIn, int nIn){
1193   int i;
1194   int nChar = 0;
1195   for(i=0; i<nIn; nChar++){
1196     int sz;
1197     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1198     i += sz;
1199   }
1200   return nChar;
1201 }
1202 
1203 /*
1204 ** Table of translations from unicode characters into ASCII.
1205 */
1206 static const struct {
1207  unsigned short int cFrom;
1208  unsigned char cTo0, cTo1;
1209 } translit[] = {
1210   { 0x00A0,  0x20, 0x00 },  /*   to   */
1211   { 0x00B5,  0x75, 0x00 },  /* µ to u */
1212   { 0x00C0,  0x41, 0x00 },  /* À to A */
1213   { 0x00C1,  0x41, 0x00 },  /* Á to A */
1214   { 0x00C2,  0x41, 0x00 },  /* Â to A */
1215   { 0x00C3,  0x41, 0x00 },  /* Ã to A */
1216   { 0x00C4,  0x41, 0x65 },  /* Ä to Ae */
1217   { 0x00C5,  0x41, 0x61 },  /* Å to Aa */
1218   { 0x00C6,  0x41, 0x45 },  /* Æ to AE */
1219   { 0x00C7,  0x43, 0x00 },  /* Ç to C */
1220   { 0x00C8,  0x45, 0x00 },  /* È to E */
1221   { 0x00C9,  0x45, 0x00 },  /* É to E */
1222   { 0x00CA,  0x45, 0x00 },  /* Ê to E */
1223   { 0x00CB,  0x45, 0x00 },  /* Ë to E */
1224   { 0x00CC,  0x49, 0x00 },  /* Ì to I */
1225   { 0x00CD,  0x49, 0x00 },  /* Í to I */
1226   { 0x00CE,  0x49, 0x00 },  /* Î to I */
1227   { 0x00CF,  0x49, 0x00 },  /* Ï to I */
1228   { 0x00D0,  0x44, 0x00 },  /* Ð to D */
1229   { 0x00D1,  0x4E, 0x00 },  /* Ñ to N */
1230   { 0x00D2,  0x4F, 0x00 },  /* Ò to O */
1231   { 0x00D3,  0x4F, 0x00 },  /* Ó to O */
1232   { 0x00D4,  0x4F, 0x00 },  /* Ô to O */
1233   { 0x00D5,  0x4F, 0x00 },  /* Õ to O */
1234   { 0x00D6,  0x4F, 0x65 },  /* Ö to Oe */
1235   { 0x00D7,  0x78, 0x00 },  /* × to x */
1236   { 0x00D8,  0x4F, 0x00 },  /* Ø to O */
1237   { 0x00D9,  0x55, 0x00 },  /* Ù to U */
1238   { 0x00DA,  0x55, 0x00 },  /* Ú to U */
1239   { 0x00DB,  0x55, 0x00 },  /* Û to U */
1240   { 0x00DC,  0x55, 0x65 },  /* Ü to Ue */
1241   { 0x00DD,  0x59, 0x00 },  /* Ý to Y */
1242   { 0x00DE,  0x54, 0x68 },  /* Þ to Th */
1243   { 0x00DF,  0x73, 0x73 },  /* ß to ss */
1244   { 0x00E0,  0x61, 0x00 },  /* à to a */
1245   { 0x00E1,  0x61, 0x00 },  /* á to a */
1246   { 0x00E2,  0x61, 0x00 },  /* â to a */
1247   { 0x00E3,  0x61, 0x00 },  /* ã to a */
1248   { 0x00E4,  0x61, 0x65 },  /* ä to ae */
1249   { 0x00E5,  0x61, 0x61 },  /* å to aa */
1250   { 0x00E6,  0x61, 0x65 },  /* æ to ae */
1251   { 0x00E7,  0x63, 0x00 },  /* ç to c */
1252   { 0x00E8,  0x65, 0x00 },  /* è to e */
1253   { 0x00E9,  0x65, 0x00 },  /* é to e */
1254   { 0x00EA,  0x65, 0x00 },  /* ê to e */
1255   { 0x00EB,  0x65, 0x00 },  /* ë to e */
1256   { 0x00EC,  0x69, 0x00 },  /* ì to i */
1257   { 0x00ED,  0x69, 0x00 },  /* í to i */
1258   { 0x00EE,  0x69, 0x00 },  /* î to i */
1259   { 0x00EF,  0x69, 0x00 },  /* ï to i */
1260   { 0x00F0,  0x64, 0x00 },  /* ð to d */
1261   { 0x00F1,  0x6E, 0x00 },  /* ñ to n */
1262   { 0x00F2,  0x6F, 0x00 },  /* ò to o */
1263   { 0x00F3,  0x6F, 0x00 },  /* ó to o */
1264   { 0x00F4,  0x6F, 0x00 },  /* ô to o */
1265   { 0x00F5,  0x6F, 0x00 },  /* õ to o */
1266   { 0x00F6,  0x6F, 0x65 },  /* ö to oe */
1267   { 0x00F7,  0x3A, 0x00 },  /* ÷ to : */
1268   { 0x00F8,  0x6F, 0x00 },  /* ø to o */
1269   { 0x00F9,  0x75, 0x00 },  /* ù to u */
1270   { 0x00FA,  0x75, 0x00 },  /* ú to u */
1271   { 0x00FB,  0x75, 0x00 },  /* û to u */
1272   { 0x00FC,  0x75, 0x65 },  /* ü to ue */
1273   { 0x00FD,  0x79, 0x00 },  /* ý to y */
1274   { 0x00FE,  0x74, 0x68 },  /* þ to th */
1275   { 0x00FF,  0x79, 0x00 },  /* ÿ to y */
1276   { 0x0100,  0x41, 0x00 },  /* Ā to A */
1277   { 0x0101,  0x61, 0x00 },  /* ā to a */
1278   { 0x0102,  0x41, 0x00 },  /* Ă to A */
1279   { 0x0103,  0x61, 0x00 },  /* ă to a */
1280   { 0x0104,  0x41, 0x00 },  /* Ą to A */
1281   { 0x0105,  0x61, 0x00 },  /* ą to a */
1282   { 0x0106,  0x43, 0x00 },  /* Ć to C */
1283   { 0x0107,  0x63, 0x00 },  /* ć to c */
1284   { 0x0108,  0x43, 0x68 },  /* Ĉ to Ch */
1285   { 0x0109,  0x63, 0x68 },  /* ĉ to ch */
1286   { 0x010A,  0x43, 0x00 },  /* Ċ to C */
1287   { 0x010B,  0x63, 0x00 },  /* ċ to c */
1288   { 0x010C,  0x43, 0x00 },  /* Č to C */
1289   { 0x010D,  0x63, 0x00 },  /* č to c */
1290   { 0x010E,  0x44, 0x00 },  /* Ď to D */
1291   { 0x010F,  0x64, 0x00 },  /* ď to d */
1292   { 0x0110,  0x44, 0x00 },  /* Đ to D */
1293   { 0x0111,  0x64, 0x00 },  /* đ to d */
1294   { 0x0112,  0x45, 0x00 },  /* Ē to E */
1295   { 0x0113,  0x65, 0x00 },  /* ē to e */
1296   { 0x0114,  0x45, 0x00 },  /* Ĕ to E */
1297   { 0x0115,  0x65, 0x00 },  /* ĕ to e */
1298   { 0x0116,  0x45, 0x00 },  /* Ė to E */
1299   { 0x0117,  0x65, 0x00 },  /* ė to e */
1300   { 0x0118,  0x45, 0x00 },  /* Ę to E */
1301   { 0x0119,  0x65, 0x00 },  /* ę to e */
1302   { 0x011A,  0x45, 0x00 },  /* Ě to E */
1303   { 0x011B,  0x65, 0x00 },  /* ě to e */
1304   { 0x011C,  0x47, 0x68 },  /* Ĝ to Gh */
1305   { 0x011D,  0x67, 0x68 },  /* ĝ to gh */
1306   { 0x011E,  0x47, 0x00 },  /* Ğ to G */
1307   { 0x011F,  0x67, 0x00 },  /* ğ to g */
1308   { 0x0120,  0x47, 0x00 },  /* Ġ to G */
1309   { 0x0121,  0x67, 0x00 },  /* ġ to g */
1310   { 0x0122,  0x47, 0x00 },  /* Ģ to G */
1311   { 0x0123,  0x67, 0x00 },  /* ģ to g */
1312   { 0x0124,  0x48, 0x68 },  /* Ĥ to Hh */
1313   { 0x0125,  0x68, 0x68 },  /* ĥ to hh */
1314   { 0x0126,  0x48, 0x00 },  /* Ħ to H */
1315   { 0x0127,  0x68, 0x00 },  /* ħ to h */
1316   { 0x0128,  0x49, 0x00 },  /* Ĩ to I */
1317   { 0x0129,  0x69, 0x00 },  /* ĩ to i */
1318   { 0x012A,  0x49, 0x00 },  /* Ī to I */
1319   { 0x012B,  0x69, 0x00 },  /* ī to i */
1320   { 0x012C,  0x49, 0x00 },  /* Ĭ to I */
1321   { 0x012D,  0x69, 0x00 },  /* ĭ to i */
1322   { 0x012E,  0x49, 0x00 },  /* Į to I */
1323   { 0x012F,  0x69, 0x00 },  /* į to i */
1324   { 0x0130,  0x49, 0x00 },  /* İ to I */
1325   { 0x0131,  0x69, 0x00 },  /* ı to i */
1326   { 0x0132,  0x49, 0x4A },  /* IJ to IJ */
1327   { 0x0133,  0x69, 0x6A },  /* ij to ij */
1328   { 0x0134,  0x4A, 0x68 },  /* Ĵ to Jh */
1329   { 0x0135,  0x6A, 0x68 },  /* ĵ to jh */
1330   { 0x0136,  0x4B, 0x00 },  /* Ķ to K */
1331   { 0x0137,  0x6B, 0x00 },  /* ķ to k */
1332   { 0x0138,  0x6B, 0x00 },  /* ĸ to k */
1333   { 0x0139,  0x4C, 0x00 },  /* Ĺ to L */
1334   { 0x013A,  0x6C, 0x00 },  /* ĺ to l */
1335   { 0x013B,  0x4C, 0x00 },  /* Ļ to L */
1336   { 0x013C,  0x6C, 0x00 },  /* ļ to l */
1337   { 0x013D,  0x4C, 0x00 },  /* Ľ to L */
1338   { 0x013E,  0x6C, 0x00 },  /* ľ to l */
1339   { 0x013F,  0x4C, 0x2E },  /* Ŀ to L. */
1340   { 0x0140,  0x6C, 0x2E },  /* ŀ to l. */
1341   { 0x0141,  0x4C, 0x00 },  /* Ł to L */
1342   { 0x0142,  0x6C, 0x00 },  /* ł to l */
1343   { 0x0143,  0x4E, 0x00 },  /* Ń to N */
1344   { 0x0144,  0x6E, 0x00 },  /* ń to n */
1345   { 0x0145,  0x4E, 0x00 },  /* Ņ to N */
1346   { 0x0146,  0x6E, 0x00 },  /* ņ to n */
1347   { 0x0147,  0x4E, 0x00 },  /* Ň to N */
1348   { 0x0148,  0x6E, 0x00 },  /* ň to n */
1349   { 0x0149,  0x27, 0x6E },  /* ʼn to 'n */
1350   { 0x014A,  0x4E, 0x47 },  /* Ŋ to NG */
1351   { 0x014B,  0x6E, 0x67 },  /* ŋ to ng */
1352   { 0x014C,  0x4F, 0x00 },  /* Ō to O */
1353   { 0x014D,  0x6F, 0x00 },  /* ō to o */
1354   { 0x014E,  0x4F, 0x00 },  /* Ŏ to O */
1355   { 0x014F,  0x6F, 0x00 },  /* ŏ to o */
1356   { 0x0150,  0x4F, 0x00 },  /* Ő to O */
1357   { 0x0151,  0x6F, 0x00 },  /* ő to o */
1358   { 0x0152,  0x4F, 0x45 },  /* Œ to OE */
1359   { 0x0153,  0x6F, 0x65 },  /* œ to oe */
1360   { 0x0154,  0x52, 0x00 },  /* Ŕ to R */
1361   { 0x0155,  0x72, 0x00 },  /* ŕ to r */
1362   { 0x0156,  0x52, 0x00 },  /* Ŗ to R */
1363   { 0x0157,  0x72, 0x00 },  /* ŗ to r */
1364   { 0x0158,  0x52, 0x00 },  /* Ř to R */
1365   { 0x0159,  0x72, 0x00 },  /* ř to r */
1366   { 0x015A,  0x53, 0x00 },  /* Ś to S */
1367   { 0x015B,  0x73, 0x00 },  /* ś to s */
1368   { 0x015C,  0x53, 0x68 },  /* Ŝ to Sh */
1369   { 0x015D,  0x73, 0x68 },  /* ŝ to sh */
1370   { 0x015E,  0x53, 0x00 },  /* Ş to S */
1371   { 0x015F,  0x73, 0x00 },  /* ş to s */
1372   { 0x0160,  0x53, 0x00 },  /* Š to S */
1373   { 0x0161,  0x73, 0x00 },  /* š to s */
1374   { 0x0162,  0x54, 0x00 },  /* Ţ to T */
1375   { 0x0163,  0x74, 0x00 },  /* ţ to t */
1376   { 0x0164,  0x54, 0x00 },  /* Ť to T */
1377   { 0x0165,  0x74, 0x00 },  /* ť to t */
1378   { 0x0166,  0x54, 0x00 },  /* Ŧ to T */
1379   { 0x0167,  0x74, 0x00 },  /* ŧ to t */
1380   { 0x0168,  0x55, 0x00 },  /* Ũ to U */
1381   { 0x0169,  0x75, 0x00 },  /* ũ to u */
1382   { 0x016A,  0x55, 0x00 },  /* Ū to U */
1383   { 0x016B,  0x75, 0x00 },  /* ū to u */
1384   { 0x016C,  0x55, 0x00 },  /* Ŭ to U */
1385   { 0x016D,  0x75, 0x00 },  /* ŭ to u */
1386   { 0x016E,  0x55, 0x00 },  /* Ů to U */
1387   { 0x016F,  0x75, 0x00 },  /* ů to u */
1388   { 0x0170,  0x55, 0x00 },  /* Ű to U */
1389   { 0x0171,  0x75, 0x00 },  /* ű to u */
1390   { 0x0172,  0x55, 0x00 },  /* Ų to U */
1391   { 0x0173,  0x75, 0x00 },  /* ų to u */
1392   { 0x0174,  0x57, 0x00 },  /* Ŵ to W */
1393   { 0x0175,  0x77, 0x00 },  /* ŵ to w */
1394   { 0x0176,  0x59, 0x00 },  /* Ŷ to Y */
1395   { 0x0177,  0x79, 0x00 },  /* ŷ to y */
1396   { 0x0178,  0x59, 0x00 },  /* Ÿ to Y */
1397   { 0x0179,  0x5A, 0x00 },  /* Ź to Z */
1398   { 0x017A,  0x7A, 0x00 },  /* ź to z */
1399   { 0x017B,  0x5A, 0x00 },  /* Ż to Z */
1400   { 0x017C,  0x7A, 0x00 },  /* ż to z */
1401   { 0x017D,  0x5A, 0x00 },  /* Ž to Z */
1402   { 0x017E,  0x7A, 0x00 },  /* ž to z */
1403   { 0x017F,  0x73, 0x00 },  /* ſ to s */
1404   { 0x0192,  0x66, 0x00 },  /* ƒ to f */
1405   { 0x0218,  0x53, 0x00 },  /* Ș to S */
1406   { 0x0219,  0x73, 0x00 },  /* ș to s */
1407   { 0x021A,  0x54, 0x00 },  /* Ț to T */
1408   { 0x021B,  0x74, 0x00 },  /* ț to t */
1409   { 0x0386,  0x41, 0x00 },  /* Ά to A */
1410   { 0x0388,  0x45, 0x00 },  /* Έ to E */
1411   { 0x0389,  0x49, 0x00 },  /* Ή to I */
1412   { 0x038A,  0x49, 0x00 },  /* Ί to I */
1413   { 0x038C,  0x4f, 0x00 },  /* Ό to O */
1414   { 0x038E,  0x59, 0x00 },  /* Ύ to Y */
1415   { 0x038F,  0x4f, 0x00 },  /* Ώ to O */
1416   { 0x0390,  0x69, 0x00 },  /* ΐ to i */
1417   { 0x0391,  0x41, 0x00 },  /* Α to A */
1418   { 0x0392,  0x42, 0x00 },  /* Β to B */
1419   { 0x0393,  0x47, 0x00 },  /* Γ to G */
1420   { 0x0394,  0x44, 0x00 },  /* Δ to D */
1421   { 0x0395,  0x45, 0x00 },  /* Ε to E */
1422   { 0x0396,  0x5a, 0x00 },  /* Ζ to Z */
1423   { 0x0397,  0x49, 0x00 },  /* Η to I */
1424   { 0x0398,  0x54, 0x68 },  /* Θ to Th */
1425   { 0x0399,  0x49, 0x00 },  /* Ι to I */
1426   { 0x039A,  0x4b, 0x00 },  /* Κ to K */
1427   { 0x039B,  0x4c, 0x00 },  /* Λ to L */
1428   { 0x039C,  0x4d, 0x00 },  /* Μ to M */
1429   { 0x039D,  0x4e, 0x00 },  /* Ν to N */
1430   { 0x039E,  0x58, 0x00 },  /* Ξ to X */
1431   { 0x039F,  0x4f, 0x00 },  /* Ο to O */
1432   { 0x03A0,  0x50, 0x00 },  /* Π to P */
1433   { 0x03A1,  0x52, 0x00 },  /* Ρ to R */
1434   { 0x03A3,  0x53, 0x00 },  /* Σ to S */
1435   { 0x03A4,  0x54, 0x00 },  /* Τ to T */
1436   { 0x03A5,  0x59, 0x00 },  /* Υ to Y */
1437   { 0x03A6,  0x46, 0x00 },  /* Φ to F */
1438   { 0x03A7,  0x43, 0x68 },  /* Χ to Ch */
1439   { 0x03A8,  0x50, 0x73 },  /* Ψ to Ps */
1440   { 0x03A9,  0x4f, 0x00 },  /* Ω to O */
1441   { 0x03AA,  0x49, 0x00 },  /* Ϊ to I */
1442   { 0x03AB,  0x59, 0x00 },  /* Ϋ to Y */
1443   { 0x03AC,  0x61, 0x00 },  /* ά to a */
1444   { 0x03AD,  0x65, 0x00 },  /* έ to e */
1445   { 0x03AE,  0x69, 0x00 },  /* ή to i */
1446   { 0x03AF,  0x69, 0x00 },  /* ί to i */
1447   { 0x03B1,  0x61, 0x00 },  /* α to a */
1448   { 0x03B2,  0x62, 0x00 },  /* β to b */
1449   { 0x03B3,  0x67, 0x00 },  /* γ to g */
1450   { 0x03B4,  0x64, 0x00 },  /* δ to d */
1451   { 0x03B5,  0x65, 0x00 },  /* ε to e */
1452   { 0x03B6,  0x7a, 0x00 },  /* ζ to z */
1453   { 0x03B7,  0x69, 0x00 },  /* η to i */
1454   { 0x03B8,  0x74, 0x68 },  /* θ to th */
1455   { 0x03B9,  0x69, 0x00 },  /* ι to i */
1456   { 0x03BA,  0x6b, 0x00 },  /* κ to k */
1457   { 0x03BB,  0x6c, 0x00 },  /* λ to l */
1458   { 0x03BC,  0x6d, 0x00 },  /* μ to m */
1459   { 0x03BD,  0x6e, 0x00 },  /* ν to n */
1460   { 0x03BE,  0x78, 0x00 },  /* ξ to x */
1461   { 0x03BF,  0x6f, 0x00 },  /* ο to o */
1462   { 0x03C0,  0x70, 0x00 },  /* π to p */
1463   { 0x03C1,  0x72, 0x00 },  /* ρ to r */
1464   { 0x03C3,  0x73, 0x00 },  /* σ to s */
1465   { 0x03C4,  0x74, 0x00 },  /* τ to t */
1466   { 0x03C5,  0x79, 0x00 },  /* υ to y */
1467   { 0x03C6,  0x66, 0x00 },  /* φ to f */
1468   { 0x03C7,  0x63, 0x68 },  /* χ to ch */
1469   { 0x03C8,  0x70, 0x73 },  /* ψ to ps */
1470   { 0x03C9,  0x6f, 0x00 },  /* ω to o */
1471   { 0x03CA,  0x69, 0x00 },  /* ϊ to i */
1472   { 0x03CB,  0x79, 0x00 },  /* ϋ to y */
1473   { 0x03CC,  0x6f, 0x00 },  /* ό to o */
1474   { 0x03CD,  0x79, 0x00 },  /* ύ to y */
1475   { 0x03CE,  0x69, 0x00 },  /* ώ to i */
1476   { 0x0400,  0x45, 0x00 },  /* Ѐ to E */
1477   { 0x0401,  0x45, 0x00 },  /* Ё to E */
1478   { 0x0402,  0x44, 0x00 },  /* Ђ to D */
1479   { 0x0403,  0x47, 0x00 },  /* Ѓ to G */
1480   { 0x0404,  0x45, 0x00 },  /* Є to E */
1481   { 0x0405,  0x5a, 0x00 },  /* Ѕ to Z */
1482   { 0x0406,  0x49, 0x00 },  /* І to I */
1483   { 0x0407,  0x49, 0x00 },  /* Ї to I */
1484   { 0x0408,  0x4a, 0x00 },  /* Ј to J */
1485   { 0x0409,  0x49, 0x00 },  /* Љ to I */
1486   { 0x040A,  0x4e, 0x00 },  /* Њ to N */
1487   { 0x040B,  0x44, 0x00 },  /* Ћ to D */
1488   { 0x040C,  0x4b, 0x00 },  /* Ќ to K */
1489   { 0x040D,  0x49, 0x00 },  /* Ѝ to I */
1490   { 0x040E,  0x55, 0x00 },  /* Ў to U */
1491   { 0x040F,  0x44, 0x00 },  /* Џ to D */
1492   { 0x0410,  0x41, 0x00 },  /* А to A */
1493   { 0x0411,  0x42, 0x00 },  /* Б to B */
1494   { 0x0412,  0x56, 0x00 },  /* В to V */
1495   { 0x0413,  0x47, 0x00 },  /* Г to G */
1496   { 0x0414,  0x44, 0x00 },  /* Д to D */
1497   { 0x0415,  0x45, 0x00 },  /* Е to E */
1498   { 0x0416,  0x5a, 0x68 },  /* Ж to Zh */
1499   { 0x0417,  0x5a, 0x00 },  /* З to Z */
1500   { 0x0418,  0x49, 0x00 },  /* И to I */
1501   { 0x0419,  0x49, 0x00 },  /* Й to I */
1502   { 0x041A,  0x4b, 0x00 },  /* К to K */
1503   { 0x041B,  0x4c, 0x00 },  /* Л to L */
1504   { 0x041C,  0x4d, 0x00 },  /* М to M */
1505   { 0x041D,  0x4e, 0x00 },  /* Н to N */
1506   { 0x041E,  0x4f, 0x00 },  /* О to O */
1507   { 0x041F,  0x50, 0x00 },  /* П to P */
1508   { 0x0420,  0x52, 0x00 },  /* Р to R */
1509   { 0x0421,  0x53, 0x00 },  /* С to S */
1510   { 0x0422,  0x54, 0x00 },  /* Т to T */
1511   { 0x0423,  0x55, 0x00 },  /* У to U */
1512   { 0x0424,  0x46, 0x00 },  /* Ф to F */
1513   { 0x0425,  0x4b, 0x68 },  /* Х to Kh */
1514   { 0x0426,  0x54, 0x63 },  /* Ц to Tc */
1515   { 0x0427,  0x43, 0x68 },  /* Ч to Ch */
1516   { 0x0428,  0x53, 0x68 },  /* Ш to Sh */
1517   { 0x0429,  0x53, 0x68 },  /* Щ to Shch */
1518   { 0x042A,  0x61, 0x00 },  /*  to A */
1519   { 0x042B,  0x59, 0x00 },  /* Ы to Y */
1520   { 0x042C,  0x59, 0x00 },  /*  to Y */
1521   { 0x042D,  0x45, 0x00 },  /* Э to E */
1522   { 0x042E,  0x49, 0x75 },  /* Ю to Iu */
1523   { 0x042F,  0x49, 0x61 },  /* Я to Ia */
1524   { 0x0430,  0x61, 0x00 },  /* а to a */
1525   { 0x0431,  0x62, 0x00 },  /* б to b */
1526   { 0x0432,  0x76, 0x00 },  /* в to v */
1527   { 0x0433,  0x67, 0x00 },  /* г to g */
1528   { 0x0434,  0x64, 0x00 },  /* д to d */
1529   { 0x0435,  0x65, 0x00 },  /* е to e */
1530   { 0x0436,  0x7a, 0x68 },  /* ж to zh */
1531   { 0x0437,  0x7a, 0x00 },  /* з to z */
1532   { 0x0438,  0x69, 0x00 },  /* и to i */
1533   { 0x0439,  0x69, 0x00 },  /* й to i */
1534   { 0x043A,  0x6b, 0x00 },  /* к to k */
1535   { 0x043B,  0x6c, 0x00 },  /* л to l */
1536   { 0x043C,  0x6d, 0x00 },  /* м to m */
1537   { 0x043D,  0x6e, 0x00 },  /* н to n */
1538   { 0x043E,  0x6f, 0x00 },  /* о to o */
1539   { 0x043F,  0x70, 0x00 },  /* п to p */
1540   { 0x0440,  0x72, 0x00 },  /* р to r */
1541   { 0x0441,  0x73, 0x00 },  /* с to s */
1542   { 0x0442,  0x74, 0x00 },  /* т to t */
1543   { 0x0443,  0x75, 0x00 },  /* у to u */
1544   { 0x0444,  0x66, 0x00 },  /* ф to f */
1545   { 0x0445,  0x6b, 0x68 },  /* х to kh */
1546   { 0x0446,  0x74, 0x63 },  /* ц to tc */
1547   { 0x0447,  0x63, 0x68 },  /* ч to ch */
1548   { 0x0448,  0x73, 0x68 },  /* ш to sh */
1549   { 0x0449,  0x73, 0x68 },  /* щ to shch */
1550   { 0x044A,  0x61, 0x00 },  /*  to a */
1551   { 0x044B,  0x79, 0x00 },  /* ы to y */
1552   { 0x044C,  0x79, 0x00 },  /*  to y */
1553   { 0x044D,  0x65, 0x00 },  /* э to e */
1554   { 0x044E,  0x69, 0x75 },  /* ю to iu */
1555   { 0x044F,  0x69, 0x61 },  /* я to ia */
1556   { 0x0450,  0x65, 0x00 },  /* ѐ to e */
1557   { 0x0451,  0x65, 0x00 },  /* ё to e */
1558   { 0x0452,  0x64, 0x00 },  /* ђ to d */
1559   { 0x0453,  0x67, 0x00 },  /* ѓ to g */
1560   { 0x0454,  0x65, 0x00 },  /* є to e */
1561   { 0x0455,  0x7a, 0x00 },  /* ѕ to z */
1562   { 0x0456,  0x69, 0x00 },  /* і to i */
1563   { 0x0457,  0x69, 0x00 },  /* ї to i */
1564   { 0x0458,  0x6a, 0x00 },  /* ј to j */
1565   { 0x0459,  0x69, 0x00 },  /* љ to i */
1566   { 0x045A,  0x6e, 0x00 },  /* њ to n */
1567   { 0x045B,  0x64, 0x00 },  /* ћ to d */
1568   { 0x045C,  0x6b, 0x00 },  /* ќ to k */
1569   { 0x045D,  0x69, 0x00 },  /* ѝ to i */
1570   { 0x045E,  0x75, 0x00 },  /* ў to u */
1571   { 0x045F,  0x64, 0x00 },  /* џ to d */
1572   { 0x1E02,  0x42, 0x00 },  /* Ḃ to B */
1573   { 0x1E03,  0x62, 0x00 },  /* ḃ to b */
1574   { 0x1E0A,  0x44, 0x00 },  /* Ḋ to D */
1575   { 0x1E0B,  0x64, 0x00 },  /* ḋ to d */
1576   { 0x1E1E,  0x46, 0x00 },  /* Ḟ to F */
1577   { 0x1E1F,  0x66, 0x00 },  /* ḟ to f */
1578   { 0x1E40,  0x4D, 0x00 },  /* Ṁ to M */
1579   { 0x1E41,  0x6D, 0x00 },  /* ṁ to m */
1580   { 0x1E56,  0x50, 0x00 },  /* Ṗ to P */
1581   { 0x1E57,  0x70, 0x00 },  /* ṗ to p */
1582   { 0x1E60,  0x53, 0x00 },  /* Ṡ to S */
1583   { 0x1E61,  0x73, 0x00 },  /* ṡ to s */
1584   { 0x1E6A,  0x54, 0x00 },  /* Ṫ to T */
1585   { 0x1E6B,  0x74, 0x00 },  /* ṫ to t */
1586   { 0x1E80,  0x57, 0x00 },  /* Ẁ to W */
1587   { 0x1E81,  0x77, 0x00 },  /* ẁ to w */
1588   { 0x1E82,  0x57, 0x00 },  /* Ẃ to W */
1589   { 0x1E83,  0x77, 0x00 },  /* ẃ to w */
1590   { 0x1E84,  0x57, 0x00 },  /* Ẅ to W */
1591   { 0x1E85,  0x77, 0x00 },  /* ẅ to w */
1592   { 0x1EF2,  0x59, 0x00 },  /* Ỳ to Y */
1593   { 0x1EF3,  0x79, 0x00 },  /* ỳ to y */
1594   { 0xFB00,  0x66, 0x66 },  /* ff to ff */
1595   { 0xFB01,  0x66, 0x69 },  /* fi to fi */
1596   { 0xFB02,  0x66, 0x6C },  /* fl to fl */
1597   { 0xFB05,  0x73, 0x74 },  /* ſt to st */
1598   { 0xFB06,  0x73, 0x74 },  /* st to st */
1599 };
1600 
1601 /*
1602 ** Convert the input string from UTF-8 into pure ASCII by converting
1603 ** all non-ASCII characters to some combination of characters in the
1604 ** ASCII subset.
1605 **
1606 ** The returned string might contain more characters than the input.
1607 **
1608 ** Space to hold the returned string comes from sqlite3_malloc() and
1609 ** should be freed by the caller.
1610 */
1611 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1612   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1613   int c, sz, nOut;
1614   if( zOut==0 ) return 0;
1615   nOut = 0;
1616   while( nIn>0 ){
1617     c = utf8Read(zIn, nIn, &sz);
1618     zIn += sz;
1619     nIn -= sz;
1620     if( c<=127 ){
1621       zOut[nOut++] = (unsigned char)c;
1622     }else{
1623       int xTop, xBtm, x;
1624       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1625       xBtm = 0;
1626       while( xTop>=xBtm ){
1627         x = (xTop + xBtm)/2;
1628         if( translit[x].cFrom==c ){
1629           zOut[nOut++] = translit[x].cTo0;
1630           if( translit[x].cTo1 ){
1631             zOut[nOut++] = translit[x].cTo1;
1632             /* Add an extra "ch" after the "sh" for Щ and щ */
1633             if( c==0x0429 || c== 0x0449 ){
1634               zOut[nOut++] = 'c';
1635               zOut[nOut++] = 'h';
1636             }
1637           }
1638           c = 0;
1639           break;
1640         }else if( translit[x].cFrom>c ){
1641           xTop = x-1;
1642         }else{
1643           xBtm = x+1;
1644         }
1645       }
1646       if( c ) zOut[nOut++] = '?';
1647     }
1648   }
1649   zOut[nOut] = 0;
1650   return zOut;
1651 }
1652 
1653 /*
1654 ** Return the number of characters in the shortest prefix of the input
1655 ** string that transliterates to an ASCII string nTrans bytes or longer.
1656 ** Or, if the transliteration of the input string is less than nTrans
1657 ** bytes in size, return the number of characters in the input string.
1658 */
1659 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1660   int i, c, sz, nOut;
1661   int nChar;
1662 
1663   i = nOut = 0;
1664   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1665     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1666     i += sz;
1667 
1668     nOut++;
1669     if( c>=128 ){
1670       int xTop, xBtm, x;
1671       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1672       xBtm = 0;
1673       while( xTop>=xBtm ){
1674         x = (xTop + xBtm)/2;
1675         if( translit[x].cFrom==c ){
1676           if( translit[x].cTo1 ) nOut++;
1677           if( c==0x0429 || c== 0x0449 ) nOut += 2;
1678           break;
1679         }else if( translit[x].cFrom>c ){
1680           xTop = x-1;
1681         }else{
1682           xBtm = x+1;
1683         }
1684       }
1685     }
1686   }
1687 
1688   return nChar;
1689 }
1690 
1691 
1692 /*
1693 **    spellfix1_translit(X)
1694 **
1695 ** Convert a string that contains non-ASCII Roman characters into
1696 ** pure ASCII.
1697 */
1698 static void transliterateSqlFunc(
1699   sqlite3_context *context,
1700   int argc,
1701   sqlite3_value **argv
1702 ){
1703   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1704   int nIn = sqlite3_value_bytes(argv[0]);
1705   unsigned char *zOut = transliterate(zIn, nIn);
1706   if( zOut==0 ){
1707     sqlite3_result_error_nomem(context);
1708   }else{
1709     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1710   }
1711 }
1712 
1713 /*
1714 **    spellfix1_scriptcode(X)
1715 **
1716 ** Try to determine the dominant script used by the word X and return
1717 ** its ISO 15924 numeric code.
1718 **
1719 ** The current implementation only understands the following scripts:
1720 **
1721 **    215  (Latin)
1722 **    220  (Cyrillic)
1723 **    200  (Greek)
1724 **
1725 ** This routine will return 998 if the input X contains characters from
1726 ** two or more of the above scripts or 999 if X contains no characters
1727 ** from any of the above scripts.
1728 */
1729 static void scriptCodeSqlFunc(
1730   sqlite3_context *context,
1731   int argc,
1732   sqlite3_value **argv
1733 ){
1734   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1735   int nIn = sqlite3_value_bytes(argv[0]);
1736   int c, sz;
1737   int scriptMask = 0;
1738   int res;
1739   int seenDigit = 0;
1740 # define SCRIPT_LATIN       0x0001
1741 # define SCRIPT_CYRILLIC    0x0002
1742 # define SCRIPT_GREEK       0x0004
1743 # define SCRIPT_HEBREW      0x0008
1744 # define SCRIPT_ARABIC      0x0010
1745 
1746   while( nIn>0 ){
1747     c = utf8Read(zIn, nIn, &sz);
1748     zIn += sz;
1749     nIn -= sz;
1750     if( c<0x02af ){
1751       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1752         scriptMask |= SCRIPT_LATIN;
1753       }else if( c>='0' && c<='9' ){
1754         seenDigit = 1;
1755       }
1756     }else if( c>=0x0400 && c<=0x04ff ){
1757       scriptMask |= SCRIPT_CYRILLIC;
1758     }else if( c>=0x0386 && c<=0x03ce ){
1759       scriptMask |= SCRIPT_GREEK;
1760     }else if( c>=0x0590 && c<=0x05ff ){
1761       scriptMask |= SCRIPT_HEBREW;
1762     }else if( c>=0x0600 && c<=0x06ff ){
1763       scriptMask |= SCRIPT_ARABIC;
1764     }
1765   }
1766   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1767   switch( scriptMask ){
1768     case 0:                res = 999; break;
1769     case SCRIPT_LATIN:     res = 215; break;
1770     case SCRIPT_CYRILLIC:  res = 220; break;
1771     case SCRIPT_GREEK:     res = 200; break;
1772     case SCRIPT_HEBREW:    res = 125; break;
1773     case SCRIPT_ARABIC:    res = 160; break;
1774     default:               res = 998; break;
1775   }
1776   sqlite3_result_int(context, res);
1777 }
1778 
1779 /* End transliterate
1780 ******************************************************************************
1781 ******************************************************************************
1782 ** Begin spellfix1 virtual table.
1783 */
1784 
1785 /* Maximum length of a phonehash used for querying the shadow table */
1786 #define SPELLFIX_MX_HASH  32
1787 
1788 /* Maximum number of hash strings to examine per query */
1789 #define SPELLFIX_MX_RUN   1
1790 
1791 typedef struct spellfix1_vtab spellfix1_vtab;
1792 typedef struct spellfix1_cursor spellfix1_cursor;
1793 
1794 /* Fuzzy-search virtual table object */
1795 struct spellfix1_vtab {
1796   sqlite3_vtab base;         /* Base class - must be first */
1797   sqlite3 *db;               /* Database connection */
1798   char *zDbName;             /* Name of database holding this table */
1799   char *zTableName;          /* Name of the virtual table */
1800   char *zCostTable;          /* Table holding edit-distance cost numbers */
1801   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1802 };
1803 
1804 /* Fuzzy-search cursor object */
1805 struct spellfix1_cursor {
1806   sqlite3_vtab_cursor base;    /* Base class - must be first */
1807   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1808   char *zPattern;              /* rhs of MATCH clause */
1809   int idxNum;                  /* idxNum value passed to xFilter() */
1810   int nRow;                    /* Number of rows of content */
1811   int nAlloc;                  /* Number of allocated rows */
1812   int iRow;                    /* Current row of content */
1813   int iLang;                   /* Value of the langid= constraint */
1814   int iTop;                    /* Value of the top= constraint */
1815   int iScope;                  /* Value of the scope= constraint */
1816   int nSearch;                 /* Number of vocabulary items checked */
1817   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1818   struct spellfix1_row {       /* For each row of content */
1819     sqlite3_int64 iRowid;         /* Rowid for this row */
1820     char *zWord;                  /* Text for this row */
1821     int iRank;                    /* Rank for this row */
1822     int iDistance;                /* Distance from pattern for this row */
1823     int iScore;                   /* Score for sorting */
1824     int iMatchlen;                /* Value of matchlen column (or -1) */
1825     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1826   } *a;
1827 };
1828 
1829 /*
1830 ** Construct one or more SQL statements from the format string given
1831 ** and then evaluate those statements. The success code is written
1832 ** into *pRc.
1833 **
1834 ** If *pRc is initially non-zero then this routine is a no-op.
1835 */
1836 static void spellfix1DbExec(
1837   int *pRc,              /* Success code */
1838   sqlite3 *db,           /* Database in which to run SQL */
1839   const char *zFormat,   /* Format string for SQL */
1840   ...                    /* Arguments to the format string */
1841 ){
1842   va_list ap;
1843   char *zSql;
1844   if( *pRc ) return;
1845   va_start(ap, zFormat);
1846   zSql = sqlite3_vmprintf(zFormat, ap);
1847   va_end(ap);
1848   if( zSql==0 ){
1849     *pRc = SQLITE_NOMEM;
1850   }else{
1851     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1852     sqlite3_free(zSql);
1853   }
1854 }
1855 
1856 /*
1857 ** xDisconnect/xDestroy method for the fuzzy-search module.
1858 */
1859 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1860   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1861   int rc = SQLITE_OK;
1862   if( isDestroy ){
1863     sqlite3 *db = p->db;
1864     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1865                   p->zDbName, p->zTableName);
1866   }
1867   if( rc==SQLITE_OK ){
1868     sqlite3_free(p->zTableName);
1869     editDist3ConfigDelete(p->pConfig3);
1870     sqlite3_free(p->zCostTable);
1871     sqlite3_free(p);
1872   }
1873   return rc;
1874 }
1875 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1876   return spellfix1Uninit(0, pVTab);
1877 }
1878 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1879   return spellfix1Uninit(1, pVTab);
1880 }
1881 
1882 /*
1883 ** Make a copy of a string.  Remove leading and trailing whitespace
1884 ** and dequote it.
1885 */
1886 static char *spellfix1Dequote(const char *zIn){
1887   char *zOut;
1888   int i, j;
1889   char c;
1890   while( isspace((unsigned char)zIn[0]) ) zIn++;
1891   zOut = sqlite3_mprintf("%s", zIn);
1892   if( zOut==0 ) return 0;
1893   i = (int)strlen(zOut);
1894 #if 0  /* The parser will never leave spaces at the end */
1895   while( i>0 && isspace(zOut[i-1]) ){ i--; }
1896 #endif
1897   zOut[i] = 0;
1898   c = zOut[0];
1899   if( c=='\'' || c=='"' ){
1900     for(i=1, j=0; ALWAYS(zOut[i]); i++){
1901       zOut[j++] = zOut[i];
1902       if( zOut[i]==c ){
1903         if( zOut[i+1]==c ){
1904           i++;
1905         }else{
1906           zOut[j-1] = 0;
1907           break;
1908         }
1909       }
1910     }
1911   }
1912   return zOut;
1913 }
1914 
1915 
1916 /*
1917 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
1918 **
1919 **   argv[0]   -> module name  ("spellfix1")
1920 **   argv[1]   -> database name
1921 **   argv[2]   -> table name
1922 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
1923 */
1924 static int spellfix1Init(
1925   int isCreate,
1926   sqlite3 *db,
1927   void *pAux,
1928   int argc, const char *const*argv,
1929   sqlite3_vtab **ppVTab,
1930   char **pzErr
1931 ){
1932   spellfix1_vtab *pNew = 0;
1933   /* const char *zModule = argv[0]; // not used */
1934   const char *zDbName = argv[1];
1935   const char *zTableName = argv[2];
1936   int nDbName;
1937   int rc = SQLITE_OK;
1938   int i;
1939 
1940   nDbName = (int)strlen(zDbName);
1941   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
1942   if( pNew==0 ){
1943     rc = SQLITE_NOMEM;
1944   }else{
1945     memset(pNew, 0, sizeof(*pNew));
1946     pNew->zDbName = (char*)&pNew[1];
1947     memcpy(pNew->zDbName, zDbName, nDbName+1);
1948     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
1949     pNew->db = db;
1950     if( pNew->zTableName==0 ){
1951       rc = SQLITE_NOMEM;
1952     }else{
1953       rc = sqlite3_declare_vtab(db,
1954            "CREATE TABLE x(word,rank,distance,langid, "
1955            "score, matchlen, phonehash HIDDEN, "
1956            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
1957            "soundslike HIDDEN, command HIDDEN)"
1958       );
1959 #define SPELLFIX_COL_WORD            0
1960 #define SPELLFIX_COL_RANK            1
1961 #define SPELLFIX_COL_DISTANCE        2
1962 #define SPELLFIX_COL_LANGID          3
1963 #define SPELLFIX_COL_SCORE           4
1964 #define SPELLFIX_COL_MATCHLEN        5
1965 #define SPELLFIX_COL_PHONEHASH       6
1966 #define SPELLFIX_COL_TOP             7
1967 #define SPELLFIX_COL_SCOPE           8
1968 #define SPELLFIX_COL_SRCHCNT         9
1969 #define SPELLFIX_COL_SOUNDSLIKE     10
1970 #define SPELLFIX_COL_COMMAND        11
1971     }
1972     if( rc==SQLITE_OK && isCreate ){
1973       spellfix1DbExec(&rc, db,
1974          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
1975          "  id INTEGER PRIMARY KEY,\n"
1976          "  rank INT,\n"
1977          "  langid INT,\n"
1978          "  word TEXT,\n"
1979          "  k1 TEXT,\n"
1980          "  k2 TEXT\n"
1981          ");\n",
1982          zDbName, zTableName
1983       );
1984       spellfix1DbExec(&rc, db,
1985          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
1986             "ON \"%w_vocab\"(langid,k2);",
1987          zDbName, zTableName, zTableName
1988       );
1989     }
1990     for(i=3; rc==SQLITE_OK && i<argc; i++){
1991       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
1992         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
1993         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
1994         continue;
1995       }
1996       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
1997       rc = SQLITE_ERROR;
1998     }
1999   }
2000 
2001   if( rc && pNew ){
2002     *ppVTab = 0;
2003     spellfix1Uninit(0, &pNew->base);
2004   }else{
2005     *ppVTab = (sqlite3_vtab *)pNew;
2006   }
2007   return rc;
2008 }
2009 
2010 /*
2011 ** The xConnect and xCreate methods
2012 */
2013 static int spellfix1Connect(
2014   sqlite3 *db,
2015   void *pAux,
2016   int argc, const char *const*argv,
2017   sqlite3_vtab **ppVTab,
2018   char **pzErr
2019 ){
2020   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2021 }
2022 static int spellfix1Create(
2023   sqlite3 *db,
2024   void *pAux,
2025   int argc, const char *const*argv,
2026   sqlite3_vtab **ppVTab,
2027   char **pzErr
2028 ){
2029   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2030 }
2031 
2032 /*
2033 ** Clear all of the content from a cursor.
2034 */
2035 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2036   int i;
2037   for(i=0; i<pCur->nRow; i++){
2038     sqlite3_free(pCur->a[i].zWord);
2039   }
2040   pCur->nRow = 0;
2041   pCur->iRow = 0;
2042   pCur->nSearch = 0;
2043   if( pCur->pFullScan ){
2044     sqlite3_finalize(pCur->pFullScan);
2045     pCur->pFullScan = 0;
2046   }
2047 }
2048 
2049 /*
2050 ** Resize the cursor to hold up to N rows of content
2051 */
2052 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2053   struct spellfix1_row *aNew;
2054   assert( N>=pCur->nRow );
2055   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2056   if( aNew==0 && N>0 ){
2057     spellfix1ResetCursor(pCur);
2058     sqlite3_free(pCur->a);
2059     pCur->nAlloc = 0;
2060     pCur->a = 0;
2061   }else{
2062     pCur->nAlloc = N;
2063     pCur->a = aNew;
2064   }
2065 }
2066 
2067 
2068 /*
2069 ** Close a fuzzy-search cursor.
2070 */
2071 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2072   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2073   spellfix1ResetCursor(pCur);
2074   spellfix1ResizeCursor(pCur, 0);
2075   sqlite3_free(pCur->zPattern);
2076   sqlite3_free(pCur);
2077   return SQLITE_OK;
2078 }
2079 
2080 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2081 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2082 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2083 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2084 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2085 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2086 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2087 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2088 
2089 /*
2090 **
2091 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2092 ** above.
2093 **
2094 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2095 ** if specified and in that order.
2096 */
2097 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2098   int iPlan = 0;
2099   int iLangTerm = -1;
2100   int iTopTerm = -1;
2101   int iScopeTerm = -1;
2102   int iDistTerm = -1;
2103   int iRowidTerm = -1;
2104   int i;
2105   const struct sqlite3_index_constraint *pConstraint;
2106   pConstraint = pIdxInfo->aConstraint;
2107   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2108     if( pConstraint->usable==0 ) continue;
2109 
2110     /* Terms of the form:  word MATCH $str */
2111     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2112      && pConstraint->iColumn==SPELLFIX_COL_WORD
2113      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2114     ){
2115       iPlan |= SPELLFIX_IDXNUM_MATCH;
2116       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2117       pIdxInfo->aConstraintUsage[i].omit = 1;
2118     }
2119 
2120     /* Terms of the form:  langid = $langid  */
2121     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2122      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2123      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2124     ){
2125       iPlan |= SPELLFIX_IDXNUM_LANGID;
2126       iLangTerm = i;
2127     }
2128 
2129     /* Terms of the form:  top = $top */
2130     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2131      && pConstraint->iColumn==SPELLFIX_COL_TOP
2132      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2133     ){
2134       iPlan |= SPELLFIX_IDXNUM_TOP;
2135       iTopTerm = i;
2136     }
2137 
2138     /* Terms of the form:  scope = $scope */
2139     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2140      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2141      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2142     ){
2143       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2144       iScopeTerm = i;
2145     }
2146 
2147     /* Terms of the form:  distance < $dist or distance <= $dist */
2148     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2149      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2150      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2151           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2152     ){
2153       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2154         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2155       }else{
2156         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2157       }
2158       iDistTerm = i;
2159     }
2160 
2161     /* Terms of the form:  distance < $dist or distance <= $dist */
2162     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2163      && pConstraint->iColumn<0
2164      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2165     ){
2166       iPlan |= SPELLFIX_IDXNUM_ROWID;
2167       iRowidTerm = i;
2168     }
2169   }
2170   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2171     int idx = 2;
2172     pIdxInfo->idxNum = iPlan;
2173     if( pIdxInfo->nOrderBy==1
2174      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2175      && pIdxInfo->aOrderBy[0].desc==0
2176     ){
2177       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2178     }
2179     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2180       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2181       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2182     }
2183     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2184       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2185       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2186     }
2187     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2188       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2189       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2190     }
2191     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2192       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2193       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2194     }
2195     pIdxInfo->estimatedCost = 1e5;
2196   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2197     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2198     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2199     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2200     pIdxInfo->estimatedCost = 5;
2201   }else{
2202     pIdxInfo->idxNum = 0;
2203     pIdxInfo->estimatedCost = 1e50;
2204   }
2205   return SQLITE_OK;
2206 }
2207 
2208 /*
2209 ** Open a new fuzzy-search cursor.
2210 */
2211 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2212   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2213   spellfix1_cursor *pCur;
2214   pCur = sqlite3_malloc64( sizeof(*pCur) );
2215   if( pCur==0 ) return SQLITE_NOMEM;
2216   memset(pCur, 0, sizeof(*pCur));
2217   pCur->pVTab = p;
2218   *ppCursor = &pCur->base;
2219   return SQLITE_OK;
2220 }
2221 
2222 /*
2223 ** Adjust a distance measurement by the words rank in order to show
2224 ** preference to common words.
2225 */
2226 static int spellfix1Score(int iDistance, int iRank){
2227   int iLog2;
2228   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2229   return iDistance + 32 - iLog2;
2230 }
2231 
2232 /*
2233 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2234 ** that they sort in order of increasing distance.
2235 */
2236 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2237   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2238   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2239   return a->iScore - b->iScore;
2240 }
2241 
2242 /*
2243 ** A structure used to pass information from spellfix1FilterForMatch()
2244 ** into spellfix1RunQuery().
2245 */
2246 typedef struct MatchQuery {
2247   spellfix1_cursor *pCur;          /* The cursor being queried */
2248   sqlite3_stmt *pStmt;             /* shadow table query statment */
2249   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2250   const char *zPattern;            /* Transliterated input string */
2251   int nPattern;                    /* Length of zPattern */
2252   EditDist3FromString *pMatchStr3; /* Original unicode string */
2253   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2254   const EditDist3Lang *pLang;      /* The selected language coefficients */
2255   int iLang;                       /* The language id */
2256   int iScope;                      /* Default scope */
2257   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2258   int rc;                          /* Error code */
2259   int nRun;                  /* Number of prior runs for the same zPattern */
2260   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2261 } MatchQuery;
2262 
2263 /*
2264 ** Run a query looking for the best matches against zPattern using
2265 ** zHash as the character class seed hash.
2266 */
2267 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2268   const char *zK1;
2269   const char *zWord;
2270   int iDist;
2271   int iRank;
2272   int iScore;
2273   int iWorst = 0;
2274   int idx;
2275   int idxWorst = -1;
2276   int i;
2277   int iScope = p->iScope;
2278   spellfix1_cursor *pCur = p->pCur;
2279   sqlite3_stmt *pStmt = p->pStmt;
2280   char zHash1[SPELLFIX_MX_HASH];
2281   char zHash2[SPELLFIX_MX_HASH];
2282   char *zClass;
2283   int nClass;
2284   int rc;
2285 
2286   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2287   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2288   if( zClass==0 ){
2289     p->rc = SQLITE_NOMEM;
2290     return;
2291   }
2292   nClass = (int)strlen(zClass);
2293   if( nClass>SPELLFIX_MX_HASH-2 ){
2294     nClass = SPELLFIX_MX_HASH-2;
2295     zClass[nClass] = 0;
2296   }
2297   if( nClass<=iScope ){
2298     if( nClass>2 ){
2299       iScope = nClass-1;
2300     }else{
2301       iScope = nClass;
2302     }
2303   }
2304   memcpy(zHash1, zClass, iScope);
2305   sqlite3_free(zClass);
2306   zHash1[iScope] = 0;
2307   memcpy(zHash2, zHash1, iScope);
2308   zHash2[iScope] = 'Z';
2309   zHash2[iScope+1] = 0;
2310 #if SPELLFIX_MX_RUN>1
2311   for(i=0; i<p->nRun; i++){
2312     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2313   }
2314 #endif
2315   assert( p->nRun<SPELLFIX_MX_RUN );
2316   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2317   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2318    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2319   ){
2320     p->rc = SQLITE_NOMEM;
2321     return;
2322   }
2323 #if SPELLFIX_MX_RUN>1
2324   for(i=0; i<pCur->nRow; i++){
2325     if( pCur->a[i].iScore>iWorst ){
2326       iWorst = pCur->a[i].iScore;
2327       idxWorst = i;
2328     }
2329   }
2330 #endif
2331   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2332     int iMatchlen = -1;
2333     iRank = sqlite3_column_int(pStmt, 2);
2334     if( p->pMatchStr3 ){
2335       int nWord = sqlite3_column_bytes(pStmt, 1);
2336       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2337       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2338     }else{
2339       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2340       if( zK1==0 ) continue;
2341       iDist = editdist1(p->zPattern, zK1, 0);
2342     }
2343     if( iDist<0 ){
2344       p->rc = SQLITE_NOMEM;
2345       break;
2346     }
2347     pCur->nSearch++;
2348 
2349     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2350     ** check if this row meets it. If not, jump back up to the top of the
2351     ** loop to process the next row. Otherwise, if the row does match the
2352     ** distance constraint, check if the pCur->a[] array is already full.
2353     ** If it is and no explicit "top = ?" constraint was present in the
2354     ** query, grow the array to ensure there is room for the new entry. */
2355     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2356     if( p->iMaxDist>=0 ){
2357       if( iDist>p->iMaxDist ) continue;
2358       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2359         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2360         if( pCur->a==0 ) break;
2361       }
2362     }
2363 
2364     iScore = spellfix1Score(iDist,iRank);
2365     if( pCur->nRow<pCur->nAlloc ){
2366       idx = pCur->nRow;
2367     }else if( iScore<iWorst ){
2368       idx = idxWorst;
2369       sqlite3_free(pCur->a[idx].zWord);
2370     }else{
2371       continue;
2372     }
2373 
2374     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2375     if( pCur->a[idx].zWord==0 ){
2376       p->rc = SQLITE_NOMEM;
2377       break;
2378     }
2379     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2380     pCur->a[idx].iRank = iRank;
2381     pCur->a[idx].iDistance = iDist;
2382     pCur->a[idx].iScore = iScore;
2383     pCur->a[idx].iMatchlen = iMatchlen;
2384     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2385     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2386     if( pCur->nRow==pCur->nAlloc ){
2387       iWorst = pCur->a[0].iScore;
2388       idxWorst = 0;
2389       for(i=1; i<pCur->nRow; i++){
2390         iScore = pCur->a[i].iScore;
2391         if( iWorst<iScore ){
2392           iWorst = iScore;
2393           idxWorst = i;
2394         }
2395       }
2396     }
2397   }
2398   rc = sqlite3_reset(pStmt);
2399   if( rc ) p->rc = rc;
2400 }
2401 
2402 /*
2403 ** This version of the xFilter method work if the MATCH term is present
2404 ** and we are doing a scan.
2405 */
2406 static int spellfix1FilterForMatch(
2407   spellfix1_cursor *pCur,
2408   int argc,
2409   sqlite3_value **argv
2410 ){
2411   int idxNum = pCur->idxNum;
2412   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2413   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2414   char *zPattern;                    /* Transliteration of zMatchThis */
2415   int nPattern;                      /* Length of zPattern */
2416   int iLimit = 20;                   /* Max number of rows of output */
2417   int iScope = 3;                    /* Use this many characters of zClass */
2418   int iLang = 0;                     /* Language code */
2419   char *zSql;                        /* SQL of shadow table query */
2420   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2421   int rc;                            /* Result code */
2422   int idx = 1;                       /* Next available filter parameter */
2423   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2424   MatchQuery x;                      /* For passing info to RunQuery() */
2425 
2426   /* Load the cost table if we have not already done so */
2427   if( p->zCostTable!=0 && p->pConfig3==0 ){
2428     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2429     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2430     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2431     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2432     if( rc ) return rc;
2433   }
2434   memset(&x, 0, sizeof(x));
2435   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2436   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2437 
2438   if( idxNum&2 ){
2439     iLang = sqlite3_value_int(argv[idx++]);
2440   }
2441   if( idxNum&4 ){
2442     iLimit = sqlite3_value_int(argv[idx++]);
2443     if( iLimit<1 ) iLimit = 1;
2444   }
2445   if( idxNum&8 ){
2446     x.iScope = sqlite3_value_int(argv[idx++]);
2447     if( x.iScope<1 ) x.iScope = 1;
2448     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2449   }
2450   if( idxNum&(16|32) ){
2451     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2452     if( idxNum&16 ) x.iMaxDist--;
2453     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2454   }
2455   spellfix1ResetCursor(pCur);
2456   spellfix1ResizeCursor(pCur, iLimit);
2457   zMatchThis = sqlite3_value_text(argv[0]);
2458   if( zMatchThis==0 ) return SQLITE_OK;
2459   if( p->pConfig3 ){
2460     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2461     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2462     if( pMatchStr3==0 ){
2463       x.rc = SQLITE_NOMEM;
2464       goto filter_exit;
2465     }
2466   }else{
2467     x.pLang = 0;
2468   }
2469   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2470   sqlite3_free(pCur->zPattern);
2471   pCur->zPattern = zPattern;
2472   if( zPattern==0 ){
2473     x.rc = SQLITE_NOMEM;
2474     goto filter_exit;
2475   }
2476   nPattern = (int)strlen(zPattern);
2477   if( zPattern[nPattern-1]=='*' ) nPattern--;
2478   zSql = sqlite3_mprintf(
2479      "SELECT id, word, rank, k1"
2480      "  FROM \"%w\".\"%w_vocab\""
2481      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2482      p->zDbName, p->zTableName, iLang
2483   );
2484   if( zSql==0 ){
2485     x.rc = SQLITE_NOMEM;
2486     pStmt = 0;
2487     goto filter_exit;
2488   }
2489   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2490   sqlite3_free(zSql);
2491   pCur->iLang = iLang;
2492   x.pCur = pCur;
2493   x.pStmt = pStmt;
2494   x.zPattern = zPattern;
2495   x.nPattern = nPattern;
2496   x.pMatchStr3 = pMatchStr3;
2497   x.iLang = iLang;
2498   x.rc = rc;
2499   x.pConfig3 = p->pConfig3;
2500   if( x.rc==SQLITE_OK ){
2501     spellfix1RunQuery(&x, zPattern, nPattern);
2502   }
2503 
2504   if( pCur->a ){
2505     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2506     pCur->iTop = iLimit;
2507     pCur->iScope = iScope;
2508   }else{
2509     x.rc = SQLITE_NOMEM;
2510   }
2511 
2512 filter_exit:
2513   sqlite3_finalize(pStmt);
2514   editDist3FromStringDelete(pMatchStr3);
2515   return x.rc;
2516 }
2517 
2518 /*
2519 ** This version of xFilter handles a full-table scan case
2520 */
2521 static int spellfix1FilterForFullScan(
2522   spellfix1_cursor *pCur,
2523   int argc,
2524   sqlite3_value **argv
2525 ){
2526   int rc = SQLITE_OK;
2527   int idxNum = pCur->idxNum;
2528   char *zSql;
2529   spellfix1_vtab *pVTab = pCur->pVTab;
2530   spellfix1ResetCursor(pCur);
2531   assert( idxNum==0 || idxNum==64 );
2532   zSql = sqlite3_mprintf(
2533      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2534      pVTab->zDbName, pVTab->zTableName,
2535      ((idxNum & 64) ? " WHERE rowid=?" : "")
2536   );
2537   if( zSql==0 ) return SQLITE_NOMEM;
2538   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2539   sqlite3_free(zSql);
2540   if( rc==SQLITE_OK && (idxNum & 64) ){
2541     assert( argc==1 );
2542     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2543   }
2544   pCur->nRow = pCur->iRow = 0;
2545   if( rc==SQLITE_OK ){
2546     rc = sqlite3_step(pCur->pFullScan);
2547     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2548     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2549   }else{
2550     pCur->iRow = 0;
2551   }
2552   return rc;
2553 }
2554 
2555 
2556 /*
2557 ** Called to "rewind" a cursor back to the beginning so that
2558 ** it starts its output over again.  Always called at least once
2559 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2560 */
2561 static int spellfix1Filter(
2562   sqlite3_vtab_cursor *cur,
2563   int idxNum, const char *idxStr,
2564   int argc, sqlite3_value **argv
2565 ){
2566   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2567   int rc;
2568   pCur->idxNum = idxNum;
2569   if( idxNum & 1 ){
2570     rc = spellfix1FilterForMatch(pCur, argc, argv);
2571   }else{
2572     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2573   }
2574   return rc;
2575 }
2576 
2577 
2578 /*
2579 ** Advance a cursor to its next row of output
2580 */
2581 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2582   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2583   int rc = SQLITE_OK;
2584   if( pCur->iRow < pCur->nRow ){
2585     if( pCur->pFullScan ){
2586       rc = sqlite3_step(pCur->pFullScan);
2587       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2588       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2589     }else{
2590       pCur->iRow++;
2591     }
2592   }
2593   return rc;
2594 }
2595 
2596 /*
2597 ** Return TRUE if we are at the end-of-file
2598 */
2599 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2600   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2601   return pCur->iRow>=pCur->nRow;
2602 }
2603 
2604 /*
2605 ** Return columns from the current row.
2606 */
2607 static int spellfix1Column(
2608   sqlite3_vtab_cursor *cur,
2609   sqlite3_context *ctx,
2610   int i
2611 ){
2612   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2613   if( pCur->pFullScan ){
2614     if( i<=SPELLFIX_COL_LANGID ){
2615       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2616     }else{
2617       sqlite3_result_null(ctx);
2618     }
2619     return SQLITE_OK;
2620   }
2621   switch( i ){
2622     case SPELLFIX_COL_WORD: {
2623       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2624       break;
2625     }
2626     case SPELLFIX_COL_RANK: {
2627       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2628       break;
2629     }
2630     case SPELLFIX_COL_DISTANCE: {
2631       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2632       break;
2633     }
2634     case SPELLFIX_COL_LANGID: {
2635       sqlite3_result_int(ctx, pCur->iLang);
2636       break;
2637     }
2638     case SPELLFIX_COL_SCORE: {
2639       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2640       break;
2641     }
2642     case SPELLFIX_COL_MATCHLEN: {
2643       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2644       if( iMatchlen<0 ){
2645         int nPattern = (int)strlen(pCur->zPattern);
2646         char *zWord = pCur->a[pCur->iRow].zWord;
2647         int nWord = (int)strlen(zWord);
2648 
2649         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2650           char *zTranslit;
2651           int res;
2652           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2653           if( !zTranslit ) return SQLITE_NOMEM;
2654           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2655           sqlite3_free(zTranslit);
2656           if( res<0 ) return SQLITE_NOMEM;
2657           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2658         }else{
2659           iMatchlen = utf8Charlen(zWord, nWord);
2660         }
2661       }
2662 
2663       sqlite3_result_int(ctx, iMatchlen);
2664       break;
2665     }
2666     case SPELLFIX_COL_PHONEHASH: {
2667       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2668       break;
2669     }
2670     case SPELLFIX_COL_TOP: {
2671       sqlite3_result_int(ctx, pCur->iTop);
2672       break;
2673     }
2674     case SPELLFIX_COL_SCOPE: {
2675       sqlite3_result_int(ctx, pCur->iScope);
2676       break;
2677     }
2678     case SPELLFIX_COL_SRCHCNT: {
2679       sqlite3_result_int(ctx, pCur->nSearch);
2680       break;
2681     }
2682     default: {
2683       sqlite3_result_null(ctx);
2684       break;
2685     }
2686   }
2687   return SQLITE_OK;
2688 }
2689 
2690 /*
2691 ** The rowid.
2692 */
2693 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2694   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2695   if( pCur->pFullScan ){
2696     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2697   }else{
2698     *pRowid = pCur->a[pCur->iRow].iRowid;
2699   }
2700   return SQLITE_OK;
2701 }
2702 
2703 /*
2704 ** This function is called by the xUpdate() method. It returns a string
2705 ** containing the conflict mode that xUpdate() should use for the current
2706 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2707 */
2708 static const char *spellfix1GetConflict(sqlite3 *db){
2709   static const char *azConflict[] = {
2710     /* Note: Instead of "FAIL" - "ABORT". */
2711     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2712   };
2713   int eConflict = sqlite3_vtab_on_conflict(db);
2714 
2715   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2716        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2717        || eConflict==SQLITE_REPLACE
2718   );
2719   assert( SQLITE_ROLLBACK==1 );
2720   assert( SQLITE_IGNORE==2 );
2721   assert( SQLITE_FAIL==3 );
2722   assert( SQLITE_ABORT==4 );
2723   assert( SQLITE_REPLACE==5 );
2724 
2725   return azConflict[eConflict-1];
2726 }
2727 
2728 /*
2729 ** The xUpdate() method.
2730 */
2731 static int spellfix1Update(
2732   sqlite3_vtab *pVTab,
2733   int argc,
2734   sqlite3_value **argv,
2735   sqlite_int64 *pRowid
2736 ){
2737   int rc = SQLITE_OK;
2738   sqlite3_int64 rowid, newRowid;
2739   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2740   sqlite3 *db = p->db;
2741 
2742   if( argc==1 ){
2743     /* A delete operation on the rowid given by argv[0] */
2744     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2745     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2746                            " WHERE id=%lld",
2747                   p->zDbName, p->zTableName, rowid);
2748   }else{
2749     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2750     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2751     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2752     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2753     const unsigned char *zSoundslike =
2754            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2755     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2756     char *zK1, *zK2;
2757     int i;
2758     char c;
2759     const char *zConflict = spellfix1GetConflict(db);
2760 
2761     if( zWord==0 ){
2762       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2763       ** cause zWord to be NULL, so we look at the "command" column to see
2764       ** what special actions to take */
2765       const char *zCmd =
2766          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2767       if( zCmd==0 ){
2768         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2769                                          p->zTableName);
2770         return SQLITE_CONSTRAINT_NOTNULL;
2771       }
2772       if( strcmp(zCmd,"reset")==0 ){
2773         /* Reset the  edit cost table (if there is one). */
2774         editDist3ConfigDelete(p->pConfig3);
2775         p->pConfig3 = 0;
2776         return SQLITE_OK;
2777       }
2778       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2779         editDist3ConfigDelete(p->pConfig3);
2780         p->pConfig3 = 0;
2781         sqlite3_free(p->zCostTable);
2782         p->zCostTable = spellfix1Dequote(zCmd+16);
2783         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2784         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2785           sqlite3_free(p->zCostTable);
2786           p->zCostTable = 0;
2787         }
2788         return SQLITE_OK;
2789       }
2790       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2791                                        p->zTableName, zCmd);
2792       return SQLITE_ERROR;
2793     }
2794     if( iRank<1 ) iRank = 1;
2795     if( zSoundslike ){
2796       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2797     }else{
2798       zK1 = (char*)transliterate(zWord, nWord);
2799     }
2800     if( zK1==0 ) return SQLITE_NOMEM;
2801     for(i=0; (c = zK1[i])!=0; i++){
2802        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2803     }
2804     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2805     if( zK2==0 ){
2806       sqlite3_free(zK1);
2807       return SQLITE_NOMEM;
2808     }
2809     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2810       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2811         spellfix1DbExec(&rc, db,
2812                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2813                "VALUES(%d,%d,%Q,%Q,%Q)",
2814                p->zDbName, p->zTableName,
2815                iRank, iLang, zWord, zK1, zK2
2816         );
2817       }else{
2818         newRowid = sqlite3_value_int64(argv[1]);
2819         spellfix1DbExec(&rc, db,
2820             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2821             "VALUES(%lld,%d,%d,%Q,%Q,%Q)",
2822             zConflict, p->zDbName, p->zTableName,
2823             newRowid, iRank, iLang, zWord, zK1, zK2
2824         );
2825       }
2826       *pRowid = sqlite3_last_insert_rowid(db);
2827     }else{
2828       rowid = sqlite3_value_int64(argv[0]);
2829       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2830       spellfix1DbExec(&rc, db,
2831              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2832              " word=%Q, k1=%Q, k2=%Q WHERE id=%lld",
2833              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2834              zWord, zK1, zK2, rowid
2835       );
2836     }
2837     sqlite3_free(zK1);
2838     sqlite3_free(zK2);
2839   }
2840   return rc;
2841 }
2842 
2843 /*
2844 ** Rename the spellfix1 table.
2845 */
2846 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2847   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2848   sqlite3 *db = p->db;
2849   int rc = SQLITE_OK;
2850   char *zNewName = sqlite3_mprintf("%s", zNew);
2851   if( zNewName==0 ){
2852     return SQLITE_NOMEM;
2853   }
2854   spellfix1DbExec(&rc, db,
2855      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2856      p->zDbName, p->zTableName, zNewName
2857   );
2858   if( rc==SQLITE_OK ){
2859     sqlite3_free(p->zTableName);
2860     p->zTableName = zNewName;
2861   }else{
2862     sqlite3_free(zNewName);
2863   }
2864   return rc;
2865 }
2866 
2867 
2868 /*
2869 ** A virtual table module that provides fuzzy search.
2870 */
2871 static sqlite3_module spellfix1Module = {
2872   0,                       /* iVersion */
2873   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2874   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2875   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2876   spellfix1Disconnect,     /* xDisconnect - close a connection */
2877   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2878   spellfix1Open,           /* xOpen - open a cursor */
2879   spellfix1Close,          /* xClose - close a cursor */
2880   spellfix1Filter,         /* xFilter - configure scan constraints */
2881   spellfix1Next,           /* xNext - advance a cursor */
2882   spellfix1Eof,            /* xEof - check for end of scan */
2883   spellfix1Column,         /* xColumn - read data */
2884   spellfix1Rowid,          /* xRowid - read data */
2885   spellfix1Update,         /* xUpdate */
2886   0,                       /* xBegin */
2887   0,                       /* xSync */
2888   0,                       /* xCommit */
2889   0,                       /* xRollback */
2890   0,                       /* xFindMethod */
2891   spellfix1Rename,         /* xRename */
2892 };
2893 
2894 /*
2895 ** Register the various functions and the virtual table.
2896 */
2897 static int spellfix1Register(sqlite3 *db){
2898   int rc = SQLITE_OK;
2899   int i;
2900   rc = sqlite3_create_function(db, "spellfix1_translit", 1,
2901                                SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2902                                 transliterateSqlFunc, 0, 0);
2903   if( rc==SQLITE_OK ){
2904     rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
2905                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2906                                   editdistSqlFunc, 0, 0);
2907   }
2908   if( rc==SQLITE_OK ){
2909     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
2910                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2911                                   phoneticHashSqlFunc, 0, 0);
2912   }
2913   if( rc==SQLITE_OK ){
2914     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
2915                                   SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2916                                   scriptCodeSqlFunc, 0, 0);
2917   }
2918   if( rc==SQLITE_OK ){
2919     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
2920   }
2921   if( rc==SQLITE_OK ){
2922     rc = editDist3Install(db);
2923   }
2924 
2925   /* Verify sanity of the translit[] table */
2926   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
2927     assert( translit[i].cFrom<translit[i+1].cFrom );
2928   }
2929 
2930   return rc;
2931 }
2932 
2933 #endif /* SQLITE_OMIT_VIRTUALTABLE */
2934 
2935 /*
2936 ** Extension load function.
2937 */
2938 #ifdef _WIN32
2939 __declspec(dllexport)
2940 #endif
2941 int sqlite3_spellfix_init(
2942   sqlite3 *db,
2943   char **pzErrMsg,
2944   const sqlite3_api_routines *pApi
2945 ){
2946   SQLITE_EXTENSION_INIT2(pApi);
2947 #ifndef SQLITE_OMIT_VIRTUALTABLE
2948   return spellfix1Register(db);
2949 #endif
2950   return SQLITE_OK;
2951 }
2952