xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision 7aa3ebee)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # include <string.h>
22 # include <stdio.h>
23 # include <stdlib.h>
24 # include <assert.h>
25 # define ALWAYS(X)  1
26 # define NEVER(X)   0
27   typedef unsigned char u8;
28   typedef unsigned short u16;
29 #endif
30 #include <ctype.h>
31 
32 #ifndef SQLITE_OMIT_VIRTUALTABLE
33 
34 /*
35 ** Character classes for ASCII characters:
36 **
37 **   0   ''        Silent letters:   H W
38 **   1   'A'       Any vowel:   A E I O U (Y)
39 **   2   'B'       A bilabeal stop or fricative:  B F P V W
40 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
41 **   4   'D'       Alveolar stops:  D T
42 **   5   'H'       Letter H at the beginning of a word
43 **   6   'L'       Glide:  L
44 **   7   'R'       Semivowel:  R
45 **   8   'M'       Nasals:  M N
46 **   9   'Y'       Letter Y at the beginning of a word.
47 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
48 **   11  ' '       White space
49 **   12  '?'       Other.
50 */
51 #define CCLASS_SILENT         0
52 #define CCLASS_VOWEL          1
53 #define CCLASS_B              2
54 #define CCLASS_C              3
55 #define CCLASS_D              4
56 #define CCLASS_H              5
57 #define CCLASS_L              6
58 #define CCLASS_R              7
59 #define CCLASS_M              8
60 #define CCLASS_Y              9
61 #define CCLASS_DIGIT         10
62 #define CCLASS_SPACE         11
63 #define CCLASS_OTHER         12
64 
65 /*
66 ** The following table gives the character class for non-initial ASCII
67 ** characters.
68 */
69 static const unsigned char midClass[] = {
70  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
71  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
72  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
73  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
74  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
75  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
81  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
82  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
83  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
84  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
85  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
86  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
87  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
88  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
89  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
90  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
91  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
92  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
93  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
94  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
95  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
96  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
97  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
98  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
99  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
100  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
101  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
102  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
103  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
104  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
105  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
106  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
107  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
108  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
109  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
110  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
111  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
112  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
113 };
114 /*
115 ** This tables gives the character class for ASCII characters that form the
116 ** initial character of a word.  The only difference from midClass is with
117 ** the letters H, W, and Y.
118 */
119 static const unsigned char initClass[] = {
120  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
121  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
122  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
123  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
124  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
125  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
131  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
132  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
133  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
134  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
135  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
136  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
137  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
138  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
139  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
140  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
141  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
142  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
143  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
144  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
145  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
146  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
147  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
148  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
149  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
150  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
151  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
152  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
153  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
154  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
155  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
156  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
157  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
158  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
159  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
160  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
161  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
162  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
163 };
164 
165 /*
166 ** Mapping from the character class number (0-13) to a symbol for each
167 ** character class.  Note that initClass[] can be used to map the class
168 ** symbol back into the class number.
169 */
170 static const unsigned char className[] = ".ABCDHLRMY9 ?";
171 
172 /*
173 ** Generate a "phonetic hash" from a string of ASCII characters
174 ** in zIn[0..nIn-1].
175 **
176 **   * Map characters by character class as defined above.
177 **   * Omit double-letters
178 **   * Omit vowels beside R and L
179 **   * Omit T when followed by CH
180 **   * Omit W when followed by R
181 **   * Omit D when followed by J or G
182 **   * Omit K in KN or G in GN at the beginning of a word
183 **
184 ** Space to hold the result is obtained from sqlite3_malloc()
185 **
186 ** Return NULL if memory allocation fails.
187 */
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
189   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
190   int i;
191   int nOut = 0;
192   char cPrev = 0x77;
193   char cPrevX = 0x77;
194   const unsigned char *aClass = initClass;
195 
196   if( zOut==0 ) return 0;
197   if( nIn>2 ){
198     switch( zIn[0] ){
199       case 'g':
200       case 'k': {
201         if( zIn[1]=='n' ){ zIn++; nIn--; }
202         break;
203       }
204     }
205   }
206   for(i=0; i<nIn; i++){
207     unsigned char c = zIn[i];
208     if( i+1<nIn ){
209       if( c=='w' && zIn[i+1]=='r' ) continue;
210       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
211       if( i+2<nIn ){
212         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
213       }
214     }
215     c = aClass[c&0x7f];
216     if( c==CCLASS_SPACE ) continue;
217     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
218     aClass = midClass;
219     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
220        continue; /* No vowels beside L or R */
221     }
222     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
223        nOut--;   /* No vowels beside L or R */
224     }
225     cPrev = c;
226     if( c==CCLASS_SILENT ) continue;
227     cPrevX = c;
228     c = className[c];
229     assert( nOut>=0 );
230     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
231   }
232   zOut[nOut] = 0;
233   return zOut;
234 }
235 
236 /*
237 ** This is an SQL function wrapper around phoneticHash().  See
238 ** the description of phoneticHash() for additional information.
239 */
240 static void phoneticHashSqlFunc(
241   sqlite3_context *context,
242   int argc,
243   sqlite3_value **argv
244 ){
245   const unsigned char *zIn;
246   unsigned char *zOut;
247 
248   zIn = sqlite3_value_text(argv[0]);
249   if( zIn==0 ) return;
250   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
251   if( zOut==0 ){
252     sqlite3_result_error_nomem(context);
253   }else{
254     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
255   }
256 }
257 
258 /*
259 ** Return the character class number for a character given its
260 ** context.
261 */
262 static char characterClass(char cPrev, char c){
263   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
264 }
265 
266 /*
267 ** Return the cost of inserting or deleting character c immediately
268 ** following character cPrev.  If cPrev==0, that means c is the first
269 ** character of the word.
270 */
271 static int insertOrDeleteCost(char cPrev, char c, char cNext){
272   char classC = characterClass(cPrev, c);
273   char classCprev;
274 
275   if( classC==CCLASS_SILENT ){
276     /* Insert or delete "silent" characters such as H or W */
277     return 1;
278   }
279   if( cPrev==c ){
280     /* Repeated characters, or miss a repeat */
281     return 10;
282   }
283   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
284     return 20;  /* Insert a vowel before or after 'r' */
285   }
286   classCprev = characterClass(cPrev, cPrev);
287   if( classC==classCprev ){
288     if( classC==CCLASS_VOWEL ){
289       /* Remove or add a new vowel to a vowel cluster */
290       return 15;
291     }else{
292       /* Remove or add a consonant not in the same class */
293       return 50;
294     }
295   }
296 
297   /* any other character insertion or deletion */
298   return 100;
299 }
300 
301 /*
302 ** Divide the insertion cost by this factor when appending to the
303 ** end of the word.
304 */
305 #define FINAL_INS_COST_DIV  4
306 
307 /*
308 ** Return the cost of substituting cTo in place of cFrom assuming
309 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
310 ** character of the word.
311 */
312 static int substituteCost(char cPrev, char cFrom, char cTo){
313   char classFrom, classTo;
314   if( cFrom==cTo ){
315     /* Exact match */
316     return 0;
317   }
318   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
319     /* differ only in case */
320     return 0;
321   }
322   classFrom = characterClass(cPrev, cFrom);
323   classTo = characterClass(cPrev, cTo);
324   if( classFrom==classTo ){
325     /* Same character class */
326     return 40;
327   }
328   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
329       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
330     /* Convert from one consonant to another, but in a different class */
331     return 75;
332   }
333   /* Any other subsitution */
334   return 100;
335 }
336 
337 /*
338 ** Given two strings zA and zB which are pure ASCII, return the cost
339 ** of transforming zA into zB.  If zA ends with '*' assume that it is
340 ** a prefix of zB and give only minimal penalty for extra characters
341 ** on the end of zB.
342 **
343 ** Smaller numbers mean a closer match.
344 **
345 ** Negative values indicate an error:
346 **    -1  One of the inputs is NULL
347 **    -2  Non-ASCII characters on input
348 **    -3  Unable to allocate memory
349 **
350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
351 ** of zB that matched the pattern in zA. If zA does not end with a '*',
352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
353 ** If zA does end in a '*', then it is the number of bytes in the prefix
354 ** of zB that was deemed to match zA.
355 */
356 static int editdist1(const char *zA, const char *zB, int *pnMatch){
357   int nA, nB;            /* Number of characters in zA[] and zB[] */
358   int xA, xB;            /* Loop counters for zA[] and zB[] */
359   char cA = 0, cB;       /* Current character of zA and zB */
360   char cAprev, cBprev;   /* Previous character of zA and zB */
361   char cAnext, cBnext;   /* Next character in zA and zB */
362   int d;                 /* North-west cost value */
363   int dc = 0;            /* North-west character value */
364   int res;               /* Final result */
365   int *m;                /* The cost matrix */
366   char *cx;              /* Corresponding character values */
367   int *toFree = 0;       /* Malloced space */
368   int nMatch = 0;
369   int mStack[60+15];     /* Stack space to use if not too much is needed */
370 
371   /* Early out if either input is NULL */
372   if( zA==0 || zB==0 ) return -1;
373 
374   /* Skip any common prefix */
375   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
376   if( pnMatch ) *pnMatch = nMatch;
377   if( zA[0]==0 && zB[0]==0 ) return 0;
378 
379 #if 0
380   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
381 #endif
382 
383   /* Verify input strings and measure their lengths */
384   for(nA=0; zA[nA]; nA++){
385     if( zA[nA]&0x80 ) return -2;
386   }
387   for(nB=0; zB[nB]; nB++){
388     if( zB[nB]&0x80 ) return -2;
389   }
390 
391   /* Special processing if either string is empty */
392   if( nA==0 ){
393     cBprev = dc;
394     for(xB=res=0; (cB = zB[xB])!=0; xB++){
395       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
396       cBprev = cB;
397     }
398     return res;
399   }
400   if( nB==0 ){
401     cAprev = dc;
402     for(xA=res=0; (cA = zA[xA])!=0; xA++){
403       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
404       cAprev = cA;
405     }
406     return res;
407   }
408 
409   /* A is a prefix of B */
410   if( zA[0]=='*' && zA[1]==0 ) return 0;
411 
412   /* Allocate and initialize the Wagner matrix */
413   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
414     m = mStack;
415   }else{
416     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
417     if( m==0 ) return -3;
418   }
419   cx = (char*)&m[nB+1];
420 
421   /* Compute the Wagner edit distance */
422   m[0] = 0;
423   cx[0] = dc;
424   cBprev = dc;
425   for(xB=1; xB<=nB; xB++){
426     cBnext = zB[xB];
427     cB = zB[xB-1];
428     cx[xB] = cB;
429     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
430     cBprev = cB;
431   }
432   cAprev = dc;
433   for(xA=1; xA<=nA; xA++){
434     int lastA = (xA==nA);
435     cA = zA[xA-1];
436     cAnext = zA[xA];
437     if( cA=='*' && lastA ) break;
438     d = m[0];
439     dc = cx[0];
440     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
441     cBprev = 0;
442     for(xB=1; xB<=nB; xB++){
443       int totalCost, insCost, delCost, subCost, ncx;
444       cB = zB[xB-1];
445       cBnext = zB[xB];
446 
447       /* Cost to insert cB */
448       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
449       if( lastA ) insCost /= FINAL_INS_COST_DIV;
450 
451       /* Cost to delete cA */
452       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
453 
454       /* Cost to substitute cA->cB */
455       subCost = substituteCost(cx[xB-1], cA, cB);
456 
457       /* Best cost */
458       totalCost = insCost + m[xB-1];
459       ncx = cB;
460       if( (delCost + m[xB])<totalCost ){
461         totalCost = delCost + m[xB];
462         ncx = cA;
463       }
464       if( (subCost + d)<totalCost ){
465         totalCost = subCost + d;
466       }
467 
468 #if 0
469       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
470              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
471              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
472              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
473 #endif
474 
475       /* Update the matrix */
476       d = m[xB];
477       dc = cx[xB];
478       m[xB] = totalCost;
479       cx[xB] = ncx;
480       cBprev = cB;
481     }
482     cAprev = cA;
483   }
484 
485   /* Free the wagner matrix and return the result */
486   if( cA=='*' ){
487     res = m[1];
488     for(xB=1; xB<=nB; xB++){
489       if( m[xB]<res ){
490         res = m[xB];
491         if( pnMatch ) *pnMatch = xB+nMatch;
492       }
493     }
494   }else{
495     res = m[nB];
496     /* In the current implementation, pnMatch is always NULL if zA does
497     ** not end in "*" */
498     assert( pnMatch==0 );
499   }
500   sqlite3_free(toFree);
501   return res;
502 }
503 
504 /*
505 ** Function:    editdist(A,B)
506 **
507 ** Return the cost of transforming string A into string B.  Both strings
508 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
509 ** a prefix of B and extra characters on the end of B have minimal additional
510 ** cost.
511 */
512 static void editdistSqlFunc(
513   sqlite3_context *context,
514   int argc,
515   sqlite3_value **argv
516 ){
517   int res = editdist1(
518                     (const char*)sqlite3_value_text(argv[0]),
519                     (const char*)sqlite3_value_text(argv[1]),
520                     0);
521   if( res<0 ){
522     if( res==(-3) ){
523       sqlite3_result_error_nomem(context);
524     }else if( res==(-2) ){
525       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
526     }else{
527       sqlite3_result_error(context, "NULL input to editdist()", -1);
528     }
529   }else{
530     sqlite3_result_int(context, res);
531   }
532 }
533 
534 /* End of the fixed-cost edit distance implementation
535 ******************************************************************************
536 *****************************************************************************
537 ** Begin: Configurable cost unicode edit distance routines
538 */
539 /* Forward declaration of structures */
540 typedef struct EditDist3Cost EditDist3Cost;
541 typedef struct EditDist3Config EditDist3Config;
542 typedef struct EditDist3Point EditDist3Point;
543 typedef struct EditDist3From EditDist3From;
544 typedef struct EditDist3FromString EditDist3FromString;
545 typedef struct EditDist3To EditDist3To;
546 typedef struct EditDist3ToString EditDist3ToString;
547 typedef struct EditDist3Lang EditDist3Lang;
548 
549 
550 /*
551 ** An entry in the edit cost table
552 */
553 struct EditDist3Cost {
554   EditDist3Cost *pNext;     /* Next cost element */
555   u8 nFrom;                 /* Number of bytes in aFrom */
556   u8 nTo;                   /* Number of bytes in aTo */
557   u16 iCost;                /* Cost of this transformation */
558   char a[4]    ;            /* FROM string followed by TO string */
559   /* Additional TO and FROM string bytes appended as necessary */
560 };
561 
562 /*
563 ** Edit costs for a particular language ID
564 */
565 struct EditDist3Lang {
566   int iLang;             /* Language ID */
567   int iInsCost;          /* Default insertion cost */
568   int iDelCost;          /* Default deletion cost */
569   int iSubCost;          /* Default substitution cost */
570   EditDist3Cost *pCost;  /* Costs */
571 };
572 
573 
574 /*
575 ** The default EditDist3Lang object, with default costs.
576 */
577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
578 
579 /*
580 ** Complete configuration
581 */
582 struct EditDist3Config {
583   int nLang;             /* Number of language IDs.  Size of a[] */
584   EditDist3Lang *a;      /* One for each distinct language ID */
585 };
586 
587 /*
588 ** Extra information about each character in the FROM string.
589 */
590 struct EditDist3From {
591   int nSubst;              /* Number of substitution cost entries */
592   int nDel;                /* Number of deletion cost entries */
593   int nByte;               /* Number of bytes in this character */
594   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
595   EditDist3Cost **apDel;   /* Array of deletion cost entries */
596 };
597 
598 /*
599 ** A precompiled FROM string.
600 *
601 ** In the common case we expect the FROM string to be reused multiple times.
602 ** In other words, the common case will be to measure the edit distance
603 ** from a single origin string to multiple target strings.
604 */
605 struct EditDist3FromString {
606   char *z;                 /* The complete text of the FROM string */
607   int n;                   /* Number of characters in the FROM string */
608   int isPrefix;            /* True if ends with '*' character */
609   EditDist3From *a;        /* Extra info about each char of the FROM string */
610 };
611 
612 /*
613 ** Extra information about each character in the TO string.
614 */
615 struct EditDist3To {
616   int nIns;                /* Number of insertion cost entries */
617   int nByte;               /* Number of bytes in this character */
618   EditDist3Cost **apIns;   /* Array of deletion cost entries */
619 };
620 
621 /*
622 ** A precompiled FROM string
623 */
624 struct EditDist3ToString {
625   char *z;                 /* The complete text of the TO string */
626   int n;                   /* Number of characters in the TO string */
627   EditDist3To *a;          /* Extra info about each char of the TO string */
628 };
629 
630 /*
631 ** Clear or delete an instance of the object that records all edit-distance
632 ** weights.
633 */
634 static void editDist3ConfigClear(EditDist3Config *p){
635   int i;
636   if( p==0 ) return;
637   for(i=0; i<p->nLang; i++){
638     EditDist3Cost *pCost, *pNext;
639     pCost = p->a[i].pCost;
640     while( pCost ){
641       pNext = pCost->pNext;
642       sqlite3_free(pCost);
643       pCost = pNext;
644     }
645   }
646   sqlite3_free(p->a);
647   memset(p, 0, sizeof(*p));
648 }
649 static void editDist3ConfigDelete(void *pIn){
650   EditDist3Config *p = (EditDist3Config*)pIn;
651   editDist3ConfigClear(p);
652   sqlite3_free(p);
653 }
654 
655 /*
656 ** Load all edit-distance weights from a table.
657 */
658 static int editDist3ConfigLoad(
659   EditDist3Config *p,      /* The edit distance configuration to load */
660   sqlite3 *db,            /* Load from this database */
661   const char *zTable      /* Name of the table from which to load */
662 ){
663   sqlite3_stmt *pStmt;
664   int rc, rc2;
665   char *zSql;
666   int iLangPrev = -9999;
667   EditDist3Lang *pLang = 0;
668 
669   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
670                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
671   if( zSql==0 ) return SQLITE_NOMEM;
672   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
673   sqlite3_free(zSql);
674   if( rc ) return rc;
675   editDist3ConfigClear(p);
676   while( sqlite3_step(pStmt)==SQLITE_ROW ){
677     int iLang = sqlite3_column_int(pStmt, 0);
678     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
679     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
680     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
681     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
682     int iCost = sqlite3_column_int(pStmt, 3);
683 
684     assert( zFrom!=0 || nFrom==0 );
685     assert( zTo!=0 || nTo==0 );
686     if( nFrom>100 || nTo>100 ) continue;
687     if( iCost<0 ) continue;
688     if( pLang==0 || iLang!=iLangPrev ){
689       EditDist3Lang *pNew;
690       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
691       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
692       p->a = pNew;
693       pLang = &p->a[p->nLang];
694       p->nLang++;
695       pLang->iLang = iLang;
696       pLang->iInsCost = 100;
697       pLang->iDelCost = 100;
698       pLang->iSubCost = 150;
699       pLang->pCost = 0;
700       iLangPrev = iLang;
701     }
702     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
703       pLang->iDelCost = iCost;
704     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
705       pLang->iInsCost = iCost;
706     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
707       pLang->iSubCost = iCost;
708     }else{
709       EditDist3Cost *pCost;
710       int nExtra = nFrom + nTo - 4;
711       if( nExtra<0 ) nExtra = 0;
712       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
713       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
714       pCost->nFrom = nFrom;
715       pCost->nTo = nTo;
716       pCost->iCost = iCost;
717       memcpy(pCost->a, zFrom, nFrom);
718       memcpy(pCost->a + nFrom, zTo, nTo);
719       pCost->pNext = pLang->pCost;
720       pLang->pCost = pCost;
721     }
722   }
723   rc2 = sqlite3_finalize(pStmt);
724   if( rc==SQLITE_OK ) rc = rc2;
725   return rc;
726 }
727 
728 /*
729 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
730 ** of N.
731 */
732 static int utf8Len(unsigned char c, int N){
733   int len = 1;
734   if( c>0x7f ){
735     if( (c&0xe0)==0xc0 ){
736       len = 2;
737     }else if( (c&0xf0)==0xe0 ){
738       len = 3;
739     }else{
740       len = 4;
741     }
742   }
743   if( len>N ) len = N;
744   return len;
745 }
746 
747 /*
748 ** Return TRUE (non-zero) if the To side of the given cost matches
749 ** the given string.
750 */
751 static int matchTo(EditDist3Cost *p, const char *z, int n){
752   if( p->nTo>n ) return 0;
753   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
754   return 1;
755 }
756 
757 /*
758 ** Return TRUE (non-zero) if the From side of the given cost matches
759 ** the given string.
760 */
761 static int matchFrom(EditDist3Cost *p, const char *z, int n){
762   assert( p->nFrom<=n );
763   if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
764   return 1;
765 }
766 
767 /*
768 ** Return TRUE (non-zero) of the next FROM character and the next TO
769 ** character are the same.
770 */
771 static int matchFromTo(
772   EditDist3FromString *pStr,  /* Left hand string */
773   int n1,                     /* Index of comparison character on the left */
774   const char *z2,             /* Right-handl comparison character */
775   int n2                      /* Bytes remaining in z2[] */
776 ){
777   int b1 = pStr->a[n1].nByte;
778   if( b1>n2 ) return 0;
779   if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
780   return 1;
781 }
782 
783 /*
784 ** Delete an EditDist3FromString objecct
785 */
786 static void editDist3FromStringDelete(EditDist3FromString *p){
787   int i;
788   if( p ){
789     for(i=0; i<p->n; i++){
790       sqlite3_free(p->a[i].apDel);
791       sqlite3_free(p->a[i].apSubst);
792     }
793     sqlite3_free(p);
794   }
795 }
796 
797 /*
798 ** Create a EditDist3FromString object.
799 */
800 static EditDist3FromString *editDist3FromStringNew(
801   const EditDist3Lang *pLang,
802   const char *z,
803   int n
804 ){
805   EditDist3FromString *pStr;
806   EditDist3Cost *p;
807   int i;
808 
809   if( z==0 ) return 0;
810   if( n<0 ) n = (int)strlen(z);
811   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
812   if( pStr==0 ) return 0;
813   pStr->a = (EditDist3From*)&pStr[1];
814   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
815   pStr->n = n;
816   pStr->z = (char*)&pStr->a[n];
817   memcpy(pStr->z, z, n+1);
818   if( n && z[n-1]=='*' ){
819     pStr->isPrefix = 1;
820     n--;
821     pStr->n--;
822     pStr->z[n] = 0;
823   }else{
824     pStr->isPrefix = 0;
825   }
826 
827   for(i=0; i<n; i++){
828     EditDist3From *pFrom = &pStr->a[i];
829     memset(pFrom, 0, sizeof(*pFrom));
830     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
831     for(p=pLang->pCost; p; p=p->pNext){
832       EditDist3Cost **apNew;
833       if( i+p->nFrom>n ) continue;
834       if( matchFrom(p, z+i, n-i)==0 ) continue;
835       if( p->nTo==0 ){
836         apNew = sqlite3_realloc64(pFrom->apDel,
837                                 sizeof(*apNew)*(pFrom->nDel+1));
838         if( apNew==0 ) break;
839         pFrom->apDel = apNew;
840         apNew[pFrom->nDel++] = p;
841       }else{
842         apNew = sqlite3_realloc64(pFrom->apSubst,
843                                 sizeof(*apNew)*(pFrom->nSubst+1));
844         if( apNew==0 ) break;
845         pFrom->apSubst = apNew;
846         apNew[pFrom->nSubst++] = p;
847       }
848     }
849     if( p ){
850       editDist3FromStringDelete(pStr);
851       pStr = 0;
852       break;
853     }
854   }
855   return pStr;
856 }
857 
858 /*
859 ** Update entry m[i] such that it is the minimum of its current value
860 ** and m[j]+iCost.
861 **
862 ** If the iCost is 1,000,000 or greater, then consider the cost to be
863 ** infinite and skip the update.
864 */
865 static void updateCost(
866   unsigned int *m,
867   int i,
868   int j,
869   int iCost
870 ){
871   assert( iCost>=0 );
872   if( iCost<10000 ){
873     unsigned int b = m[j] + iCost;
874     if( b<m[i] ) m[i] = b;
875   }
876 }
877 
878 /*
879 ** How much stack space (int bytes) to use for Wagner matrix in
880 ** editDist3Core().  If more space than this is required, the entire
881 ** matrix is taken from the heap.  To reduce the load on the memory
882 ** allocator, make this value as large as practical for the
883 ** architecture in use.
884 */
885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
886 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
887 #endif
888 
889 /* Compute the edit distance between two strings.
890 **
891 ** If an error occurs, return a negative number which is the error code.
892 **
893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
895 ** not contain the pattern for a prefix-search, then this is always the number
896 ** of characters in z2. If pFrom does contain a prefix search pattern, then
897 ** it is the number of characters in the prefix of z2 that was deemed to
898 ** match pFrom.
899 */
900 static int editDist3Core(
901   EditDist3FromString *pFrom,  /* The FROM string */
902   const char *z2,              /* The TO string */
903   int n2,                      /* Length of the TO string */
904   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
905   int *pnMatch                 /* OUT: Characters in matched prefix */
906 ){
907   int k, n;
908   int i1, b1;
909   int i2, b2;
910   EditDist3FromString f = *pFrom;
911   EditDist3To *a2;
912   unsigned int *m;
913   unsigned int *pToFree;
914   int szRow;
915   EditDist3Cost *p;
916   int res;
917   sqlite3_uint64 nByte;
918   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
919 
920   /* allocate the Wagner matrix and the aTo[] array for the TO string */
921   n = (f.n+1)*(n2+1);
922   n = (n+1)&~1;
923   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
924   if( nByte<=sizeof(stackSpace) ){
925     m = stackSpace;
926     pToFree = 0;
927   }else{
928     m = pToFree = sqlite3_malloc64( nByte );
929     if( m==0 ) return -1;            /* Out of memory */
930   }
931   a2 = (EditDist3To*)&m[n];
932   memset(a2, 0, sizeof(a2[0])*n2);
933 
934   /* Fill in the a1[] matrix for all characters of the TO string */
935   for(i2=0; i2<n2; i2++){
936     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
937     for(p=pLang->pCost; p; p=p->pNext){
938       EditDist3Cost **apNew;
939       if( p->nFrom>0 ) continue;
940       if( i2+p->nTo>n2 ) continue;
941       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
942       a2[i2].nIns++;
943       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
944       if( apNew==0 ){
945         res = -1;  /* Out of memory */
946         goto editDist3Abort;
947       }
948       a2[i2].apIns = apNew;
949       a2[i2].apIns[a2[i2].nIns-1] = p;
950     }
951   }
952 
953   /* Prepare to compute the minimum edit distance */
954   szRow = f.n+1;
955   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
956   m[0] = 0;
957 
958   /* First fill in the top-row of the matrix with FROM deletion costs */
959   for(i1=0; i1<f.n; i1 += b1){
960     b1 = f.a[i1].nByte;
961     updateCost(m, i1+b1, i1, pLang->iDelCost);
962     for(k=0; k<f.a[i1].nDel; k++){
963       p = f.a[i1].apDel[k];
964       updateCost(m, i1+p->nFrom, i1, p->iCost);
965     }
966   }
967 
968   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
969   for(i2=0; i2<n2; i2 += b2){
970     int rx;      /* Starting index for current row */
971     int rxp;     /* Starting index for previous row */
972     b2 = a2[i2].nByte;
973     rx = szRow*(i2+b2);
974     rxp = szRow*i2;
975     updateCost(m, rx, rxp, pLang->iInsCost);
976     for(k=0; k<a2[i2].nIns; k++){
977       p = a2[i2].apIns[k];
978       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
979     }
980     for(i1=0; i1<f.n; i1+=b1){
981       int cx;    /* Index of current cell */
982       int cxp;   /* Index of cell immediately to the left */
983       int cxd;   /* Index of cell to the left and one row above */
984       int cxu;   /* Index of cell immediately above */
985       b1 = f.a[i1].nByte;
986       cxp = rx + i1;
987       cx = cxp + b1;
988       cxd = rxp + i1;
989       cxu = cxd + b1;
990       updateCost(m, cx, cxp, pLang->iDelCost);
991       for(k=0; k<f.a[i1].nDel; k++){
992         p = f.a[i1].apDel[k];
993         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
994       }
995       updateCost(m, cx, cxu, pLang->iInsCost);
996       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
997         updateCost(m, cx, cxd, 0);
998       }
999       updateCost(m, cx, cxd, pLang->iSubCost);
1000       for(k=0; k<f.a[i1].nSubst; k++){
1001         p = f.a[i1].apSubst[k];
1002         if( matchTo(p, z2+i2, n2-i2) ){
1003           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1004         }
1005       }
1006     }
1007   }
1008 
1009 #if 0  /* Enable for debugging */
1010   printf("         ^");
1011   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1012   printf("\n   ^:");
1013   for(i1=0; i1<szRow; i1++){
1014     int v = m[i1];
1015     if( v>9999 ) printf(" ****");
1016     else         printf(" %4d", v);
1017   }
1018   printf("\n");
1019   for(i2=0; i2<n2; i2++){
1020     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1021     for(i1=0; i1<szRow; i1++){
1022       int v = m[(i2+1)*szRow+i1];
1023       if( v>9999 ) printf(" ****");
1024       else         printf(" %4d", v);
1025     }
1026     printf("\n");
1027   }
1028 #endif
1029 
1030   /* Free memory allocations and return the result */
1031   res = (int)m[szRow*(n2+1)-1];
1032   n = n2;
1033   if( f.isPrefix ){
1034     for(i2=1; i2<=n2; i2++){
1035       int b = m[szRow*i2-1];
1036       if( b<=res ){
1037         res = b;
1038         n = i2 - 1;
1039       }
1040     }
1041   }
1042   if( pnMatch ){
1043     int nExtra = 0;
1044     for(k=0; k<n; k++){
1045       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1046     }
1047     *pnMatch = n - nExtra;
1048   }
1049 
1050 editDist3Abort:
1051   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1052   sqlite3_free(pToFree);
1053   return res;
1054 }
1055 
1056 /*
1057 ** Get an appropriate EditDist3Lang object.
1058 */
1059 static const EditDist3Lang *editDist3FindLang(
1060   EditDist3Config *pConfig,
1061   int iLang
1062 ){
1063   int i;
1064   for(i=0; i<pConfig->nLang; i++){
1065     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1066   }
1067   return &editDist3Lang;
1068 }
1069 
1070 /*
1071 ** Function:    editdist3(A,B,iLang)
1072 **              editdist3(tablename)
1073 **
1074 ** Return the cost of transforming string A into string B using edit
1075 ** weights for iLang.
1076 **
1077 ** The second form loads edit weights into memory from a table.
1078 */
1079 static void editDist3SqlFunc(
1080   sqlite3_context *context,
1081   int argc,
1082   sqlite3_value **argv
1083 ){
1084   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1085   sqlite3 *db = sqlite3_context_db_handle(context);
1086   int rc;
1087   if( argc==1 ){
1088     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1089     rc = editDist3ConfigLoad(pConfig, db, zTable);
1090     if( rc ) sqlite3_result_error_code(context, rc);
1091   }else{
1092     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1093     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1094     int nA = sqlite3_value_bytes(argv[0]);
1095     int nB = sqlite3_value_bytes(argv[1]);
1096     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1097     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1098     EditDist3FromString *pFrom;
1099     int dist;
1100 
1101     pFrom = editDist3FromStringNew(pLang, zA, nA);
1102     if( pFrom==0 ){
1103       sqlite3_result_error_nomem(context);
1104       return;
1105     }
1106     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1107     editDist3FromStringDelete(pFrom);
1108     if( dist==(-1) ){
1109       sqlite3_result_error_nomem(context);
1110     }else{
1111       sqlite3_result_int(context, dist);
1112     }
1113   }
1114 }
1115 
1116 /*
1117 ** Register the editDist3 function with SQLite
1118 */
1119 static int editDist3Install(sqlite3 *db){
1120   int rc;
1121   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1122   if( pConfig==0 ) return SQLITE_NOMEM;
1123   memset(pConfig, 0, sizeof(*pConfig));
1124   rc = sqlite3_create_function_v2(db, "editdist3",
1125               2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1126   if( rc==SQLITE_OK ){
1127     rc = sqlite3_create_function_v2(db, "editdist3",
1128                 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1129   }
1130   if( rc==SQLITE_OK ){
1131     rc = sqlite3_create_function_v2(db, "editdist3",
1132                 1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0,
1133                 editDist3ConfigDelete);
1134   }else{
1135     sqlite3_free(pConfig);
1136   }
1137   return rc;
1138 }
1139 /* End configurable cost unicode edit distance routines
1140 ******************************************************************************
1141 ******************************************************************************
1142 ** Begin transliterate unicode-to-ascii implementation
1143 */
1144 
1145 #if !SQLITE_AMALGAMATION
1146 /*
1147 ** This lookup table is used to help decode the first byte of
1148 ** a multi-byte UTF8 character.
1149 */
1150 static const unsigned char sqlite3Utf8Trans1[] = {
1151   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1152   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1153   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1154   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1155   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1156   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1157   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1158   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1159 };
1160 #endif
1161 
1162 /*
1163 ** Return the value of the first UTF-8 character in the string.
1164 */
1165 static int utf8Read(const unsigned char *z, int n, int *pSize){
1166   int c, i;
1167 
1168   /* All callers to this routine (in the current implementation)
1169   ** always have n>0. */
1170   if( NEVER(n==0) ){
1171     c = i = 0;
1172   }else{
1173     c = z[0];
1174     i = 1;
1175     if( c>=0xc0 ){
1176       c = sqlite3Utf8Trans1[c-0xc0];
1177       while( i<n && (z[i] & 0xc0)==0x80 ){
1178         c = (c<<6) + (0x3f & z[i++]);
1179       }
1180     }
1181   }
1182   *pSize = i;
1183   return c;
1184 }
1185 
1186 /*
1187 ** Return the number of characters in the utf-8 string in the nIn byte
1188 ** buffer pointed to by zIn.
1189 */
1190 static int utf8Charlen(const char *zIn, int nIn){
1191   int i;
1192   int nChar = 0;
1193   for(i=0; i<nIn; nChar++){
1194     int sz;
1195     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1196     i += sz;
1197   }
1198   return nChar;
1199 }
1200 
1201 /*
1202 ** Table of translations from unicode characters into ASCII.
1203 */
1204 static const struct {
1205  unsigned short int cFrom;
1206  unsigned char cTo0, cTo1;
1207 } translit[] = {
1208   { 0x00A0,  0x20, 0x00 },  /*   to   */
1209   { 0x00B5,  0x75, 0x00 },  /* µ to u */
1210   { 0x00C0,  0x41, 0x00 },  /* À to A */
1211   { 0x00C1,  0x41, 0x00 },  /* Á to A */
1212   { 0x00C2,  0x41, 0x00 },  /* Â to A */
1213   { 0x00C3,  0x41, 0x00 },  /* Ã to A */
1214   { 0x00C4,  0x41, 0x65 },  /* Ä to Ae */
1215   { 0x00C5,  0x41, 0x61 },  /* Å to Aa */
1216   { 0x00C6,  0x41, 0x45 },  /* Æ to AE */
1217   { 0x00C7,  0x43, 0x00 },  /* Ç to C */
1218   { 0x00C8,  0x45, 0x00 },  /* È to E */
1219   { 0x00C9,  0x45, 0x00 },  /* É to E */
1220   { 0x00CA,  0x45, 0x00 },  /* Ê to E */
1221   { 0x00CB,  0x45, 0x00 },  /* Ë to E */
1222   { 0x00CC,  0x49, 0x00 },  /* Ì to I */
1223   { 0x00CD,  0x49, 0x00 },  /* Í to I */
1224   { 0x00CE,  0x49, 0x00 },  /* Î to I */
1225   { 0x00CF,  0x49, 0x00 },  /* Ï to I */
1226   { 0x00D0,  0x44, 0x00 },  /* Ð to D */
1227   { 0x00D1,  0x4E, 0x00 },  /* Ñ to N */
1228   { 0x00D2,  0x4F, 0x00 },  /* Ò to O */
1229   { 0x00D3,  0x4F, 0x00 },  /* Ó to O */
1230   { 0x00D4,  0x4F, 0x00 },  /* Ô to O */
1231   { 0x00D5,  0x4F, 0x00 },  /* Õ to O */
1232   { 0x00D6,  0x4F, 0x65 },  /* Ö to Oe */
1233   { 0x00D7,  0x78, 0x00 },  /* × to x */
1234   { 0x00D8,  0x4F, 0x00 },  /* Ø to O */
1235   { 0x00D9,  0x55, 0x00 },  /* Ù to U */
1236   { 0x00DA,  0x55, 0x00 },  /* Ú to U */
1237   { 0x00DB,  0x55, 0x00 },  /* Û to U */
1238   { 0x00DC,  0x55, 0x65 },  /* Ü to Ue */
1239   { 0x00DD,  0x59, 0x00 },  /* Ý to Y */
1240   { 0x00DE,  0x54, 0x68 },  /* Þ to Th */
1241   { 0x00DF,  0x73, 0x73 },  /* ß to ss */
1242   { 0x00E0,  0x61, 0x00 },  /* à to a */
1243   { 0x00E1,  0x61, 0x00 },  /* á to a */
1244   { 0x00E2,  0x61, 0x00 },  /* â to a */
1245   { 0x00E3,  0x61, 0x00 },  /* ã to a */
1246   { 0x00E4,  0x61, 0x65 },  /* ä to ae */
1247   { 0x00E5,  0x61, 0x61 },  /* å to aa */
1248   { 0x00E6,  0x61, 0x65 },  /* æ to ae */
1249   { 0x00E7,  0x63, 0x00 },  /* ç to c */
1250   { 0x00E8,  0x65, 0x00 },  /* è to e */
1251   { 0x00E9,  0x65, 0x00 },  /* é to e */
1252   { 0x00EA,  0x65, 0x00 },  /* ê to e */
1253   { 0x00EB,  0x65, 0x00 },  /* ë to e */
1254   { 0x00EC,  0x69, 0x00 },  /* ì to i */
1255   { 0x00ED,  0x69, 0x00 },  /* í to i */
1256   { 0x00EE,  0x69, 0x00 },  /* î to i */
1257   { 0x00EF,  0x69, 0x00 },  /* ï to i */
1258   { 0x00F0,  0x64, 0x00 },  /* ð to d */
1259   { 0x00F1,  0x6E, 0x00 },  /* ñ to n */
1260   { 0x00F2,  0x6F, 0x00 },  /* ò to o */
1261   { 0x00F3,  0x6F, 0x00 },  /* ó to o */
1262   { 0x00F4,  0x6F, 0x00 },  /* ô to o */
1263   { 0x00F5,  0x6F, 0x00 },  /* õ to o */
1264   { 0x00F6,  0x6F, 0x65 },  /* ö to oe */
1265   { 0x00F7,  0x3A, 0x00 },  /* ÷ to : */
1266   { 0x00F8,  0x6F, 0x00 },  /* ø to o */
1267   { 0x00F9,  0x75, 0x00 },  /* ù to u */
1268   { 0x00FA,  0x75, 0x00 },  /* ú to u */
1269   { 0x00FB,  0x75, 0x00 },  /* û to u */
1270   { 0x00FC,  0x75, 0x65 },  /* ü to ue */
1271   { 0x00FD,  0x79, 0x00 },  /* ý to y */
1272   { 0x00FE,  0x74, 0x68 },  /* þ to th */
1273   { 0x00FF,  0x79, 0x00 },  /* ÿ to y */
1274   { 0x0100,  0x41, 0x00 },  /* Ā to A */
1275   { 0x0101,  0x61, 0x00 },  /* ā to a */
1276   { 0x0102,  0x41, 0x00 },  /* Ă to A */
1277   { 0x0103,  0x61, 0x00 },  /* ă to a */
1278   { 0x0104,  0x41, 0x00 },  /* Ą to A */
1279   { 0x0105,  0x61, 0x00 },  /* ą to a */
1280   { 0x0106,  0x43, 0x00 },  /* Ć to C */
1281   { 0x0107,  0x63, 0x00 },  /* ć to c */
1282   { 0x0108,  0x43, 0x68 },  /* Ĉ to Ch */
1283   { 0x0109,  0x63, 0x68 },  /* ĉ to ch */
1284   { 0x010A,  0x43, 0x00 },  /* Ċ to C */
1285   { 0x010B,  0x63, 0x00 },  /* ċ to c */
1286   { 0x010C,  0x43, 0x00 },  /* Č to C */
1287   { 0x010D,  0x63, 0x00 },  /* č to c */
1288   { 0x010E,  0x44, 0x00 },  /* Ď to D */
1289   { 0x010F,  0x64, 0x00 },  /* ď to d */
1290   { 0x0110,  0x44, 0x00 },  /* Đ to D */
1291   { 0x0111,  0x64, 0x00 },  /* đ to d */
1292   { 0x0112,  0x45, 0x00 },  /* Ē to E */
1293   { 0x0113,  0x65, 0x00 },  /* ē to e */
1294   { 0x0114,  0x45, 0x00 },  /* Ĕ to E */
1295   { 0x0115,  0x65, 0x00 },  /* ĕ to e */
1296   { 0x0116,  0x45, 0x00 },  /* Ė to E */
1297   { 0x0117,  0x65, 0x00 },  /* ė to e */
1298   { 0x0118,  0x45, 0x00 },  /* Ę to E */
1299   { 0x0119,  0x65, 0x00 },  /* ę to e */
1300   { 0x011A,  0x45, 0x00 },  /* Ě to E */
1301   { 0x011B,  0x65, 0x00 },  /* ě to e */
1302   { 0x011C,  0x47, 0x68 },  /* Ĝ to Gh */
1303   { 0x011D,  0x67, 0x68 },  /* ĝ to gh */
1304   { 0x011E,  0x47, 0x00 },  /* Ğ to G */
1305   { 0x011F,  0x67, 0x00 },  /* ğ to g */
1306   { 0x0120,  0x47, 0x00 },  /* Ġ to G */
1307   { 0x0121,  0x67, 0x00 },  /* ġ to g */
1308   { 0x0122,  0x47, 0x00 },  /* Ģ to G */
1309   { 0x0123,  0x67, 0x00 },  /* ģ to g */
1310   { 0x0124,  0x48, 0x68 },  /* Ĥ to Hh */
1311   { 0x0125,  0x68, 0x68 },  /* ĥ to hh */
1312   { 0x0126,  0x48, 0x00 },  /* Ħ to H */
1313   { 0x0127,  0x68, 0x00 },  /* ħ to h */
1314   { 0x0128,  0x49, 0x00 },  /* Ĩ to I */
1315   { 0x0129,  0x69, 0x00 },  /* ĩ to i */
1316   { 0x012A,  0x49, 0x00 },  /* Ī to I */
1317   { 0x012B,  0x69, 0x00 },  /* ī to i */
1318   { 0x012C,  0x49, 0x00 },  /* Ĭ to I */
1319   { 0x012D,  0x69, 0x00 },  /* ĭ to i */
1320   { 0x012E,  0x49, 0x00 },  /* Į to I */
1321   { 0x012F,  0x69, 0x00 },  /* į to i */
1322   { 0x0130,  0x49, 0x00 },  /* İ to I */
1323   { 0x0131,  0x69, 0x00 },  /* ı to i */
1324   { 0x0132,  0x49, 0x4A },  /* IJ to IJ */
1325   { 0x0133,  0x69, 0x6A },  /* ij to ij */
1326   { 0x0134,  0x4A, 0x68 },  /* Ĵ to Jh */
1327   { 0x0135,  0x6A, 0x68 },  /* ĵ to jh */
1328   { 0x0136,  0x4B, 0x00 },  /* Ķ to K */
1329   { 0x0137,  0x6B, 0x00 },  /* ķ to k */
1330   { 0x0138,  0x6B, 0x00 },  /* ĸ to k */
1331   { 0x0139,  0x4C, 0x00 },  /* Ĺ to L */
1332   { 0x013A,  0x6C, 0x00 },  /* ĺ to l */
1333   { 0x013B,  0x4C, 0x00 },  /* Ļ to L */
1334   { 0x013C,  0x6C, 0x00 },  /* ļ to l */
1335   { 0x013D,  0x4C, 0x00 },  /* Ľ to L */
1336   { 0x013E,  0x6C, 0x00 },  /* ľ to l */
1337   { 0x013F,  0x4C, 0x2E },  /* Ŀ to L. */
1338   { 0x0140,  0x6C, 0x2E },  /* ŀ to l. */
1339   { 0x0141,  0x4C, 0x00 },  /* Ł to L */
1340   { 0x0142,  0x6C, 0x00 },  /* ł to l */
1341   { 0x0143,  0x4E, 0x00 },  /* Ń to N */
1342   { 0x0144,  0x6E, 0x00 },  /* ń to n */
1343   { 0x0145,  0x4E, 0x00 },  /* Ņ to N */
1344   { 0x0146,  0x6E, 0x00 },  /* ņ to n */
1345   { 0x0147,  0x4E, 0x00 },  /* Ň to N */
1346   { 0x0148,  0x6E, 0x00 },  /* ň to n */
1347   { 0x0149,  0x27, 0x6E },  /* ʼn to 'n */
1348   { 0x014A,  0x4E, 0x47 },  /* Ŋ to NG */
1349   { 0x014B,  0x6E, 0x67 },  /* ŋ to ng */
1350   { 0x014C,  0x4F, 0x00 },  /* Ō to O */
1351   { 0x014D,  0x6F, 0x00 },  /* ō to o */
1352   { 0x014E,  0x4F, 0x00 },  /* Ŏ to O */
1353   { 0x014F,  0x6F, 0x00 },  /* ŏ to o */
1354   { 0x0150,  0x4F, 0x00 },  /* Ő to O */
1355   { 0x0151,  0x6F, 0x00 },  /* ő to o */
1356   { 0x0152,  0x4F, 0x45 },  /* Œ to OE */
1357   { 0x0153,  0x6F, 0x65 },  /* œ to oe */
1358   { 0x0154,  0x52, 0x00 },  /* Ŕ to R */
1359   { 0x0155,  0x72, 0x00 },  /* ŕ to r */
1360   { 0x0156,  0x52, 0x00 },  /* Ŗ to R */
1361   { 0x0157,  0x72, 0x00 },  /* ŗ to r */
1362   { 0x0158,  0x52, 0x00 },  /* Ř to R */
1363   { 0x0159,  0x72, 0x00 },  /* ř to r */
1364   { 0x015A,  0x53, 0x00 },  /* Ś to S */
1365   { 0x015B,  0x73, 0x00 },  /* ś to s */
1366   { 0x015C,  0x53, 0x68 },  /* Ŝ to Sh */
1367   { 0x015D,  0x73, 0x68 },  /* ŝ to sh */
1368   { 0x015E,  0x53, 0x00 },  /* Ş to S */
1369   { 0x015F,  0x73, 0x00 },  /* ş to s */
1370   { 0x0160,  0x53, 0x00 },  /* Š to S */
1371   { 0x0161,  0x73, 0x00 },  /* š to s */
1372   { 0x0162,  0x54, 0x00 },  /* Ţ to T */
1373   { 0x0163,  0x74, 0x00 },  /* ţ to t */
1374   { 0x0164,  0x54, 0x00 },  /* Ť to T */
1375   { 0x0165,  0x74, 0x00 },  /* ť to t */
1376   { 0x0166,  0x54, 0x00 },  /* Ŧ to T */
1377   { 0x0167,  0x74, 0x00 },  /* ŧ to t */
1378   { 0x0168,  0x55, 0x00 },  /* Ũ to U */
1379   { 0x0169,  0x75, 0x00 },  /* ũ to u */
1380   { 0x016A,  0x55, 0x00 },  /* Ū to U */
1381   { 0x016B,  0x75, 0x00 },  /* ū to u */
1382   { 0x016C,  0x55, 0x00 },  /* Ŭ to U */
1383   { 0x016D,  0x75, 0x00 },  /* ŭ to u */
1384   { 0x016E,  0x55, 0x00 },  /* Ů to U */
1385   { 0x016F,  0x75, 0x00 },  /* ů to u */
1386   { 0x0170,  0x55, 0x00 },  /* Ű to U */
1387   { 0x0171,  0x75, 0x00 },  /* ű to u */
1388   { 0x0172,  0x55, 0x00 },  /* Ų to U */
1389   { 0x0173,  0x75, 0x00 },  /* ų to u */
1390   { 0x0174,  0x57, 0x00 },  /* Ŵ to W */
1391   { 0x0175,  0x77, 0x00 },  /* ŵ to w */
1392   { 0x0176,  0x59, 0x00 },  /* Ŷ to Y */
1393   { 0x0177,  0x79, 0x00 },  /* ŷ to y */
1394   { 0x0178,  0x59, 0x00 },  /* Ÿ to Y */
1395   { 0x0179,  0x5A, 0x00 },  /* Ź to Z */
1396   { 0x017A,  0x7A, 0x00 },  /* ź to z */
1397   { 0x017B,  0x5A, 0x00 },  /* Ż to Z */
1398   { 0x017C,  0x7A, 0x00 },  /* ż to z */
1399   { 0x017D,  0x5A, 0x00 },  /* Ž to Z */
1400   { 0x017E,  0x7A, 0x00 },  /* ž to z */
1401   { 0x017F,  0x73, 0x00 },  /* ſ to s */
1402   { 0x0192,  0x66, 0x00 },  /* ƒ to f */
1403   { 0x0218,  0x53, 0x00 },  /* Ș to S */
1404   { 0x0219,  0x73, 0x00 },  /* ș to s */
1405   { 0x021A,  0x54, 0x00 },  /* Ț to T */
1406   { 0x021B,  0x74, 0x00 },  /* ț to t */
1407   { 0x0386,  0x41, 0x00 },  /* Ά to A */
1408   { 0x0388,  0x45, 0x00 },  /* Έ to E */
1409   { 0x0389,  0x49, 0x00 },  /* Ή to I */
1410   { 0x038A,  0x49, 0x00 },  /* Ί to I */
1411   { 0x038C,  0x4f, 0x00 },  /* Ό to O */
1412   { 0x038E,  0x59, 0x00 },  /* Ύ to Y */
1413   { 0x038F,  0x4f, 0x00 },  /* Ώ to O */
1414   { 0x0390,  0x69, 0x00 },  /* ΐ to i */
1415   { 0x0391,  0x41, 0x00 },  /* Α to A */
1416   { 0x0392,  0x42, 0x00 },  /* Β to B */
1417   { 0x0393,  0x47, 0x00 },  /* Γ to G */
1418   { 0x0394,  0x44, 0x00 },  /* Δ to D */
1419   { 0x0395,  0x45, 0x00 },  /* Ε to E */
1420   { 0x0396,  0x5a, 0x00 },  /* Ζ to Z */
1421   { 0x0397,  0x49, 0x00 },  /* Η to I */
1422   { 0x0398,  0x54, 0x68 },  /* Θ to Th */
1423   { 0x0399,  0x49, 0x00 },  /* Ι to I */
1424   { 0x039A,  0x4b, 0x00 },  /* Κ to K */
1425   { 0x039B,  0x4c, 0x00 },  /* Λ to L */
1426   { 0x039C,  0x4d, 0x00 },  /* Μ to M */
1427   { 0x039D,  0x4e, 0x00 },  /* Ν to N */
1428   { 0x039E,  0x58, 0x00 },  /* Ξ to X */
1429   { 0x039F,  0x4f, 0x00 },  /* Ο to O */
1430   { 0x03A0,  0x50, 0x00 },  /* Π to P */
1431   { 0x03A1,  0x52, 0x00 },  /* Ρ to R */
1432   { 0x03A3,  0x53, 0x00 },  /* Σ to S */
1433   { 0x03A4,  0x54, 0x00 },  /* Τ to T */
1434   { 0x03A5,  0x59, 0x00 },  /* Υ to Y */
1435   { 0x03A6,  0x46, 0x00 },  /* Φ to F */
1436   { 0x03A7,  0x43, 0x68 },  /* Χ to Ch */
1437   { 0x03A8,  0x50, 0x73 },  /* Ψ to Ps */
1438   { 0x03A9,  0x4f, 0x00 },  /* Ω to O */
1439   { 0x03AA,  0x49, 0x00 },  /* Ϊ to I */
1440   { 0x03AB,  0x59, 0x00 },  /* Ϋ to Y */
1441   { 0x03AC,  0x61, 0x00 },  /* ά to a */
1442   { 0x03AD,  0x65, 0x00 },  /* έ to e */
1443   { 0x03AE,  0x69, 0x00 },  /* ή to i */
1444   { 0x03AF,  0x69, 0x00 },  /* ί to i */
1445   { 0x03B1,  0x61, 0x00 },  /* α to a */
1446   { 0x03B2,  0x62, 0x00 },  /* β to b */
1447   { 0x03B3,  0x67, 0x00 },  /* γ to g */
1448   { 0x03B4,  0x64, 0x00 },  /* δ to d */
1449   { 0x03B5,  0x65, 0x00 },  /* ε to e */
1450   { 0x03B6,  0x7a, 0x00 },  /* ζ to z */
1451   { 0x03B7,  0x69, 0x00 },  /* η to i */
1452   { 0x03B8,  0x74, 0x68 },  /* θ to th */
1453   { 0x03B9,  0x69, 0x00 },  /* ι to i */
1454   { 0x03BA,  0x6b, 0x00 },  /* κ to k */
1455   { 0x03BB,  0x6c, 0x00 },  /* λ to l */
1456   { 0x03BC,  0x6d, 0x00 },  /* μ to m */
1457   { 0x03BD,  0x6e, 0x00 },  /* ν to n */
1458   { 0x03BE,  0x78, 0x00 },  /* ξ to x */
1459   { 0x03BF,  0x6f, 0x00 },  /* ο to o */
1460   { 0x03C0,  0x70, 0x00 },  /* π to p */
1461   { 0x03C1,  0x72, 0x00 },  /* ρ to r */
1462   { 0x03C3,  0x73, 0x00 },  /* σ to s */
1463   { 0x03C4,  0x74, 0x00 },  /* τ to t */
1464   { 0x03C5,  0x79, 0x00 },  /* υ to y */
1465   { 0x03C6,  0x66, 0x00 },  /* φ to f */
1466   { 0x03C7,  0x63, 0x68 },  /* χ to ch */
1467   { 0x03C8,  0x70, 0x73 },  /* ψ to ps */
1468   { 0x03C9,  0x6f, 0x00 },  /* ω to o */
1469   { 0x03CA,  0x69, 0x00 },  /* ϊ to i */
1470   { 0x03CB,  0x79, 0x00 },  /* ϋ to y */
1471   { 0x03CC,  0x6f, 0x00 },  /* ό to o */
1472   { 0x03CD,  0x79, 0x00 },  /* ύ to y */
1473   { 0x03CE,  0x69, 0x00 },  /* ώ to i */
1474   { 0x0400,  0x45, 0x00 },  /* Ѐ to E */
1475   { 0x0401,  0x45, 0x00 },  /* Ё to E */
1476   { 0x0402,  0x44, 0x00 },  /* Ђ to D */
1477   { 0x0403,  0x47, 0x00 },  /* Ѓ to G */
1478   { 0x0404,  0x45, 0x00 },  /* Є to E */
1479   { 0x0405,  0x5a, 0x00 },  /* Ѕ to Z */
1480   { 0x0406,  0x49, 0x00 },  /* І to I */
1481   { 0x0407,  0x49, 0x00 },  /* Ї to I */
1482   { 0x0408,  0x4a, 0x00 },  /* Ј to J */
1483   { 0x0409,  0x49, 0x00 },  /* Љ to I */
1484   { 0x040A,  0x4e, 0x00 },  /* Њ to N */
1485   { 0x040B,  0x44, 0x00 },  /* Ћ to D */
1486   { 0x040C,  0x4b, 0x00 },  /* Ќ to K */
1487   { 0x040D,  0x49, 0x00 },  /* Ѝ to I */
1488   { 0x040E,  0x55, 0x00 },  /* Ў to U */
1489   { 0x040F,  0x44, 0x00 },  /* Џ to D */
1490   { 0x0410,  0x41, 0x00 },  /* А to A */
1491   { 0x0411,  0x42, 0x00 },  /* Б to B */
1492   { 0x0412,  0x56, 0x00 },  /* В to V */
1493   { 0x0413,  0x47, 0x00 },  /* Г to G */
1494   { 0x0414,  0x44, 0x00 },  /* Д to D */
1495   { 0x0415,  0x45, 0x00 },  /* Е to E */
1496   { 0x0416,  0x5a, 0x68 },  /* Ж to Zh */
1497   { 0x0417,  0x5a, 0x00 },  /* З to Z */
1498   { 0x0418,  0x49, 0x00 },  /* И to I */
1499   { 0x0419,  0x49, 0x00 },  /* Й to I */
1500   { 0x041A,  0x4b, 0x00 },  /* К to K */
1501   { 0x041B,  0x4c, 0x00 },  /* Л to L */
1502   { 0x041C,  0x4d, 0x00 },  /* М to M */
1503   { 0x041D,  0x4e, 0x00 },  /* Н to N */
1504   { 0x041E,  0x4f, 0x00 },  /* О to O */
1505   { 0x041F,  0x50, 0x00 },  /* П to P */
1506   { 0x0420,  0x52, 0x00 },  /* Р to R */
1507   { 0x0421,  0x53, 0x00 },  /* С to S */
1508   { 0x0422,  0x54, 0x00 },  /* Т to T */
1509   { 0x0423,  0x55, 0x00 },  /* У to U */
1510   { 0x0424,  0x46, 0x00 },  /* Ф to F */
1511   { 0x0425,  0x4b, 0x68 },  /* Х to Kh */
1512   { 0x0426,  0x54, 0x63 },  /* Ц to Tc */
1513   { 0x0427,  0x43, 0x68 },  /* Ч to Ch */
1514   { 0x0428,  0x53, 0x68 },  /* Ш to Sh */
1515   { 0x0429,  0x53, 0x68 },  /* Щ to Shch */
1516   { 0x042A,  0x61, 0x00 },  /*  to A */
1517   { 0x042B,  0x59, 0x00 },  /* Ы to Y */
1518   { 0x042C,  0x59, 0x00 },  /*  to Y */
1519   { 0x042D,  0x45, 0x00 },  /* Э to E */
1520   { 0x042E,  0x49, 0x75 },  /* Ю to Iu */
1521   { 0x042F,  0x49, 0x61 },  /* Я to Ia */
1522   { 0x0430,  0x61, 0x00 },  /* а to a */
1523   { 0x0431,  0x62, 0x00 },  /* б to b */
1524   { 0x0432,  0x76, 0x00 },  /* в to v */
1525   { 0x0433,  0x67, 0x00 },  /* г to g */
1526   { 0x0434,  0x64, 0x00 },  /* д to d */
1527   { 0x0435,  0x65, 0x00 },  /* е to e */
1528   { 0x0436,  0x7a, 0x68 },  /* ж to zh */
1529   { 0x0437,  0x7a, 0x00 },  /* з to z */
1530   { 0x0438,  0x69, 0x00 },  /* и to i */
1531   { 0x0439,  0x69, 0x00 },  /* й to i */
1532   { 0x043A,  0x6b, 0x00 },  /* к to k */
1533   { 0x043B,  0x6c, 0x00 },  /* л to l */
1534   { 0x043C,  0x6d, 0x00 },  /* м to m */
1535   { 0x043D,  0x6e, 0x00 },  /* н to n */
1536   { 0x043E,  0x6f, 0x00 },  /* о to o */
1537   { 0x043F,  0x70, 0x00 },  /* п to p */
1538   { 0x0440,  0x72, 0x00 },  /* р to r */
1539   { 0x0441,  0x73, 0x00 },  /* с to s */
1540   { 0x0442,  0x74, 0x00 },  /* т to t */
1541   { 0x0443,  0x75, 0x00 },  /* у to u */
1542   { 0x0444,  0x66, 0x00 },  /* ф to f */
1543   { 0x0445,  0x6b, 0x68 },  /* х to kh */
1544   { 0x0446,  0x74, 0x63 },  /* ц to tc */
1545   { 0x0447,  0x63, 0x68 },  /* ч to ch */
1546   { 0x0448,  0x73, 0x68 },  /* ш to sh */
1547   { 0x0449,  0x73, 0x68 },  /* щ to shch */
1548   { 0x044A,  0x61, 0x00 },  /*  to a */
1549   { 0x044B,  0x79, 0x00 },  /* ы to y */
1550   { 0x044C,  0x79, 0x00 },  /*  to y */
1551   { 0x044D,  0x65, 0x00 },  /* э to e */
1552   { 0x044E,  0x69, 0x75 },  /* ю to iu */
1553   { 0x044F,  0x69, 0x61 },  /* я to ia */
1554   { 0x0450,  0x65, 0x00 },  /* ѐ to e */
1555   { 0x0451,  0x65, 0x00 },  /* ё to e */
1556   { 0x0452,  0x64, 0x00 },  /* ђ to d */
1557   { 0x0453,  0x67, 0x00 },  /* ѓ to g */
1558   { 0x0454,  0x65, 0x00 },  /* є to e */
1559   { 0x0455,  0x7a, 0x00 },  /* ѕ to z */
1560   { 0x0456,  0x69, 0x00 },  /* і to i */
1561   { 0x0457,  0x69, 0x00 },  /* ї to i */
1562   { 0x0458,  0x6a, 0x00 },  /* ј to j */
1563   { 0x0459,  0x69, 0x00 },  /* љ to i */
1564   { 0x045A,  0x6e, 0x00 },  /* њ to n */
1565   { 0x045B,  0x64, 0x00 },  /* ћ to d */
1566   { 0x045C,  0x6b, 0x00 },  /* ќ to k */
1567   { 0x045D,  0x69, 0x00 },  /* ѝ to i */
1568   { 0x045E,  0x75, 0x00 },  /* ў to u */
1569   { 0x045F,  0x64, 0x00 },  /* џ to d */
1570   { 0x1E02,  0x42, 0x00 },  /* Ḃ to B */
1571   { 0x1E03,  0x62, 0x00 },  /* ḃ to b */
1572   { 0x1E0A,  0x44, 0x00 },  /* Ḋ to D */
1573   { 0x1E0B,  0x64, 0x00 },  /* ḋ to d */
1574   { 0x1E1E,  0x46, 0x00 },  /* Ḟ to F */
1575   { 0x1E1F,  0x66, 0x00 },  /* ḟ to f */
1576   { 0x1E40,  0x4D, 0x00 },  /* Ṁ to M */
1577   { 0x1E41,  0x6D, 0x00 },  /* ṁ to m */
1578   { 0x1E56,  0x50, 0x00 },  /* Ṗ to P */
1579   { 0x1E57,  0x70, 0x00 },  /* ṗ to p */
1580   { 0x1E60,  0x53, 0x00 },  /* Ṡ to S */
1581   { 0x1E61,  0x73, 0x00 },  /* ṡ to s */
1582   { 0x1E6A,  0x54, 0x00 },  /* Ṫ to T */
1583   { 0x1E6B,  0x74, 0x00 },  /* ṫ to t */
1584   { 0x1E80,  0x57, 0x00 },  /* Ẁ to W */
1585   { 0x1E81,  0x77, 0x00 },  /* ẁ to w */
1586   { 0x1E82,  0x57, 0x00 },  /* Ẃ to W */
1587   { 0x1E83,  0x77, 0x00 },  /* ẃ to w */
1588   { 0x1E84,  0x57, 0x00 },  /* Ẅ to W */
1589   { 0x1E85,  0x77, 0x00 },  /* ẅ to w */
1590   { 0x1EF2,  0x59, 0x00 },  /* Ỳ to Y */
1591   { 0x1EF3,  0x79, 0x00 },  /* ỳ to y */
1592   { 0xFB00,  0x66, 0x66 },  /* ff to ff */
1593   { 0xFB01,  0x66, 0x69 },  /* fi to fi */
1594   { 0xFB02,  0x66, 0x6C },  /* fl to fl */
1595   { 0xFB05,  0x73, 0x74 },  /* ſt to st */
1596   { 0xFB06,  0x73, 0x74 },  /* st to st */
1597 };
1598 
1599 /*
1600 ** Convert the input string from UTF-8 into pure ASCII by converting
1601 ** all non-ASCII characters to some combination of characters in the
1602 ** ASCII subset.
1603 **
1604 ** The returned string might contain more characters than the input.
1605 **
1606 ** Space to hold the returned string comes from sqlite3_malloc() and
1607 ** should be freed by the caller.
1608 */
1609 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1610   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1611   int c, sz, nOut;
1612   if( zOut==0 ) return 0;
1613   nOut = 0;
1614   while( nIn>0 ){
1615     c = utf8Read(zIn, nIn, &sz);
1616     zIn += sz;
1617     nIn -= sz;
1618     if( c<=127 ){
1619       zOut[nOut++] = c;
1620     }else{
1621       int xTop, xBtm, x;
1622       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1623       xBtm = 0;
1624       while( xTop>=xBtm ){
1625         x = (xTop + xBtm)/2;
1626         if( translit[x].cFrom==c ){
1627           zOut[nOut++] = translit[x].cTo0;
1628           if( translit[x].cTo1 ){
1629             zOut[nOut++] = translit[x].cTo1;
1630             /* Add an extra "ch" after the "sh" for Щ and щ */
1631             if( c==0x0429 || c== 0x0449 ){
1632               zOut[nOut++] = 'c';
1633               zOut[nOut++] = 'h';
1634             }
1635           }
1636           c = 0;
1637           break;
1638         }else if( translit[x].cFrom>c ){
1639           xTop = x-1;
1640         }else{
1641           xBtm = x+1;
1642         }
1643       }
1644       if( c ) zOut[nOut++] = '?';
1645     }
1646   }
1647   zOut[nOut] = 0;
1648   return zOut;
1649 }
1650 
1651 /*
1652 ** Return the number of characters in the shortest prefix of the input
1653 ** string that transliterates to an ASCII string nTrans bytes or longer.
1654 ** Or, if the transliteration of the input string is less than nTrans
1655 ** bytes in size, return the number of characters in the input string.
1656 */
1657 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1658   int i, c, sz, nOut;
1659   int nChar;
1660 
1661   i = nOut = 0;
1662   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1663     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1664     i += sz;
1665 
1666     nOut++;
1667     if( c>=128 ){
1668       int xTop, xBtm, x;
1669       xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1670       xBtm = 0;
1671       while( xTop>=xBtm ){
1672         x = (xTop + xBtm)/2;
1673         if( translit[x].cFrom==c ){
1674           if( translit[x].cTo1 ) nOut++;
1675           if( c==0x0429 || c== 0x0449 ) nOut += 2;
1676           break;
1677         }else if( translit[x].cFrom>c ){
1678           xTop = x-1;
1679         }else{
1680           xBtm = x+1;
1681         }
1682       }
1683     }
1684   }
1685 
1686   return nChar;
1687 }
1688 
1689 
1690 /*
1691 **    spellfix1_translit(X)
1692 **
1693 ** Convert a string that contains non-ASCII Roman characters into
1694 ** pure ASCII.
1695 */
1696 static void transliterateSqlFunc(
1697   sqlite3_context *context,
1698   int argc,
1699   sqlite3_value **argv
1700 ){
1701   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1702   int nIn = sqlite3_value_bytes(argv[0]);
1703   unsigned char *zOut = transliterate(zIn, nIn);
1704   if( zOut==0 ){
1705     sqlite3_result_error_nomem(context);
1706   }else{
1707     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1708   }
1709 }
1710 
1711 /*
1712 **    spellfix1_scriptcode(X)
1713 **
1714 ** Try to determine the dominant script used by the word X and return
1715 ** its ISO 15924 numeric code.
1716 **
1717 ** The current implementation only understands the following scripts:
1718 **
1719 **    215  (Latin)
1720 **    220  (Cyrillic)
1721 **    200  (Greek)
1722 **
1723 ** This routine will return 998 if the input X contains characters from
1724 ** two or more of the above scripts or 999 if X contains no characters
1725 ** from any of the above scripts.
1726 */
1727 static void scriptCodeSqlFunc(
1728   sqlite3_context *context,
1729   int argc,
1730   sqlite3_value **argv
1731 ){
1732   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1733   int nIn = sqlite3_value_bytes(argv[0]);
1734   int c, sz;
1735   int scriptMask = 0;
1736   int res;
1737 # define SCRIPT_LATIN       0x0001
1738 # define SCRIPT_CYRILLIC    0x0002
1739 # define SCRIPT_GREEK       0x0004
1740 # define SCRIPT_HEBREW      0x0008
1741 # define SCRIPT_ARABIC      0x0010
1742 
1743   while( nIn>0 ){
1744     c = utf8Read(zIn, nIn, &sz);
1745     zIn += sz;
1746     nIn -= sz;
1747     if( c<0x02af && (c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT) ){
1748       scriptMask |= SCRIPT_LATIN;
1749     }else if( c>=0x0400 && c<=0x04ff ){
1750       scriptMask |= SCRIPT_CYRILLIC;
1751     }else if( c>=0x0386 && c<=0x03ce ){
1752       scriptMask |= SCRIPT_GREEK;
1753     }else if( c>=0x0590 && c<=0x05ff ){
1754       scriptMask |= SCRIPT_HEBREW;
1755     }else if( c>=0x0600 && c<=0x06ff ){
1756       scriptMask |= SCRIPT_ARABIC;
1757     }
1758   }
1759   switch( scriptMask ){
1760     case 0:                res = 999; break;
1761     case SCRIPT_LATIN:     res = 215; break;
1762     case SCRIPT_CYRILLIC:  res = 220; break;
1763     case SCRIPT_GREEK:     res = 200; break;
1764     case SCRIPT_HEBREW:    res = 125; break;
1765     case SCRIPT_ARABIC:    res = 160; break;
1766     default:               res = 998; break;
1767   }
1768   sqlite3_result_int(context, res);
1769 }
1770 
1771 /* End transliterate
1772 ******************************************************************************
1773 ******************************************************************************
1774 ** Begin spellfix1 virtual table.
1775 */
1776 
1777 /* Maximum length of a phonehash used for querying the shadow table */
1778 #define SPELLFIX_MX_HASH  8
1779 
1780 /* Maximum number of hash strings to examine per query */
1781 #define SPELLFIX_MX_RUN   1
1782 
1783 typedef struct spellfix1_vtab spellfix1_vtab;
1784 typedef struct spellfix1_cursor spellfix1_cursor;
1785 
1786 /* Fuzzy-search virtual table object */
1787 struct spellfix1_vtab {
1788   sqlite3_vtab base;         /* Base class - must be first */
1789   sqlite3 *db;               /* Database connection */
1790   char *zDbName;             /* Name of database holding this table */
1791   char *zTableName;          /* Name of the virtual table */
1792   char *zCostTable;          /* Table holding edit-distance cost numbers */
1793   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1794 };
1795 
1796 /* Fuzzy-search cursor object */
1797 struct spellfix1_cursor {
1798   sqlite3_vtab_cursor base;    /* Base class - must be first */
1799   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1800   char *zPattern;              /* rhs of MATCH clause */
1801   int idxNum;                  /* idxNum value passed to xFilter() */
1802   int nRow;                    /* Number of rows of content */
1803   int nAlloc;                  /* Number of allocated rows */
1804   int iRow;                    /* Current row of content */
1805   int iLang;                   /* Value of the langid= constraint */
1806   int iTop;                    /* Value of the top= constraint */
1807   int iScope;                  /* Value of the scope= constraint */
1808   int nSearch;                 /* Number of vocabulary items checked */
1809   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1810   struct spellfix1_row {       /* For each row of content */
1811     sqlite3_int64 iRowid;         /* Rowid for this row */
1812     char *zWord;                  /* Text for this row */
1813     int iRank;                    /* Rank for this row */
1814     int iDistance;                /* Distance from pattern for this row */
1815     int iScore;                   /* Score for sorting */
1816     int iMatchlen;                /* Value of matchlen column (or -1) */
1817     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1818   } *a;
1819 };
1820 
1821 /*
1822 ** Construct one or more SQL statements from the format string given
1823 ** and then evaluate those statements. The success code is written
1824 ** into *pRc.
1825 **
1826 ** If *pRc is initially non-zero then this routine is a no-op.
1827 */
1828 static void spellfix1DbExec(
1829   int *pRc,              /* Success code */
1830   sqlite3 *db,           /* Database in which to run SQL */
1831   const char *zFormat,   /* Format string for SQL */
1832   ...                    /* Arguments to the format string */
1833 ){
1834   va_list ap;
1835   char *zSql;
1836   if( *pRc ) return;
1837   va_start(ap, zFormat);
1838   zSql = sqlite3_vmprintf(zFormat, ap);
1839   va_end(ap);
1840   if( zSql==0 ){
1841     *pRc = SQLITE_NOMEM;
1842   }else{
1843     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1844     sqlite3_free(zSql);
1845   }
1846 }
1847 
1848 /*
1849 ** xDisconnect/xDestroy method for the fuzzy-search module.
1850 */
1851 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1852   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1853   int rc = SQLITE_OK;
1854   if( isDestroy ){
1855     sqlite3 *db = p->db;
1856     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1857                   p->zDbName, p->zTableName);
1858   }
1859   if( rc==SQLITE_OK ){
1860     sqlite3_free(p->zTableName);
1861     editDist3ConfigDelete(p->pConfig3);
1862     sqlite3_free(p->zCostTable);
1863     sqlite3_free(p);
1864   }
1865   return rc;
1866 }
1867 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1868   return spellfix1Uninit(0, pVTab);
1869 }
1870 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1871   return spellfix1Uninit(1, pVTab);
1872 }
1873 
1874 /*
1875 ** Make a copy of a string.  Remove leading and trailing whitespace
1876 ** and dequote it.
1877 */
1878 static char *spellfix1Dequote(const char *zIn){
1879   char *zOut;
1880   int i, j;
1881   char c;
1882   while( isspace((unsigned char)zIn[0]) ) zIn++;
1883   zOut = sqlite3_mprintf("%s", zIn);
1884   if( zOut==0 ) return 0;
1885   i = (int)strlen(zOut);
1886 #if 0  /* The parser will never leave spaces at the end */
1887   while( i>0 && isspace(zOut[i-1]) ){ i--; }
1888 #endif
1889   zOut[i] = 0;
1890   c = zOut[0];
1891   if( c=='\'' || c=='"' ){
1892     for(i=1, j=0; ALWAYS(zOut[i]); i++){
1893       zOut[j++] = zOut[i];
1894       if( zOut[i]==c ){
1895         if( zOut[i+1]==c ){
1896           i++;
1897         }else{
1898           zOut[j-1] = 0;
1899           break;
1900         }
1901       }
1902     }
1903   }
1904   return zOut;
1905 }
1906 
1907 
1908 /*
1909 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
1910 **
1911 **   argv[0]   -> module name  ("spellfix1")
1912 **   argv[1]   -> database name
1913 **   argv[2]   -> table name
1914 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
1915 */
1916 static int spellfix1Init(
1917   int isCreate,
1918   sqlite3 *db,
1919   void *pAux,
1920   int argc, const char *const*argv,
1921   sqlite3_vtab **ppVTab,
1922   char **pzErr
1923 ){
1924   spellfix1_vtab *pNew = 0;
1925   /* const char *zModule = argv[0]; // not used */
1926   const char *zDbName = argv[1];
1927   const char *zTableName = argv[2];
1928   int nDbName;
1929   int rc = SQLITE_OK;
1930   int i;
1931 
1932   nDbName = (int)strlen(zDbName);
1933   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
1934   if( pNew==0 ){
1935     rc = SQLITE_NOMEM;
1936   }else{
1937     memset(pNew, 0, sizeof(*pNew));
1938     pNew->zDbName = (char*)&pNew[1];
1939     memcpy(pNew->zDbName, zDbName, nDbName+1);
1940     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
1941     pNew->db = db;
1942     if( pNew->zTableName==0 ){
1943       rc = SQLITE_NOMEM;
1944     }else{
1945       rc = sqlite3_declare_vtab(db,
1946            "CREATE TABLE x(word,rank,distance,langid, "
1947            "score, matchlen, phonehash HIDDEN, "
1948            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
1949            "soundslike HIDDEN, command HIDDEN)"
1950       );
1951 #define SPELLFIX_COL_WORD            0
1952 #define SPELLFIX_COL_RANK            1
1953 #define SPELLFIX_COL_DISTANCE        2
1954 #define SPELLFIX_COL_LANGID          3
1955 #define SPELLFIX_COL_SCORE           4
1956 #define SPELLFIX_COL_MATCHLEN        5
1957 #define SPELLFIX_COL_PHONEHASH       6
1958 #define SPELLFIX_COL_TOP             7
1959 #define SPELLFIX_COL_SCOPE           8
1960 #define SPELLFIX_COL_SRCHCNT         9
1961 #define SPELLFIX_COL_SOUNDSLIKE     10
1962 #define SPELLFIX_COL_COMMAND        11
1963     }
1964     if( rc==SQLITE_OK && isCreate ){
1965       spellfix1DbExec(&rc, db,
1966          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
1967          "  id INTEGER PRIMARY KEY,\n"
1968          "  rank INT,\n"
1969          "  langid INT,\n"
1970          "  word TEXT,\n"
1971          "  k1 TEXT,\n"
1972          "  k2 TEXT\n"
1973          ");\n",
1974          zDbName, zTableName
1975       );
1976       spellfix1DbExec(&rc, db,
1977          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
1978             "ON \"%w_vocab\"(langid,k2);",
1979          zDbName, zTableName, zTableName
1980       );
1981     }
1982     for(i=3; rc==SQLITE_OK && i<argc; i++){
1983       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
1984         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
1985         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
1986         continue;
1987       }
1988       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
1989       rc = SQLITE_ERROR;
1990     }
1991   }
1992 
1993   if( rc && pNew ){
1994     *ppVTab = 0;
1995     spellfix1Uninit(0, &pNew->base);
1996   }else{
1997     *ppVTab = (sqlite3_vtab *)pNew;
1998   }
1999   return rc;
2000 }
2001 
2002 /*
2003 ** The xConnect and xCreate methods
2004 */
2005 static int spellfix1Connect(
2006   sqlite3 *db,
2007   void *pAux,
2008   int argc, const char *const*argv,
2009   sqlite3_vtab **ppVTab,
2010   char **pzErr
2011 ){
2012   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2013 }
2014 static int spellfix1Create(
2015   sqlite3 *db,
2016   void *pAux,
2017   int argc, const char *const*argv,
2018   sqlite3_vtab **ppVTab,
2019   char **pzErr
2020 ){
2021   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2022 }
2023 
2024 /*
2025 ** Clear all of the content from a cursor.
2026 */
2027 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2028   int i;
2029   for(i=0; i<pCur->nRow; i++){
2030     sqlite3_free(pCur->a[i].zWord);
2031   }
2032   pCur->nRow = 0;
2033   pCur->iRow = 0;
2034   pCur->nSearch = 0;
2035   if( pCur->pFullScan ){
2036     sqlite3_finalize(pCur->pFullScan);
2037     pCur->pFullScan = 0;
2038   }
2039 }
2040 
2041 /*
2042 ** Resize the cursor to hold up to N rows of content
2043 */
2044 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2045   struct spellfix1_row *aNew;
2046   assert( N>=pCur->nRow );
2047   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2048   if( aNew==0 && N>0 ){
2049     spellfix1ResetCursor(pCur);
2050     sqlite3_free(pCur->a);
2051     pCur->nAlloc = 0;
2052     pCur->a = 0;
2053   }else{
2054     pCur->nAlloc = N;
2055     pCur->a = aNew;
2056   }
2057 }
2058 
2059 
2060 /*
2061 ** Close a fuzzy-search cursor.
2062 */
2063 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2064   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2065   spellfix1ResetCursor(pCur);
2066   spellfix1ResizeCursor(pCur, 0);
2067   sqlite3_free(pCur->zPattern);
2068   sqlite3_free(pCur);
2069   return SQLITE_OK;
2070 }
2071 
2072 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2073 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2074 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2075 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2076 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2077 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2078 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2079 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2080 
2081 /*
2082 **
2083 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2084 ** above.
2085 **
2086 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2087 ** if specified and in that order.
2088 */
2089 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2090   int iPlan = 0;
2091   int iLangTerm = -1;
2092   int iTopTerm = -1;
2093   int iScopeTerm = -1;
2094   int iDistTerm = -1;
2095   int iRowidTerm = -1;
2096   int i;
2097   const struct sqlite3_index_constraint *pConstraint;
2098   pConstraint = pIdxInfo->aConstraint;
2099   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2100     if( pConstraint->usable==0 ) continue;
2101 
2102     /* Terms of the form:  word MATCH $str */
2103     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2104      && pConstraint->iColumn==SPELLFIX_COL_WORD
2105      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2106     ){
2107       iPlan |= SPELLFIX_IDXNUM_MATCH;
2108       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2109       pIdxInfo->aConstraintUsage[i].omit = 1;
2110     }
2111 
2112     /* Terms of the form:  langid = $langid  */
2113     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2114      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2115      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2116     ){
2117       iPlan |= SPELLFIX_IDXNUM_LANGID;
2118       iLangTerm = i;
2119     }
2120 
2121     /* Terms of the form:  top = $top */
2122     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2123      && pConstraint->iColumn==SPELLFIX_COL_TOP
2124      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2125     ){
2126       iPlan |= SPELLFIX_IDXNUM_TOP;
2127       iTopTerm = i;
2128     }
2129 
2130     /* Terms of the form:  scope = $scope */
2131     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2132      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2133      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2134     ){
2135       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2136       iScopeTerm = i;
2137     }
2138 
2139     /* Terms of the form:  distance < $dist or distance <= $dist */
2140     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2141      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2142      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2143           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2144     ){
2145       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2146         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2147       }else{
2148         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2149       }
2150       iDistTerm = i;
2151     }
2152 
2153     /* Terms of the form:  distance < $dist or distance <= $dist */
2154     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2155      && pConstraint->iColumn<0
2156      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2157     ){
2158       iPlan |= SPELLFIX_IDXNUM_ROWID;
2159       iRowidTerm = i;
2160     }
2161   }
2162   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2163     int idx = 2;
2164     pIdxInfo->idxNum = iPlan;
2165     if( pIdxInfo->nOrderBy==1
2166      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2167      && pIdxInfo->aOrderBy[0].desc==0
2168     ){
2169       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2170     }
2171     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2172       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2173       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2174     }
2175     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2176       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2177       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2178     }
2179     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2180       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2181       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2182     }
2183     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2184       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2185       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2186     }
2187     pIdxInfo->estimatedCost = 1e5;
2188   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2189     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2190     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2191     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2192     pIdxInfo->estimatedCost = 5;
2193   }else{
2194     pIdxInfo->idxNum = 0;
2195     pIdxInfo->estimatedCost = 1e50;
2196   }
2197   return SQLITE_OK;
2198 }
2199 
2200 /*
2201 ** Open a new fuzzy-search cursor.
2202 */
2203 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2204   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2205   spellfix1_cursor *pCur;
2206   pCur = sqlite3_malloc64( sizeof(*pCur) );
2207   if( pCur==0 ) return SQLITE_NOMEM;
2208   memset(pCur, 0, sizeof(*pCur));
2209   pCur->pVTab = p;
2210   *ppCursor = &pCur->base;
2211   return SQLITE_OK;
2212 }
2213 
2214 /*
2215 ** Adjust a distance measurement by the words rank in order to show
2216 ** preference to common words.
2217 */
2218 static int spellfix1Score(int iDistance, int iRank){
2219   int iLog2;
2220   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2221   return iDistance + 32 - iLog2;
2222 }
2223 
2224 /*
2225 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2226 ** that they sort in order of increasing distance.
2227 */
2228 static int spellfix1RowCompare(const void *A, const void *B){
2229   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2230   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2231   return a->iScore - b->iScore;
2232 }
2233 
2234 /*
2235 ** A structure used to pass information from spellfix1FilterForMatch()
2236 ** into spellfix1RunQuery().
2237 */
2238 typedef struct MatchQuery {
2239   spellfix1_cursor *pCur;          /* The cursor being queried */
2240   sqlite3_stmt *pStmt;             /* shadow table query statment */
2241   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2242   const char *zPattern;            /* Transliterated input string */
2243   int nPattern;                    /* Length of zPattern */
2244   EditDist3FromString *pMatchStr3; /* Original unicode string */
2245   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2246   const EditDist3Lang *pLang;      /* The selected language coefficients */
2247   int iLang;                       /* The language id */
2248   int iScope;                      /* Default scope */
2249   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2250   int rc;                          /* Error code */
2251   int nRun;                  /* Number of prior runs for the same zPattern */
2252   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2253 } MatchQuery;
2254 
2255 /*
2256 ** Run a query looking for the best matches against zPattern using
2257 ** zHash as the character class seed hash.
2258 */
2259 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2260   const char *zK1;
2261   const char *zWord;
2262   int iDist;
2263   int iRank;
2264   int iScore;
2265   int iWorst = 0;
2266   int idx;
2267   int idxWorst = -1;
2268   int i;
2269   int iScope = p->iScope;
2270   spellfix1_cursor *pCur = p->pCur;
2271   sqlite3_stmt *pStmt = p->pStmt;
2272   char zHash1[SPELLFIX_MX_HASH];
2273   char zHash2[SPELLFIX_MX_HASH];
2274   char *zClass;
2275   int nClass;
2276   int rc;
2277 
2278   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2279   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2280   if( zClass==0 ){
2281     p->rc = SQLITE_NOMEM;
2282     return;
2283   }
2284   nClass = (int)strlen(zClass);
2285   if( nClass>SPELLFIX_MX_HASH-2 ){
2286     nClass = SPELLFIX_MX_HASH-2;
2287     zClass[nClass] = 0;
2288   }
2289   if( nClass<=iScope ){
2290     if( nClass>2 ){
2291       iScope = nClass-1;
2292     }else{
2293       iScope = nClass;
2294     }
2295   }
2296   memcpy(zHash1, zClass, iScope);
2297   sqlite3_free(zClass);
2298   zHash1[iScope] = 0;
2299   memcpy(zHash2, zHash1, iScope);
2300   zHash2[iScope] = 'Z';
2301   zHash2[iScope+1] = 0;
2302 #if SPELLFIX_MX_RUN>1
2303   for(i=0; i<p->nRun; i++){
2304     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2305   }
2306 #endif
2307   assert( p->nRun<SPELLFIX_MX_RUN );
2308   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2309   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2310    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2311   ){
2312     p->rc = SQLITE_NOMEM;
2313     return;
2314   }
2315 #if SPELLFIX_MX_RUN>1
2316   for(i=0; i<pCur->nRow; i++){
2317     if( pCur->a[i].iScore>iWorst ){
2318       iWorst = pCur->a[i].iScore;
2319       idxWorst = i;
2320     }
2321   }
2322 #endif
2323   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2324     int iMatchlen = -1;
2325     iRank = sqlite3_column_int(pStmt, 2);
2326     if( p->pMatchStr3 ){
2327       int nWord = sqlite3_column_bytes(pStmt, 1);
2328       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2329       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2330     }else{
2331       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2332       if( zK1==0 ) continue;
2333       iDist = editdist1(p->zPattern, zK1, 0);
2334     }
2335     if( iDist<0 ){
2336       p->rc = SQLITE_NOMEM;
2337       break;
2338     }
2339     pCur->nSearch++;
2340 
2341     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2342     ** check if this row meets it. If not, jump back up to the top of the
2343     ** loop to process the next row. Otherwise, if the row does match the
2344     ** distance constraint, check if the pCur->a[] array is already full.
2345     ** If it is and no explicit "top = ?" constraint was present in the
2346     ** query, grow the array to ensure there is room for the new entry. */
2347     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2348     if( p->iMaxDist>=0 ){
2349       if( iDist>p->iMaxDist ) continue;
2350       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2351         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2352         if( pCur->a==0 ) break;
2353       }
2354     }
2355 
2356     iScore = spellfix1Score(iDist,iRank);
2357     if( pCur->nRow<pCur->nAlloc ){
2358       idx = pCur->nRow;
2359     }else if( iScore<iWorst ){
2360       idx = idxWorst;
2361       sqlite3_free(pCur->a[idx].zWord);
2362     }else{
2363       continue;
2364     }
2365 
2366     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2367     if( pCur->a[idx].zWord==0 ){
2368       p->rc = SQLITE_NOMEM;
2369       break;
2370     }
2371     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2372     pCur->a[idx].iRank = iRank;
2373     pCur->a[idx].iDistance = iDist;
2374     pCur->a[idx].iScore = iScore;
2375     pCur->a[idx].iMatchlen = iMatchlen;
2376     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2377     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2378     if( pCur->nRow==pCur->nAlloc ){
2379       iWorst = pCur->a[0].iScore;
2380       idxWorst = 0;
2381       for(i=1; i<pCur->nRow; i++){
2382         iScore = pCur->a[i].iScore;
2383         if( iWorst<iScore ){
2384           iWorst = iScore;
2385           idxWorst = i;
2386         }
2387       }
2388     }
2389   }
2390   rc = sqlite3_reset(pStmt);
2391   if( rc ) p->rc = rc;
2392 }
2393 
2394 /*
2395 ** This version of the xFilter method work if the MATCH term is present
2396 ** and we are doing a scan.
2397 */
2398 static int spellfix1FilterForMatch(
2399   spellfix1_cursor *pCur,
2400   int argc,
2401   sqlite3_value **argv
2402 ){
2403   int idxNum = pCur->idxNum;
2404   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2405   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2406   char *zPattern;                    /* Transliteration of zMatchThis */
2407   int nPattern;                      /* Length of zPattern */
2408   int iLimit = 20;                   /* Max number of rows of output */
2409   int iScope = 3;                    /* Use this many characters of zClass */
2410   int iLang = 0;                     /* Language code */
2411   char *zSql;                        /* SQL of shadow table query */
2412   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2413   int rc;                            /* Result code */
2414   int idx = 1;                       /* Next available filter parameter */
2415   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2416   MatchQuery x;                      /* For passing info to RunQuery() */
2417 
2418   /* Load the cost table if we have not already done so */
2419   if( p->zCostTable!=0 && p->pConfig3==0 ){
2420     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2421     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2422     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2423     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2424     if( rc ) return rc;
2425   }
2426   memset(&x, 0, sizeof(x));
2427   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2428   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2429 
2430   if( idxNum&2 ){
2431     iLang = sqlite3_value_int(argv[idx++]);
2432   }
2433   if( idxNum&4 ){
2434     iLimit = sqlite3_value_int(argv[idx++]);
2435     if( iLimit<1 ) iLimit = 1;
2436   }
2437   if( idxNum&8 ){
2438     x.iScope = sqlite3_value_int(argv[idx++]);
2439     if( x.iScope<1 ) x.iScope = 1;
2440     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2441   }
2442   if( idxNum&(16|32) ){
2443     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2444     if( idxNum&16 ) x.iMaxDist--;
2445     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2446   }
2447   spellfix1ResetCursor(pCur);
2448   spellfix1ResizeCursor(pCur, iLimit);
2449   zMatchThis = sqlite3_value_text(argv[0]);
2450   if( zMatchThis==0 ) return SQLITE_OK;
2451   if( p->pConfig3 ){
2452     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2453     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2454     if( pMatchStr3==0 ){
2455       x.rc = SQLITE_NOMEM;
2456       goto filter_exit;
2457     }
2458   }else{
2459     x.pLang = 0;
2460   }
2461   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2462   sqlite3_free(pCur->zPattern);
2463   pCur->zPattern = zPattern;
2464   if( zPattern==0 ){
2465     x.rc = SQLITE_NOMEM;
2466     goto filter_exit;
2467   }
2468   nPattern = (int)strlen(zPattern);
2469   if( zPattern[nPattern-1]=='*' ) nPattern--;
2470   zSql = sqlite3_mprintf(
2471      "SELECT id, word, rank, k1"
2472      "  FROM \"%w\".\"%w_vocab\""
2473      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2474      p->zDbName, p->zTableName, iLang
2475   );
2476   if( zSql==0 ){
2477     x.rc = SQLITE_NOMEM;
2478     pStmt = 0;
2479     goto filter_exit;
2480   }
2481   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2482   sqlite3_free(zSql);
2483   pCur->iLang = iLang;
2484   x.pCur = pCur;
2485   x.pStmt = pStmt;
2486   x.zPattern = zPattern;
2487   x.nPattern = nPattern;
2488   x.pMatchStr3 = pMatchStr3;
2489   x.iLang = iLang;
2490   x.rc = rc;
2491   x.pConfig3 = p->pConfig3;
2492   if( x.rc==SQLITE_OK ){
2493     spellfix1RunQuery(&x, zPattern, nPattern);
2494   }
2495 
2496   if( pCur->a ){
2497     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2498     pCur->iTop = iLimit;
2499     pCur->iScope = iScope;
2500   }else{
2501     x.rc = SQLITE_NOMEM;
2502   }
2503 
2504 filter_exit:
2505   sqlite3_finalize(pStmt);
2506   editDist3FromStringDelete(pMatchStr3);
2507   return x.rc;
2508 }
2509 
2510 /*
2511 ** This version of xFilter handles a full-table scan case
2512 */
2513 static int spellfix1FilterForFullScan(
2514   spellfix1_cursor *pCur,
2515   int argc,
2516   sqlite3_value **argv
2517 ){
2518   int rc = SQLITE_OK;
2519   int idxNum = pCur->idxNum;
2520   char *zSql;
2521   spellfix1_vtab *pVTab = pCur->pVTab;
2522   spellfix1ResetCursor(pCur);
2523   assert( idxNum==0 || idxNum==64 );
2524   zSql = sqlite3_mprintf(
2525      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2526      pVTab->zDbName, pVTab->zTableName,
2527      ((idxNum & 64) ? " WHERE rowid=?" : "")
2528   );
2529   if( zSql==0 ) return SQLITE_NOMEM;
2530   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2531   sqlite3_free(zSql);
2532   if( rc==SQLITE_OK && (idxNum & 64) ){
2533     assert( argc==1 );
2534     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2535   }
2536   pCur->nRow = pCur->iRow = 0;
2537   if( rc==SQLITE_OK ){
2538     rc = sqlite3_step(pCur->pFullScan);
2539     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2540     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2541   }else{
2542     pCur->iRow = 0;
2543   }
2544   return rc;
2545 }
2546 
2547 
2548 /*
2549 ** Called to "rewind" a cursor back to the beginning so that
2550 ** it starts its output over again.  Always called at least once
2551 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2552 */
2553 static int spellfix1Filter(
2554   sqlite3_vtab_cursor *cur,
2555   int idxNum, const char *idxStr,
2556   int argc, sqlite3_value **argv
2557 ){
2558   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2559   int rc;
2560   pCur->idxNum = idxNum;
2561   if( idxNum & 1 ){
2562     rc = spellfix1FilterForMatch(pCur, argc, argv);
2563   }else{
2564     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2565   }
2566   return rc;
2567 }
2568 
2569 
2570 /*
2571 ** Advance a cursor to its next row of output
2572 */
2573 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2574   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2575   int rc = SQLITE_OK;
2576   if( pCur->iRow < pCur->nRow ){
2577     if( pCur->pFullScan ){
2578       rc = sqlite3_step(pCur->pFullScan);
2579       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2580       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2581     }else{
2582       pCur->iRow++;
2583     }
2584   }
2585   return rc;
2586 }
2587 
2588 /*
2589 ** Return TRUE if we are at the end-of-file
2590 */
2591 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2592   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2593   return pCur->iRow>=pCur->nRow;
2594 }
2595 
2596 /*
2597 ** Return columns from the current row.
2598 */
2599 static int spellfix1Column(
2600   sqlite3_vtab_cursor *cur,
2601   sqlite3_context *ctx,
2602   int i
2603 ){
2604   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2605   if( pCur->pFullScan ){
2606     if( i<=SPELLFIX_COL_LANGID ){
2607       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2608     }else{
2609       sqlite3_result_null(ctx);
2610     }
2611     return SQLITE_OK;
2612   }
2613   switch( i ){
2614     case SPELLFIX_COL_WORD: {
2615       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2616       break;
2617     }
2618     case SPELLFIX_COL_RANK: {
2619       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2620       break;
2621     }
2622     case SPELLFIX_COL_DISTANCE: {
2623       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2624       break;
2625     }
2626     case SPELLFIX_COL_LANGID: {
2627       sqlite3_result_int(ctx, pCur->iLang);
2628       break;
2629     }
2630     case SPELLFIX_COL_SCORE: {
2631       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2632       break;
2633     }
2634     case SPELLFIX_COL_MATCHLEN: {
2635       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2636       if( iMatchlen<0 ){
2637         int nPattern = (int)strlen(pCur->zPattern);
2638         char *zWord = pCur->a[pCur->iRow].zWord;
2639         int nWord = (int)strlen(zWord);
2640 
2641         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2642           char *zTranslit;
2643           int res;
2644           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2645           if( !zTranslit ) return SQLITE_NOMEM;
2646           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2647           sqlite3_free(zTranslit);
2648           if( res<0 ) return SQLITE_NOMEM;
2649           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2650         }else{
2651           iMatchlen = utf8Charlen(zWord, nWord);
2652         }
2653       }
2654 
2655       sqlite3_result_int(ctx, iMatchlen);
2656       break;
2657     }
2658     case SPELLFIX_COL_PHONEHASH: {
2659       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2660       break;
2661     }
2662     case SPELLFIX_COL_TOP: {
2663       sqlite3_result_int(ctx, pCur->iTop);
2664       break;
2665     }
2666     case SPELLFIX_COL_SCOPE: {
2667       sqlite3_result_int(ctx, pCur->iScope);
2668       break;
2669     }
2670     case SPELLFIX_COL_SRCHCNT: {
2671       sqlite3_result_int(ctx, pCur->nSearch);
2672       break;
2673     }
2674     default: {
2675       sqlite3_result_null(ctx);
2676       break;
2677     }
2678   }
2679   return SQLITE_OK;
2680 }
2681 
2682 /*
2683 ** The rowid.
2684 */
2685 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2686   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2687   if( pCur->pFullScan ){
2688     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2689   }else{
2690     *pRowid = pCur->a[pCur->iRow].iRowid;
2691   }
2692   return SQLITE_OK;
2693 }
2694 
2695 /*
2696 ** This function is called by the xUpdate() method. It returns a string
2697 ** containing the conflict mode that xUpdate() should use for the current
2698 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2699 */
2700 static const char *spellfix1GetConflict(sqlite3 *db){
2701   static const char *azConflict[] = {
2702     /* Note: Instead of "FAIL" - "ABORT". */
2703     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2704   };
2705   int eConflict = sqlite3_vtab_on_conflict(db);
2706 
2707   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2708        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2709        || eConflict==SQLITE_REPLACE
2710   );
2711   assert( SQLITE_ROLLBACK==1 );
2712   assert( SQLITE_IGNORE==2 );
2713   assert( SQLITE_FAIL==3 );
2714   assert( SQLITE_ABORT==4 );
2715   assert( SQLITE_REPLACE==5 );
2716 
2717   return azConflict[eConflict-1];
2718 }
2719 
2720 /*
2721 ** The xUpdate() method.
2722 */
2723 static int spellfix1Update(
2724   sqlite3_vtab *pVTab,
2725   int argc,
2726   sqlite3_value **argv,
2727   sqlite_int64 *pRowid
2728 ){
2729   int rc = SQLITE_OK;
2730   sqlite3_int64 rowid, newRowid;
2731   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2732   sqlite3 *db = p->db;
2733 
2734   if( argc==1 ){
2735     /* A delete operation on the rowid given by argv[0] */
2736     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2737     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2738                            " WHERE id=%lld",
2739                   p->zDbName, p->zTableName, rowid);
2740   }else{
2741     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2742     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2743     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2744     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2745     const unsigned char *zSoundslike =
2746            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2747     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2748     char *zK1, *zK2;
2749     int i;
2750     char c;
2751     const char *zConflict = spellfix1GetConflict(db);
2752 
2753     if( zWord==0 ){
2754       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2755       ** cause zWord to be NULL, so we look at the "command" column to see
2756       ** what special actions to take */
2757       const char *zCmd =
2758          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2759       if( zCmd==0 ){
2760         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2761                                          p->zTableName);
2762         return SQLITE_CONSTRAINT_NOTNULL;
2763       }
2764       if( strcmp(zCmd,"reset")==0 ){
2765         /* Reset the  edit cost table (if there is one). */
2766         editDist3ConfigDelete(p->pConfig3);
2767         p->pConfig3 = 0;
2768         return SQLITE_OK;
2769       }
2770       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2771         editDist3ConfigDelete(p->pConfig3);
2772         p->pConfig3 = 0;
2773         sqlite3_free(p->zCostTable);
2774         p->zCostTable = spellfix1Dequote(zCmd+16);
2775         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2776         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2777           sqlite3_free(p->zCostTable);
2778           p->zCostTable = 0;
2779         }
2780         return SQLITE_OK;
2781       }
2782       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2783                                        p->zTableName, zCmd);
2784       return SQLITE_ERROR;
2785     }
2786     if( iRank<1 ) iRank = 1;
2787     if( zSoundslike ){
2788       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2789     }else{
2790       zK1 = (char*)transliterate(zWord, nWord);
2791     }
2792     if( zK1==0 ) return SQLITE_NOMEM;
2793     for(i=0; (c = zK1[i])!=0; i++){
2794        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2795     }
2796     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2797     if( zK2==0 ){
2798       sqlite3_free(zK1);
2799       return SQLITE_NOMEM;
2800     }
2801     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2802       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2803         spellfix1DbExec(&rc, db,
2804                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2805                "VALUES(%d,%d,%Q,%Q,%Q)",
2806                p->zDbName, p->zTableName,
2807                iRank, iLang, zWord, zK1, zK2
2808         );
2809       }else{
2810         newRowid = sqlite3_value_int64(argv[1]);
2811         spellfix1DbExec(&rc, db,
2812             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2813             "VALUES(%lld,%d,%d,%Q,%Q,%Q)",
2814             zConflict, p->zDbName, p->zTableName,
2815             newRowid, iRank, iLang, zWord, zK1, zK2
2816         );
2817       }
2818       *pRowid = sqlite3_last_insert_rowid(db);
2819     }else{
2820       rowid = sqlite3_value_int64(argv[0]);
2821       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2822       spellfix1DbExec(&rc, db,
2823              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2824              " word=%Q, k1=%Q, k2=%Q WHERE id=%lld",
2825              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2826              zWord, zK1, zK2, rowid
2827       );
2828     }
2829     sqlite3_free(zK1);
2830     sqlite3_free(zK2);
2831   }
2832   return rc;
2833 }
2834 
2835 /*
2836 ** Rename the spellfix1 table.
2837 */
2838 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2839   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2840   sqlite3 *db = p->db;
2841   int rc = SQLITE_OK;
2842   char *zNewName = sqlite3_mprintf("%s", zNew);
2843   if( zNewName==0 ){
2844     return SQLITE_NOMEM;
2845   }
2846   spellfix1DbExec(&rc, db,
2847      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2848      p->zDbName, p->zTableName, zNewName
2849   );
2850   if( rc==SQLITE_OK ){
2851     sqlite3_free(p->zTableName);
2852     p->zTableName = zNewName;
2853   }else{
2854     sqlite3_free(zNewName);
2855   }
2856   return rc;
2857 }
2858 
2859 
2860 /*
2861 ** A virtual table module that provides fuzzy search.
2862 */
2863 static sqlite3_module spellfix1Module = {
2864   0,                       /* iVersion */
2865   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2866   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2867   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2868   spellfix1Disconnect,     /* xDisconnect - close a connection */
2869   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2870   spellfix1Open,           /* xOpen - open a cursor */
2871   spellfix1Close,          /* xClose - close a cursor */
2872   spellfix1Filter,         /* xFilter - configure scan constraints */
2873   spellfix1Next,           /* xNext - advance a cursor */
2874   spellfix1Eof,            /* xEof - check for end of scan */
2875   spellfix1Column,         /* xColumn - read data */
2876   spellfix1Rowid,          /* xRowid - read data */
2877   spellfix1Update,         /* xUpdate */
2878   0,                       /* xBegin */
2879   0,                       /* xSync */
2880   0,                       /* xCommit */
2881   0,                       /* xRollback */
2882   0,                       /* xFindMethod */
2883   spellfix1Rename,         /* xRename */
2884 };
2885 
2886 /*
2887 ** Register the various functions and the virtual table.
2888 */
2889 static int spellfix1Register(sqlite3 *db){
2890   int rc = SQLITE_OK;
2891   int i;
2892   rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0,
2893                                   transliterateSqlFunc, 0, 0);
2894   if( rc==SQLITE_OK ){
2895     rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0,
2896                                   editdistSqlFunc, 0, 0);
2897   }
2898   if( rc==SQLITE_OK ){
2899     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0,
2900                                   phoneticHashSqlFunc, 0, 0);
2901   }
2902   if( rc==SQLITE_OK ){
2903     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0,
2904                                   scriptCodeSqlFunc, 0, 0);
2905   }
2906   if( rc==SQLITE_OK ){
2907     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
2908   }
2909   if( rc==SQLITE_OK ){
2910     rc = editDist3Install(db);
2911   }
2912 
2913   /* Verify sanity of the translit[] table */
2914   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
2915     assert( translit[i].cFrom<translit[i+1].cFrom );
2916   }
2917 
2918   return rc;
2919 }
2920 
2921 #endif /* SQLITE_OMIT_VIRTUALTABLE */
2922 
2923 /*
2924 ** Extension load function.
2925 */
2926 #ifdef _WIN32
2927 __declspec(dllexport)
2928 #endif
2929 int sqlite3_spellfix_init(
2930   sqlite3 *db,
2931   char **pzErrMsg,
2932   const sqlite3_api_routines *pApi
2933 ){
2934   SQLITE_EXTENSION_INIT2(pApi);
2935 #ifndef SQLITE_OMIT_VIRTUALTABLE
2936   return spellfix1Register(db);
2937 #endif
2938   return SQLITE_OK;
2939 }
2940